ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.3 by gutsche, Sun May 28 02:27:52 2006 UTC vs.
Revision 1.128 by fanzago, Thu Oct 11 16:23:44 2007 UTC

# Line 2 | Line 2 | from JobType import JobType
2   from crab_logger import Logger
3   from crab_exceptions import *
4   from crab_util import *
5 + from BlackWhiteListParser import BlackWhiteListParser
6   import common
6 import PsetManipulator  
7
8 import DBSInfo_EDM
9 #from DataDiscovery_EDM import DataDiscovery_EDM
10 import DataDiscovery_EDM
11 #from DataLocation_EDM import DataLocation_EDM
12 import DataLocation_EDM
7   import Scram
8  
9 < import os, string, re
9 > import os, string, glob
10  
11   class Cmssw(JobType):
12 <    def __init__(self, cfg_params):
12 >    def __init__(self, cfg_params, ncjobs):
13          JobType.__init__(self, 'CMSSW')
14          common.logger.debug(3,'CMSSW::__init__')
15  
22        self.analisys_common_info = {}
23        # Marco.
16          self._params = {}
17          self.cfg_params = cfg_params
18  
19 +        # init BlackWhiteListParser
20 +        self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
21 +
22 +        try:
23 +            self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24 +        except KeyError:
25 +            self.MaxTarBallSize = 9.5
26 +
27 +        # number of jobs requested to be created, limits job splitting
28 +        self.ncjobs = ncjobs
29 +
30          log = common.logger
31          
32          self.scram = Scram.Scram(cfg_params)
30        scramArea = ''
33          self.additional_inbox_files = []
34          self.scriptExe = ''
35          self.executable = ''
36 +        self.executable_arch = self.scram.getArch()
37          self.tgz_name = 'default.tgz'
38 +        self.additional_tgz_name = 'additional.tgz'
39 +        self.scriptName = 'CMSSW.sh'
40 +        self.pset = ''      #scrip use case Da  
41 +        self.datasetPath = '' #scrip use case Da
42  
43 +        # set FJR file name
44 +        self.fjrFileName = 'crab_fjr.xml'
45  
46          self.version = self.scram.getSWVersion()
47 <        common.analisys_common_info['sw_version'] = self.version
48 <        ### FEDE
49 <        common.analisys_common_info['copy_input_data'] = 0
50 <        common.analisys_common_info['events_management'] = 1
47 >        
48 >        #
49 >        # Try to block creation in case of arch/version mismatch
50 >        #
51 >
52 >        a = string.split(self.version, "_")
53 >
54 >        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
55 >            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
56 >            raise CrabException(msg)
57 >        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58 >            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
59 >            raise CrabException(msg)
60 >        
61 >        common.taskDB.setDict('codeVersion',self.version)
62 >        self.setParam_('application', self.version)
63  
64          ### collect Data cards
65 +
66 +        ## get DBS mode
67          try:
68 <         #   self.owner = cfg_params['CMSSW.owner']
69 <         #   log.debug(6, "CMSSW::CMSSW(): owner = "+self.owner)
70 <         #   self.dataset = cfg_params['CMSSW.dataset']
71 <            self.datasetPath = cfg_params['CMSSW.datasetpath']
72 <            log.debug(6, "CMSSW::CMSSW(): datasetPath = "+self.datasetPath)
68 >            self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
69 >        except KeyError:
70 >            self.use_dbs_1 = 0
71 >            
72 >        try:
73 >            tmp =  cfg_params['CMSSW.datasetpath']
74 >            log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
75 >            if string.lower(tmp)=='none':
76 >                self.datasetPath = None
77 >                self.selectNoInput = 1
78 >            else:
79 >                self.datasetPath = tmp
80 >                self.selectNoInput = 0
81          except KeyError:
51        #    msg = "Error: owner and/or dataset not defined "
82              msg = "Error: datasetpath not defined "  
83              raise CrabException(msg)
84 +
85 +        # ML monitoring
86 +        # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
87 +        if not self.datasetPath:
88 +            self.setParam_('dataset', 'None')
89 +            self.setParam_('owner', 'None')
90 +        else:
91 +            try:
92 +                datasetpath_split = self.datasetPath.split("/")
93 +                # standard style
94 +                self.setParam_('datasetFull', self.datasetPath)
95 +                if self.use_dbs_1 == 1 :
96 +                    self.setParam_('dataset', datasetpath_split[1])
97 +                    self.setParam_('owner', datasetpath_split[-1])
98 +                else:
99 +                    self.setParam_('dataset', datasetpath_split[1])
100 +                    self.setParam_('owner', datasetpath_split[2])
101 +            except:
102 +                self.setParam_('dataset', self.datasetPath)
103 +                self.setParam_('owner', self.datasetPath)
104 +                
105 +        self.setTaskid_()
106 +        self.setParam_('taskId', self.cfg_params['taskId'])
107 +
108          self.dataTiers = []
55 #       try:
56 #           tmpDataTiers = string.split(cfg_params['CMSSW.data_tier'],',')
57 #           for tmp in tmpDataTiers:
58 #               tmp=string.strip(tmp)
59 #               self.dataTiers.append(tmp)
60 #               pass
61 #           pass
62 #       except KeyError:
63 #           pass
64 #       log.debug(6, "Cmssw::Cmssw(): dataTiers = "+str(self.dataTiers))
109  
110          ## now the application
111          try:
112              self.executable = cfg_params['CMSSW.executable']
113 +            self.setParam_('exe', self.executable)
114              log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
115              msg = "Default executable cmsRun overridden. Switch to " + self.executable
116              log.debug(3,msg)
117          except KeyError:
118              self.executable = 'cmsRun'
119 +            self.setParam_('exe', self.executable)
120              msg = "User executable not defined. Use cmsRun"
121              log.debug(3,msg)
122              pass
# Line 78 | Line 124 | class Cmssw(JobType):
124          try:
125              self.pset = cfg_params['CMSSW.pset']
126              log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
127 <            if (not os.path.exists(self.pset)):
128 <                raise CrabException("User defined PSet file "+self.pset+" does not exist")
127 >            if self.pset.lower() != 'none' :
128 >                if (not os.path.exists(self.pset)):
129 >                    raise CrabException("User defined PSet file "+self.pset+" does not exist")
130 >            else:
131 >                self.pset = None
132          except KeyError:
133              raise CrabException("PSet file missing. Cannot run cmsRun ")
134  
135          # output files
136 +        ## stuff which must be returned always via sandbox
137 +        self.output_file_sandbox = []
138 +
139 +        # add fjr report by default via sandbox
140 +        self.output_file_sandbox.append(self.fjrFileName)
141 +
142 +        # other output files to be returned via sandbox or copied to SE
143          try:
144              self.output_file = []
89
145              tmp = cfg_params['CMSSW.output_file']
146              if tmp != '':
147                  tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
# Line 96 | Line 151 | class Cmssw(JobType):
151                      self.output_file.append(tmp)
152                      pass
153              else:
154 <                log.message("No output file defined: only stdout/err will be available")
154 >                log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
155                  pass
156              pass
157          except KeyError:
158 <            log.message("No output file defined: only stdout/err will be available")
158 >            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
159              pass
160  
161          # script_exe file as additional file in inputSandbox
162          try:
163 <           self.scriptExe = cfg_params['USER.script_exe']
164 <           self.additional_inbox_files.append(self.scriptExe)
163 >            self.scriptExe = cfg_params['USER.script_exe']
164 >            if self.scriptExe != '':
165 >               if not os.path.isfile(self.scriptExe):
166 >                  msg ="ERROR. file "+self.scriptExe+" not found"
167 >                  raise CrabException(msg)
168 >               self.additional_inbox_files.append(string.strip(self.scriptExe))
169          except KeyError:
170 <           pass
171 <        if self.scriptExe != '':
172 <           if os.path.isfile(self.scriptExe):
173 <              pass
174 <           else:
175 <              log.message("WARNING. file "+self.scriptExe+" not found")
176 <              sys.exit()
118 <                  
170 >            self.scriptExe = ''
171 >
172 >        #CarlosDaniele
173 >        if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
174 >           msg ="Error. script_exe  not defined"
175 >           raise CrabException(msg)
176 >
177          ## additional input files
178          try:
179 <            tmpAddFiles = string.split(cfg_params['CMSSW.additional_input_files'],',')
179 >            tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
180              for tmp in tmpAddFiles:
181 <                if not os.path.exists(tmp):
182 <                    raise CrabException("Additional input file not found: "+tmp)
183 <                tmp=string.strip(tmp)
184 <                self.additional_inbox_files.append(tmp)
181 >                tmp = string.strip(tmp)
182 >                dirname = ''
183 >                if not tmp[0]=="/": dirname = "."
184 >                files = []
185 >                if string.find(tmp,"*")>-1:
186 >                    files = glob.glob(os.path.join(dirname, tmp))
187 >                    if len(files)==0:
188 >                        raise CrabException("No additional input file found with this pattern: "+tmp)
189 >                else:
190 >                    files.append(tmp)
191 >                for file in files:
192 >                    if not os.path.exists(file):
193 >                        raise CrabException("Additional input file not found: "+file)
194 >                    pass
195 >                    # fname = string.split(file, '/')[-1]
196 >                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
197 >                    # shutil.copyfile(file, storedFile)
198 >                    self.additional_inbox_files.append(string.strip(file))
199                  pass
200              pass
201 +            common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
202          except KeyError:
203              pass
204  
205 +        # files per job
206          try:
207 <            self.filesPerJob = int(cfg_params['CMSSW.files_per_jobs']) #Daniele
207 >            if (cfg_params['CMSSW.files_per_jobs']):
208 >                raise CrabException("files_per_jobs no longer supported.  Quitting.")
209          except KeyError:
210 <            self.filesPerJob = 1
210 >            pass
211  
212 <        ## Max event   will be total_number_of_events ???  Daniele
212 >        ## Events per job
213 >        try:
214 >            self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
215 >            self.selectEventsPerJob = 1
216 >        except KeyError:
217 >            self.eventsPerJob = -1
218 >            self.selectEventsPerJob = 0
219 >    
220 >        ## number of jobs
221          try:
222 <            self.maxEv = cfg_params['CMSSW.event_per_job']
222 >            self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
223 >            self.selectNumberOfJobs = 1
224          except KeyError:
225 <            self.maxEv = "-1"
226 <        ##  
225 >            self.theNumberOfJobs = 0
226 >            self.selectNumberOfJobs = 0
227 >
228          try:
229              self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
230 +            self.selectTotalNumberEvents = 1
231          except KeyError:
232 <            msg = 'Must define total_number_of_events'
233 <            raise CrabException(msg)
234 <        
235 <        CEBlackList = []
232 >            self.total_number_of_events = 0
233 >            self.selectTotalNumberEvents = 0
234 >
235 >        if self.pset != None: #CarlosDaniele
236 >             if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
237 >                 msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
238 >                 raise CrabException(msg)
239 >        else:
240 >             if (self.selectNumberOfJobs == 0):
241 >                 msg = 'Must specify  number_of_jobs.'
242 >                 raise CrabException(msg)
243 >
244 >        ## source seed for pythia
245          try:
246 <            tmpBad = string.split(cfg_params['EDG.ce_black_list'],',')
152 <            for tmp in tmpBad:
153 <                tmp=string.strip(tmp)
154 <                CEBlackList.append(tmp)
246 >            self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
247          except KeyError:
248 <            pass
248 >            self.sourceSeed = None
249 >            common.logger.debug(5,"No seed given")
250  
251 <        self.reCEBlackList=[]
252 <        for bad in CEBlackList:
160 <            self.reCEBlackList.append(re.compile( bad ))
161 <
162 <        common.logger.debug(5,'CEBlackList: '+str(CEBlackList))
163 <
164 <        CEWhiteList = []
165 <        try:
166 <            tmpGood = string.split(cfg_params['EDG.ce_white_list'],',')
167 <            for tmp in tmpGood:
168 <                tmp=string.strip(tmp)
169 <                CEWhiteList.append(tmp)
251 >        try:
252 >            self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
253          except KeyError:
254 <            pass
254 >            self.sourceSeedVtx = None
255 >            common.logger.debug(5,"No vertex seed given")
256  
257 <        #print 'CEWhiteList: ',CEWhiteList
258 <        self.reCEWhiteList=[]
259 <        for Good in CEWhiteList:
260 <            self.reCEWhiteList.append(re.compile( Good ))
257 >        try:
258 >            self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
259 >        except KeyError:
260 >            self.sourceSeedG4 = None
261 >            common.logger.debug(5,"No g4 sim hits seed given")
262  
263 <        common.logger.debug(5,'CEWhiteList: '+str(CEWhiteList))
263 >        try:
264 >            self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
265 >        except KeyError:
266 >            self.sourceSeedMix = None
267 >            common.logger.debug(5,"No mix seed given")
268  
269 <        self.PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset
269 >        try:
270 >            self.firstRun = int(cfg_params['CMSSW.first_run'])
271 >        except KeyError:
272 >            self.firstRun = None
273 >            common.logger.debug(5,"No first run given")
274 >        if self.pset != None: #CarlosDaniele
275 >            ver = string.split(self.version,"_")
276 >            if (int(ver[1])>=1 and int(ver[2])>=5):
277 >                import PsetManipulator150 as pp
278 >            else:
279 >                import PsetManipulator as pp
280 >            PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
281  
282          #DBSDLS-start
283          ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
284          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
285          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
286 +        self.jobDestination=[]  # Site destination(s) for each job (list of lists)
287          ## Perform the data location and discovery (based on DBS/DLS)
288 <        self.DataDiscoveryAndLocation(cfg_params)
288 >        ## SL: Skip this if NONE is specified as input (pythia use case)
289 >        blockSites = {}
290 >        if self.datasetPath:
291 >            blockSites = self.DataDiscoveryAndLocation(cfg_params)
292          #DBSDLS-end          
293  
294          self.tgzNameWithPath = self.getTarBall(self.executable)
295 +    
296 +        ## Select Splitting
297 +        if self.selectNoInput:
298 +            if self.pset == None: #CarlosDaniele
299 +                self.jobSplittingForScript()
300 +            else:
301 +                self.jobSplittingNoInput()
302 +        else:
303 +            self.jobSplittingByBlocks(blockSites)
304  
305 <        self.jobSplitting()  #Daniele job Splitting
306 <        self.PsetEdit.maxEvent(self.maxEv) #Daniele  
307 <        self.PsetEdit.inputModule("INPUT") #Daniele  
308 <        self.PsetEdit.psetWriter(self.configFilename())
305 >        # modify Pset
306 >        if self.pset != None: #CarlosDaniele
307 >            try:
308 >                if (self.datasetPath): # standard job
309 >                    # allow to processa a fraction of events in a file
310 >                    PsetEdit.inputModule("INPUT")
311 >                    PsetEdit.maxEvent("INPUTMAXEVENTS")
312 >                    PsetEdit.skipEvent("INPUTSKIPEVENTS")
313 >                else:  # pythia like job
314 >                    PsetEdit.maxEvent(self.eventsPerJob)
315 >                    if (self.firstRun):
316 >                        PsetEdit.pythiaFirstRun("INPUTFIRSTRUN")  #First Run
317 >                    if (self.sourceSeed) :
318 >                        PsetEdit.pythiaSeed("INPUT")
319 >                        if (self.sourceSeedVtx) :
320 >                            PsetEdit.vtxSeed("INPUTVTX")
321 >                        if (self.sourceSeedG4) :
322 >                            PsetEdit.g4Seed("INPUTG4")
323 >                        if (self.sourceSeedMix) :
324 >                            PsetEdit.mixSeed("INPUTMIX")
325 >                # add FrameworkJobReport to parameter-set
326 >                PsetEdit.addCrabFJR(self.fjrFileName)
327 >                PsetEdit.psetWriter(self.configFilename())
328 >            except:
329 >                msg='Error while manipuliating ParameterSet: exiting...'
330 >                raise CrabException(msg)
331  
332      def DataDiscoveryAndLocation(self, cfg_params):
333  
334 +        import DataDiscovery
335 +        import DataDiscovery_DBS2
336 +        import DataLocation
337          common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
338  
201        #datasetPath = "/"+self.owner+"/"+self.dataTiers[0]+"/"+self.dataset
202        
339          datasetPath=self.datasetPath
340  
205        ## TODO
206        dataTiersList = ""
207        dataTiers = dataTiersList.split(',')
208
341          ## Contact the DBS
342 +        common.logger.message("Contacting Data Discovery Services ...")
343          try:
344 <            self.pubdata=DataDiscovery_EDM.DataDiscovery_EDM(datasetPath, dataTiers, dataTiers)
344 >
345 >            if self.use_dbs_1 == 1 :
346 >                self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
347 >            else :
348 >                self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
349              self.pubdata.fetchDBSInfo()
350  
351 <        except DataDiscovery_EDM.NotExistingDatasetError, ex :
351 >        except DataDiscovery.NotExistingDatasetError, ex :
352              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
353              raise CrabException(msg)
354 <
355 <        except DataDiscovery_EDM.NoDataTierinProvenanceError, ex :
354 >        except DataDiscovery.NoDataTierinProvenanceError, ex :
355 >            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
356 >            raise CrabException(msg)
357 >        except DataDiscovery.DataDiscoveryError, ex:
358 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
359 >            raise CrabException(msg)
360 >        except DataDiscovery_DBS2.NotExistingDatasetError_DBS2, ex :
361 >            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
362 >            raise CrabException(msg)
363 >        except DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2, ex :
364              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
365              raise CrabException(msg)
366 <        except DataDiscovery_EDM.DataDiscoveryError, ex:
367 <            msg = 'ERROR ***: failed Data Discovery in DBS  %s'%ex.getErrorMessage()
366 >        except DataDiscovery_DBS2.DataDiscoveryError_DBS2, ex:
367 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
368              raise CrabException(msg)
369  
370 <        ## get list of all required data in the form of dbs paths  (dbs path = /dataset/datatier/owner)
371 <        ## self.DBSPaths=self.pubdata.getDBSPaths()
372 <        common.logger.message("Required data are :"+self.datasetPath)
228 <
229 <        filesbyblock=self.pubdata.getFiles()
230 <        self.AllInputFiles=filesbyblock.values()
231 <        self.files = self.AllInputFiles        
232 <
233 <        ## TEMP
234 <    #    self.filesTmp = filesbyblock.values()
235 <    #    self.files = []
236 <    #    locPath='rfio:cmsbose2.bo.infn.it:/flatfiles/SE00/cms/fanfani/ProdTest/'
237 <    #    locPath=''
238 <    #    tmp = []
239 <    #    for file in self.filesTmp[0]:
240 <    #        tmp.append(locPath+file)
241 <    #    self.files.append(tmp)
242 <        ## END TEMP
370 >        self.filesbyblock=self.pubdata.getFiles()
371 >        self.eventsbyblock=self.pubdata.getEventsPerBlock()
372 >        self.eventsbyfile=self.pubdata.getEventsPerFile()
373  
374          ## get max number of events
245        #common.logger.debug(10,"number of events for primary fileblocks %i"%self.pubdata.getMaxEvents())
375          self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
247        common.logger.message("\nThe number of available events is %s"%self.maxEvents)
376  
377          ## Contact the DLS and build a list of sites hosting the fileblocks
378          try:
379 <            dataloc=DataLocation_EDM.DataLocation_EDM(filesbyblock.keys(),cfg_params)
379 >            dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
380              dataloc.fetchDLSInfo()
381 <        except DataLocation_EDM.DataLocationError , ex:
381 >        except DataLocation.DataLocationError , ex:
382              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
383              raise CrabException(msg)
384          
257        allsites=dataloc.getSites()
258        common.logger.debug(5,"sites are %s"%allsites)
259        sites=self.checkBlackList(allsites)
260        common.logger.debug(5,"sites are (after black list) %s"%sites)
261        sites=self.checkWhiteList(sites)
262        common.logger.debug(5,"sites are (after white list) %s"%sites)
263
264        if len(sites)==0:
265            msg = 'No sites hosting all the needed data! Exiting... '
266            raise CrabException(msg)
385  
386 <        common.logger.message("List of Sites hosting the data : "+str(sites))
387 <        common.logger.debug(6, "List of Sites: "+str(sites))
388 <        common.analisys_common_info['sites']=sites    ## used in SchedulerEdg.py in createSchScript
389 <        return
386 >        sites = dataloc.getSites()
387 >        allSites = []
388 >        listSites = sites.values()
389 >        for listSite in listSites:
390 >            for oneSite in listSite:
391 >                allSites.append(oneSite)
392 >        allSites = self.uniquelist(allSites)
393 >
394 >        # screen output
395 >        common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
396 >
397 >        return sites
398      
399 <    def jobSplitting(self):
399 >    def jobSplittingByBlocks(self, blockSites):
400          """
401 <        first implemntation for job splitting  
402 <        """    
403 <      #  print 'eventi totali '+str(self.maxEvents)
404 <      #  print 'eventi totali richiesti dallo user '+str(self.total_number_of_events)
405 <        #print 'files per job '+str(self.filesPerJob)
406 <        common.logger.message('Required '+str(self.filesPerJob)+' files per job ')
407 <        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
408 <
409 <        ## TODO: SL need to have (from DBS) a detailed list of how many events per each file
410 <        n_tot_files = (len(self.files[0]))
411 <        ## SL: this is wrong if the files have different number of events
412 <        evPerFile = int(self.maxEvents)/n_tot_files
413 <        
414 <        common.logger.debug(5,'Events per File '+str(evPerFile))
415 <
416 <        ## if asked to process all events, do it
417 <        if self.total_number_of_events == -1:
418 <            self.total_number_of_events=self.maxEvents
419 <            self.total_number_of_jobs = int(n_tot_files)*1/int(self.filesPerJob)
420 <            common.logger.message(str(self.total_number_of_jobs)+' jobs will be created for all available events '+str(self.total_number_of_events)+' events')
421 <        
401 >        Perform job splitting. Jobs run over an integer number of files
402 >        and no more than one block.
403 >        ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
404 >        REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
405 >                  self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
406 >                  self.maxEvents, self.filesbyblock
407 >        SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
408 >              self.total_number_of_jobs - Total # of jobs
409 >              self.list_of_args - File(s) job will run on (a list of lists)
410 >        """
411 >
412 >        # ---- Handle the possible job splitting configurations ---- #
413 >        if (self.selectTotalNumberEvents):
414 >            totalEventsRequested = self.total_number_of_events
415 >        if (self.selectEventsPerJob):
416 >            eventsPerJobRequested = self.eventsPerJob
417 >            if (self.selectNumberOfJobs):
418 >                totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
419 >
420 >        # If user requested all the events in the dataset
421 >        if (totalEventsRequested == -1):
422 >            eventsRemaining=self.maxEvents
423 >        # If user requested more events than are in the dataset
424 >        elif (totalEventsRequested > self.maxEvents):
425 >            eventsRemaining = self.maxEvents
426 >            common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
427 >        # If user requested fewer events than are in the dataset
428          else:
429 <            self.total_number_of_files = int(self.total_number_of_events/evPerFile)
298 <            ## SL: if ask for less event than what is computed to be available on a
299 <            ##     file, process the first file anyhow.
300 <            if self.total_number_of_files == 0:
301 <                self.total_number_of_files = self.total_number_of_files + 1
429 >            eventsRemaining = totalEventsRequested
430  
431 <            common.logger.debug(5,'N files  '+str(self.total_number_of_files))
431 >        # If user requested more events per job than are in the dataset
432 >        if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
433 >            eventsPerJobRequested = self.maxEvents
434  
435 <            check = 0
435 >        # For user info at end
436 >        totalEventCount = 0
437 >
438 >        if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
439 >            eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
440 >
441 >        if (self.selectNumberOfJobs):
442 >            common.logger.message("May not create the exact number_of_jobs requested.")
443 >
444 >        if ( self.ncjobs == 'all' ) :
445 >            totalNumberOfJobs = 999999999
446 >        else :
447 >            totalNumberOfJobs = self.ncjobs
448              
307            ## Compute the number of jobs
308            #self.total_number_of_jobs = int(n_tot_files)*1/int(self.filesPerJob)
309            self.total_number_of_jobs = int(self.total_number_of_files/self.filesPerJob)
310            common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
449  
450 <            ## is there any remainder?
451 <            check = int(self.total_number_of_files) - (int(self.total_number_of_jobs)*self.filesPerJob)
450 >        blocks = blockSites.keys()
451 >        blockCount = 0
452 >        # Backup variable in case self.maxEvents counted events in a non-included block
453 >        numBlocksInDataset = len(blocks)
454  
455 <            common.logger.debug(5,'Check  '+str(check))
455 >        jobCount = 0
456 >        list_of_lists = []
457  
458 <            if check > 0:
459 <                self.total_number_of_jobs =  self.total_number_of_jobs + 1
319 <                common.logger.message('Warning: last job will be created with '+str(check)+' files')
458 >        # dictionary tracking which jobs belong to which block
459 >        jobsOfBlock = {}
460  
461 <            common.logger.message(str(self.total_number_of_jobs)+' jobs will be created for a total of '+str((self.total_number_of_jobs-1)*self.filesPerJob*evPerFile + check*evPerFile)+' events')
461 >        # ---- Iterate over the blocks in the dataset until ---- #
462 >        # ---- we've met the requested total # of events    ---- #
463 >        while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
464 >            block = blocks[blockCount]
465 >            blockCount += 1
466 >            if block not in jobsOfBlock.keys() :
467 >                jobsOfBlock[block] = []
468 >            
469 >            if self.eventsbyblock.has_key(block) :
470 >                numEventsInBlock = self.eventsbyblock[block]
471 >                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
472 >            
473 >                files = self.filesbyblock[block]
474 >                numFilesInBlock = len(files)
475 >                if (numFilesInBlock <= 0):
476 >                    continue
477 >                fileCount = 0
478 >
479 >                # ---- New block => New job ---- #
480 >                parString = "\\{"
481 >                # counter for number of events in files currently worked on
482 >                filesEventCount = 0
483 >                # flag if next while loop should touch new file
484 >                newFile = 1
485 >                # job event counter
486 >                jobSkipEventCount = 0
487 >            
488 >                # ---- Iterate over the files in the block until we've met the requested ---- #
489 >                # ---- total # of events or we've gone over all the files in this block  ---- #
490 >                while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
491 >                    file = files[fileCount]
492 >                    if newFile :
493 >                        try:
494 >                            numEventsInFile = self.eventsbyfile[file]
495 >                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
496 >                            # increase filesEventCount
497 >                            filesEventCount += numEventsInFile
498 >                            # Add file to current job
499 >                            parString += '\\\"' + file + '\\\"\,'
500 >                            newFile = 0
501 >                        except KeyError:
502 >                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
503 >                        
504 >
505 >                    # if less events in file remain than eventsPerJobRequested
506 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
507 >                        # if last file in block
508 >                        if ( fileCount == numFilesInBlock-1 ) :
509 >                            # end job using last file, use remaining events in block
510 >                            # close job and touch new file
511 >                            fullString = parString[:-2]
512 >                            fullString += '\\}'
513 >                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
514 >                            common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
515 >                            self.jobDestination.append(blockSites[block])
516 >                            common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
517 >                            # fill jobs of block dictionary
518 >                            jobsOfBlock[block].append(jobCount+1)
519 >                            # reset counter
520 >                            jobCount = jobCount + 1
521 >                            totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
522 >                            eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
523 >                            jobSkipEventCount = 0
524 >                            # reset file
525 >                            parString = "\\{"
526 >                            filesEventCount = 0
527 >                            newFile = 1
528 >                            fileCount += 1
529 >                        else :
530 >                            # go to next file
531 >                            newFile = 1
532 >                            fileCount += 1
533 >                    # if events in file equal to eventsPerJobRequested
534 >                    elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
535 >                        # close job and touch new file
536 >                        fullString = parString[:-2]
537 >                        fullString += '\\}'
538 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
539 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
540 >                        self.jobDestination.append(blockSites[block])
541 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
542 >                        jobsOfBlock[block].append(jobCount+1)
543 >                        # reset counter
544 >                        jobCount = jobCount + 1
545 >                        totalEventCount = totalEventCount + eventsPerJobRequested
546 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
547 >                        jobSkipEventCount = 0
548 >                        # reset file
549 >                        parString = "\\{"
550 >                        filesEventCount = 0
551 >                        newFile = 1
552 >                        fileCount += 1
553 >                        
554 >                    # if more events in file remain than eventsPerJobRequested
555 >                    else :
556 >                        # close job but don't touch new file
557 >                        fullString = parString[:-2]
558 >                        fullString += '\\}'
559 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
560 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
561 >                        self.jobDestination.append(blockSites[block])
562 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
563 >                        jobsOfBlock[block].append(jobCount+1)
564 >                        # increase counter
565 >                        jobCount = jobCount + 1
566 >                        totalEventCount = totalEventCount + eventsPerJobRequested
567 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
568 >                        # calculate skip events for last file
569 >                        # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequested
570 >                        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
571 >                        # remove all but the last file
572 >                        filesEventCount = self.eventsbyfile[file]
573 >                        parString = "\\{"
574 >                        parString += '\\\"' + file + '\\\"\,'
575 >                    pass # END if
576 >                pass # END while (iterate over files in the block)
577 >        pass # END while (iterate over blocks in the dataset)
578 >        self.ncjobs = self.total_number_of_jobs = jobCount
579 >        if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
580 >            common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
581 >        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
582 >        
583 >        # screen output
584 >        screenOutput = "List of jobs and available destination sites:\n\n"
585 >
586 >        # keep track of blocks with no sites, to print a warning at the end
587 >        noSiteBlock = []
588 >        bloskNoSite = []
589 >
590 >        blockCounter = 0
591 >        for block in blocks:
592 >            if block in jobsOfBlock.keys() :
593 >                blockCounter += 1
594 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
595 >                if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
596 >                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
597 >                    bloskNoSite.append( blockCounter )
598 >        
599 >        common.logger.message(screenOutput)
600 >        if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
601 >            msg = 'WARNING: No sites are hosting any part of data for block:\n                '
602 >            virgola = ""
603 >            if len(bloskNoSite) > 1:
604 >                virgola = ","
605 >            for block in bloskNoSite:
606 >                msg += ' ' + str(block) + virgola
607 >            msg += '\n               Related jobs:\n                 '
608 >            virgola = ""
609 >            if len(noSiteBlock) > 1:
610 >                virgola = ","
611 >            for range_jobs in noSiteBlock:
612 >                msg += str(range_jobs) + virgola
613 >            msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
614 >            common.logger.message(msg)
615 >
616 >        self.list_of_args = list_of_lists
617 >        return
618 >
619 >    def jobSplittingNoInput(self):
620 >        """
621 >        Perform job splitting based on number of events per job
622 >        """
623 >        common.logger.debug(5,'Splitting per events')
624 >        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
625 >        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
626 >        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
627 >
628 >        if (self.total_number_of_events < 0):
629 >            msg='Cannot split jobs per Events with "-1" as total number of events'
630 >            raise CrabException(msg)
631 >
632 >        if (self.selectEventsPerJob):
633 >            if (self.selectTotalNumberEvents):
634 >                self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
635 >            elif(self.selectNumberOfJobs) :  
636 >                self.total_number_of_jobs =self.theNumberOfJobs
637 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
638 >
639 >        elif (self.selectNumberOfJobs) :
640 >            self.total_number_of_jobs = self.theNumberOfJobs
641 >            self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
642 >
643 >        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
644 >
645 >        # is there any remainder?
646 >        check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)
647 >
648 >        common.logger.debug(5,'Check  '+str(check))
649 >
650 >        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
651 >        if check > 0:
652 >            common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
653 >
654 >        # argument is seed number.$i
655 >        self.list_of_args = []
656 >        for i in range(self.total_number_of_jobs):
657 >            ## Since there is no input, any site is good
658 >           # self.jobDestination.append(["Any"])
659 >            self.jobDestination.append([""]) #must be empty to write correctly the xml
660 >            args=[]
661 >            if (self.firstRun):
662 >                    ## pythia first run
663 >                #self.list_of_args.append([(str(self.firstRun)+str(i))])
664 >                args.append(str(self.firstRun)+str(i))
665 >            else:
666 >                ## no first run
667 >                #self.list_of_args.append([str(i)])
668 >                args.append(str(i))
669 >            if (self.sourceSeed):
670 >                args.append(str(self.sourceSeed)+str(i))
671 >                if (self.sourceSeedVtx):
672 >                    ## + vtx random seed
673 >                    args.append(str(self.sourceSeedVtx)+str(i))
674 >                if (self.sourceSeedG4):
675 >                    ## + G4 random seed
676 >                    args.append(str(self.sourceSeedG4)+str(i))
677 >                if (self.sourceSeedMix):    
678 >                    ## + Mix random seed
679 >                    args.append(str(self.sourceSeedMix)+str(i))
680 >                pass
681              pass
682 +            self.list_of_args.append(args)
683 +        pass
684 +            
685 +        # print self.list_of_args
686  
687 <        list_of_lists = []
325 <        for i in xrange(0, int(n_tot_files), self.filesPerJob):
326 <            list_of_lists.append(self.files[0][i: i+self.filesPerJob])
687 >        return
688  
689 <        self.list_of_files = list_of_lists
690 <      
689 >
690 >    def jobSplittingForScript(self):#CarlosDaniele
691 >        """
692 >        Perform job splitting based on number of jobs
693 >        """
694 >        common.logger.debug(5,'Splitting per job')
695 >        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
696 >
697 >        self.total_number_of_jobs = self.theNumberOfJobs
698 >
699 >        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
700 >
701 >        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')
702 >
703 >        # argument is seed number.$i
704 >        self.list_of_args = []
705 >        for i in range(self.total_number_of_jobs):
706 >            ## Since there is no input, any site is good
707 >           # self.jobDestination.append(["Any"])
708 >            self.jobDestination.append([""])
709 >            ## no random seed
710 >            self.list_of_args.append([str(i)])
711          return
712  
713      def split(self, jobParams):
# Line 334 | Line 715 | class Cmssw(JobType):
715          common.jobDB.load()
716          #### Fabio
717          njobs = self.total_number_of_jobs
718 <        filelist = self.list_of_files
718 >        arglist = self.list_of_args
719          # create the empty structure
720          for i in range(njobs):
721              jobParams.append("")
722          
723          for job in range(njobs):
724 <            jobParams[job] = filelist[job]
724 >            jobParams[job] = arglist[job]
725 >            # print str(arglist[job])
726 >            # print jobParams[job]
727              common.jobDB.setArguments(job, jobParams[job])
728 +            common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
729 +            common.jobDB.setDestination(job, self.jobDestination[job])
730  
731          common.jobDB.save()
732          return
733      
734      def getJobTypeArguments(self, nj, sched):
735 <        params = common.jobDB.arguments(nj)
736 <        #print params
737 <        parString = "\\{"
738 <        
354 <        for i in range(len(params) - 1):
355 <            parString += '\\\"' + params[i] + '\\\"\,'
356 <        
357 <        parString += '\\\"' + params[len(params) - 1] + '\\\"\\}'
358 <        return parString
735 >        result = ''
736 >        for i in common.jobDB.arguments(nj):
737 >            result=result+str(i)+" "
738 >        return result
739    
740      def numberOfJobs(self):
741          # Fabio
362
742          return self.total_number_of_jobs
364
365
366
367    def checkBlackList(self, allSites):
368        if len(self.reCEBlackList)==0: return allSites
369        sites = []
370        for site in allSites:
371            common.logger.debug(10,'Site '+site)
372            good=1
373            for re in self.reCEBlackList:
374                if re.search(site):
375                    common.logger.message('CE in black list, skipping site '+site)
376                    good=0
377                pass
378            if good: sites.append(site)
379        if len(sites) == 0:
380            common.logger.debug(3,"No sites found after BlackList")
381        return sites
382
383    def checkWhiteList(self, allSites):
384
385        if len(self.reCEWhiteList)==0: return allSites
386        sites = []
387        for site in allSites:
388            good=0
389            for re in self.reCEWhiteList:
390                if re.search(site):
391                    common.logger.debug(5,'CE in white list, adding site '+site)
392                    good=1
393                if not good: continue
394                sites.append(site)
395        if len(sites) == 0:
396            common.logger.message("No sites found after WhiteList\n")
397        else:
398            common.logger.debug(5,"Selected sites via WhiteList are "+str(sites)+"\n")
399        return sites
743  
744      def getTarBall(self, exe):
745          """
# Line 404 | Line 747 | class Cmssw(JobType):
747          """
748          
749          # if it exist, just return it
750 <        self.tgzNameWithPath = common.work_space.shareDir()+self.tgz_name
750 >        #
751 >        # Marco. Let's start to use relative path for Boss XML files
752 >        #
753 >        self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
754          if os.path.exists(self.tgzNameWithPath):
755              return self.tgzNameWithPath
756  
# Line 418 | Line 764 | class Cmssw(JobType):
764          # First of all declare the user Scram area
765          swArea = self.scram.getSWArea_()
766          #print "swArea = ", swArea
767 <        swVersion = self.scram.getSWVersion()
768 <        #print "swVersion = ", swVersion
767 >        # swVersion = self.scram.getSWVersion()
768 >        # print "swVersion = ", swVersion
769          swReleaseTop = self.scram.getReleaseTop_()
770          #print "swReleaseTop = ", swReleaseTop
771          
# Line 427 | Line 773 | class Cmssw(JobType):
773          if swReleaseTop == '' or swArea == swReleaseTop:
774              return
775  
776 <        filesToBeTarred = []
777 <        ## First find the executable
778 <        if (self.executable != ''):
779 <            exeWithPath = self.scram.findFile_(executable)
780 < #           print exeWithPath
781 <            if ( not exeWithPath ):
782 <                raise CrabException('User executable '+executable+' not found')
783 <
784 <            ## then check if it's private or not
785 <            if exeWithPath.find(swReleaseTop) == -1:
786 <                # the exe is private, so we must ship
787 <                common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
788 <                path = swArea+'/'
789 <                exe = string.replace(exeWithPath, path,'')
790 <                filesToBeTarred.append(exe)
791 <                pass
792 <            else:
793 <                # the exe is from release, we'll find it on WN
794 <                pass
795 <
796 <        ## Now get the libraries: only those in local working area
797 <        libDir = 'lib'
798 <        lib = swArea+'/' +libDir
799 <        common.logger.debug(5,"lib "+lib+" to be tarred")
800 <        if os.path.exists(lib):
801 <            filesToBeTarred.append(libDir)
802 <
803 <        ## Now check if module dir is present
804 <        moduleDir = 'module'
805 <        if os.path.isdir(swArea+'/'+moduleDir):
806 <            filesToBeTarred.append(moduleDir)
807 <
808 <        ## Now check if the Data dir is present
809 <        dataDir = 'src/Data/'
810 <        if os.path.isdir(swArea+'/'+dataDir):
811 <            filesToBeTarred.append(dataDir)
812 <
813 <        ## Create the tar-ball
814 <        if len(filesToBeTarred)>0:
815 <            cwd = os.getcwd()
816 <            os.chdir(swArea)
817 <            tarcmd = 'tar zcvf ' + self.tgzNameWithPath + ' '
818 <            for line in filesToBeTarred:
819 <                tarcmd = tarcmd + line + ' '
820 <            cout = runCommand(tarcmd)
821 <            if not cout:
822 <                raise CrabException('Could not create tar-ball')
823 <            os.chdir(cwd)
824 <        else:
825 <            common.logger.debug(5,"No files to be to be tarred")
776 >        import tarfile
777 >        try: # create tar ball
778 >            tar = tarfile.open(self.tgzNameWithPath, "w:gz")
779 >            ## First find the executable
780 >            if (self.executable != ''):
781 >                exeWithPath = self.scram.findFile_(executable)
782 >                if ( not exeWithPath ):
783 >                    raise CrabException('User executable '+executable+' not found')
784 >    
785 >                ## then check if it's private or not
786 >                if exeWithPath.find(swReleaseTop) == -1:
787 >                    # the exe is private, so we must ship
788 >                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
789 >                    path = swArea+'/'
790 >                    # distinguish case when script is in user project area or given by full path somewhere else
791 >                    if exeWithPath.find(path) >= 0 :
792 >                        exe = string.replace(exeWithPath, path,'')
793 >                        tar.add(path+exe,os.path.basename(executable))
794 >                    else :
795 >                        tar.add(exeWithPath,os.path.basename(executable))
796 >                    pass
797 >                else:
798 >                    # the exe is from release, we'll find it on WN
799 >                    pass
800 >    
801 >            ## Now get the libraries: only those in local working area
802 >            libDir = 'lib'
803 >            lib = swArea+'/' +libDir
804 >            common.logger.debug(5,"lib "+lib+" to be tarred")
805 >            if os.path.exists(lib):
806 >                tar.add(lib,libDir)
807 >    
808 >            ## Now check if module dir is present
809 >            moduleDir = 'module'
810 >            module = swArea + '/' + moduleDir
811 >            if os.path.isdir(module):
812 >                tar.add(module,moduleDir)
813 >
814 >            ## Now check if any data dir(s) is present
815 >            swAreaLen=len(swArea)
816 >            for root, dirs, files in os.walk(swArea):
817 >                if "data" in dirs:
818 >                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
819 >                    tar.add(root+"/data",root[swAreaLen:]+"/data")
820 >
821 >            ## Add ProdAgent dir to tar
822 >            paDir = 'ProdAgentApi'
823 >            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
824 >            if os.path.isdir(pa):
825 >                tar.add(pa,paDir)
826 >
827 >            ### FEDE FOR DBS PUBLICATION
828 >            ## Add PRODCOMMON dir to tar
829 >            prodcommonDir = 'ProdCommon'
830 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
831 >            if os.path.isdir(prodcommonPath):
832 >                tar.add(prodcommonPath,prodcommonDir)
833 >            #############################    
834 >        
835 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
836 >            tar.close()
837 >        except :
838 >            raise CrabException('Could not create tar-ball')
839 >
840 >        ## check for tarball size
841 >        tarballinfo = os.stat(self.tgzNameWithPath)
842 >        if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
843 >            raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
844 >
845 >        ## create tar-ball with ML stuff
846 >        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
847 >        try:
848 >            tar = tarfile.open(self.MLtgzfile, "w:gz")
849 >            path=os.environ['CRABDIR'] + '/python/'
850 >            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
851 >                tar.add(path+file,file)
852 >            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
853 >            tar.close()
854 >        except :
855 >            raise CrabException('Could not create ML files tar-ball')
856          
857          return
858          
859 +    def additionalInputFileTgz(self):
860 +        """
861 +        Put all additional files into a tar ball and return its name
862 +        """
863 +        import tarfile
864 +        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
865 +        tar = tarfile.open(tarName, "w:gz")
866 +        for file in self.additional_inbox_files:
867 +            tar.add(file,string.split(file,'/')[-1])
868 +        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
869 +        tar.close()
870 +        return tarName
871 +
872      def wsSetupEnvironment(self, nj):
873          """
874          Returns part of a job script which prepares
# Line 491 | Line 880 | class Cmssw(JobType):
880          ## OLI_Daniele at this level  middleware already known
881  
882          txt += 'if [ $middleware == LCG ]; then \n'
883 +        txt += '    echo "### First set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
884 +        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
885 +        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
886          txt += self.wsSetupCMSLCGEnvironment_()
887          txt += 'elif [ $middleware == OSG ]; then\n'
888 <        txt += '    time=`date -u +"%s"`\n'
889 <        txt += '    WORKING_DIR=$OSG_WN_TMP/cms_$time\n'
498 <        txt += '    echo "Creating working directory: $WORKING_DIR"\n'
499 <        txt += '    /bin/mkdir -p $WORKING_DIR\n'
888 >        txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
889 >        txt += '    echo "Created working directory: $WORKING_DIR"\n'
890          txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
891 <        txt += '        echo "OSG WORKING DIR ==> $WORKING_DIR could not be created on on WN `hostname`"\n'
892 <    
893 <        txt += '        echo "JOB_EXIT_STATUS = 1"\n'
891 >        txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
892 >        txt += '    echo "JOB_EXIT_STATUS = 10016"\n'
893 >        txt += '    echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
894 >        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
895 >        txt += '        rm -f $RUNTIME_AREA/$repo \n'
896 >        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
897 >        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
898          txt += '        exit 1\n'
899          txt += '    fi\n'
900          txt += '\n'
901          txt += '    echo "Change to working directory: $WORKING_DIR"\n'
902          txt += '    cd $WORKING_DIR\n'
903          txt += self.wsSetupCMSOSGEnvironment_()
904 +        txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
905 +        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
906          txt += 'fi\n'
907  
908          # Prepare JobType-specific part
# Line 516 | Line 912 | class Cmssw(JobType):
912          txt += scram+' project CMSSW '+self.version+'\n'
913          txt += 'status=$?\n'
914          txt += 'if [ $status != 0 ] ; then\n'
915 <        txt += '   echo "SET_EXE_ENV 1 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
915 >        txt += '   echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
916          txt += '   echo "JOB_EXIT_STATUS = 10034"\n'
917 <        txt += '   echo "SanityCheckCode = 10034" | tee -a $RUNTIME_AREA/$repo\n'
917 >        txt += '   echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
918          txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
919 +        txt += '   rm -f $RUNTIME_AREA/$repo \n'
920 +        txt += '   echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
921 +        txt += '   echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
922          ## OLI_Daniele
923          txt += '    if [ $middleware == OSG ]; then \n'
924          txt += '        echo "Remove working directory: $WORKING_DIR"\n'
925          txt += '        cd $RUNTIME_AREA\n'
926          txt += '        /bin/rm -rf $WORKING_DIR\n'
927          txt += '        if [ -d $WORKING_DIR ] ;then\n'
928 <        txt += '            echo "OSG WORKING DIR ==> $WORKING_DIR could not be deleted on on WN `hostname`"\n'
928 >        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
929 >        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
930 >        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
931 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
932 >        txt += '            rm -f $RUNTIME_AREA/$repo \n'
933 >        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
934 >        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
935          txt += '        fi\n'
936          txt += '    fi \n'
937          txt += '   exit 1 \n'
938          txt += 'fi \n'
939          txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
940          txt += 'cd '+self.version+'\n'
941 +        ########## FEDE FOR DBS2 ######################
942 +        txt += 'SOFTWARE_DIR=`pwd`\n'
943 +        txt += 'echo SOFTWARE_DIR=$SOFTWARE_DIR \n'
944 +        ###############################################
945          ### needed grep for bug in scramv1 ###
946 +        txt += scram+' runtime -sh\n'
947          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
948 +        txt += 'echo $PATH\n'
949  
950          # Handle the arguments:
951          txt += "\n"
952 <        txt += "## ARGUMNETS: $1 Job Number\n"
542 <        # txt += "## ARGUMNETS: $2 First Event for this job\n"
543 <        # txt += "## ARGUMNETS: $3 Max Event for this job\n"
952 >        txt += "## number of arguments (first argument always jobnumber)\n"
953          txt += "\n"
954 <        txt += "narg=$#\n"
955 <        txt += "if [ $narg -lt 2 ]\n"
954 > #        txt += "narg=$#\n"
955 >        txt += "if [ $nargs -lt 2 ]\n"
956          txt += "then\n"
957 <        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$narg+ \n"
957 >        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
958          txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
959 <        txt += '    echo "SanityCheckCode = 50113" | tee -a $RUNTIME_AREA/$repo\n'
959 >        txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
960          txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
961 +        txt += '    rm -f $RUNTIME_AREA/$repo \n'
962 +        txt += '    echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
963 +        txt += '    echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
964          ## OLI_Daniele
965          txt += '    if [ $middleware == OSG ]; then \n'
966          txt += '        echo "Remove working directory: $WORKING_DIR"\n'
967          txt += '        cd $RUNTIME_AREA\n'
968          txt += '        /bin/rm -rf $WORKING_DIR\n'
969          txt += '        if [ -d $WORKING_DIR ] ;then\n'
970 <        txt += '            echo "OSG WORKING DIR ==> $WORKING_DIR could not be deleted on on WN `hostname`"\n'
970 >        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
971 >        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
972 >        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
973 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
974 >        txt += '            rm -f $RUNTIME_AREA/$repo \n'
975 >        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
976 >        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
977          txt += '        fi\n'
978          txt += '    fi \n'
979          txt += "    exit 1\n"
980          txt += "fi\n"
981          txt += "\n"
564        txt += "NJob=$1\n"
565        txt += "InputFiles=$2\n"
566        txt += "echo \"<$InputFiles>\"\n"
567        # txt += "Args = ` cat $2 |  sed -e \'s/\\\\//g\' -e \'s/\"/\\x27/g\' `"
568
569        ### OLI_DANIELE
570        txt += 'if [ $middleware == LCG ]; then \n'
571        txt += '    echo "MonitorJobID=`echo ${NJob}_$EDG_WL_JOBID`" | tee -a $RUNTIME_AREA/$repo\n'
572        txt += '    echo "SyncGridJobId=`echo $EDG_WL_JOBID`" | tee -a $RUNTIME_AREA/$repo\n'
573        txt += '    echo "SyncCE=`edg-brokerinfo getCE`" | tee -a $RUNTIME_AREA/$repo\n'
574        txt += 'elif [ $middleware == OSG ]; then\n'
575
576        # OLI: added monitoring for dashbord, use hash of crab.cfg
577        if common.scheduler.boss_scheduler_name == 'condor_g':
578            # create hash of cfg file
579            hash = makeCksum(common.work_space.cfgFileName())
580            txt += '    echo "MonitorJobID=`echo ${NJob}_'+hash+'_$GLOBUS_GRAM_JOB_CONTACT`" | tee -a $RUNTIME_AREA/$repo\n'
581            txt += '    echo "SyncGridJobId=`echo $GLOBUS_GRAM_JOB_CONTACT`" | tee -a $RUNTIME_AREA/$repo\n'
582            txt += '    echo "SyncCE=`echo $hostname`" | tee -a $RUNTIME_AREA/$repo\n'
583        else :
584            txt += '    echo "MonitorJobID=`echo ${NJob}_$EDG_WL_JOBID`" | tee -a $RUNTIME_AREA/$repo\n'
585            txt += '    echo "SyncGridJobId=`echo $EDG_WL_JOBID`" | tee -a $RUNTIME_AREA/$repo\n'
586            txt += '    echo "SyncCE=`$EDG_WL_LOG_DESTINATION`" | tee -a $RUNTIME_AREA/$repo\n'
587
588        txt += 'fi\n'
589        txt += 'dumpStatus $RUNTIME_AREA/$repo\n'
982  
983          # Prepare job-specific part
984          job = common.job_list[nj]
985 <        pset = os.path.basename(job.configFilename())
986 <        txt += '\n'
987 <        #txt += 'echo sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' \n'
988 <        txt += 'sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' > pset.cfg\n'
989 <        #txt += 'sed "s#{\'INPUT\'}#${InputFiles}#" $RUNTIME_AREA/'+pset+' > pset1.cfg\n'
985 >        ### FEDE FOR DBS OUTPUT PUBLICATION
986 >        if (self.datasetPath):
987 >            txt += '\n'
988 >            txt += 'DatasetPath='+self.datasetPath+'\n'
989 >
990 >            datasetpath_split = self.datasetPath.split("/")
991 >            
992 >            txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
993 >            txt += 'DataTier='+datasetpath_split[2]+'\n'
994 >            #txt += 'ProcessedDataset='+datasetpath_split[3]+'\n'
995 >            txt += 'ApplicationFamily=cmsRun\n'
996 >
997 >        else:
998 >            txt += 'DatasetPath=MCDataTier\n'
999 >            txt += 'PrimaryDataset=null\n'
1000 >            txt += 'DataTier=null\n'
1001 >            #txt += 'ProcessedDataset=null\n'
1002 >            txt += 'ApplicationFamily=MCDataTier\n'
1003 >        if self.pset != None: #CarlosDaniele
1004 >            pset = os.path.basename(job.configFilename())
1005 >            txt += '\n'
1006 >            txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
1007 >            if (self.datasetPath): # standard job
1008 >                #txt += 'InputFiles=$2\n'
1009 >                txt += 'InputFiles=${args[1]}\n'
1010 >                txt += 'MaxEvents=${args[2]}\n'
1011 >                txt += 'SkipEvents=${args[3]}\n'
1012 >                txt += 'echo "Inputfiles:<$InputFiles>"\n'
1013 >                txt += 'sed "s#{\'INPUT\'}#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1014 >                txt += 'echo "MaxEvents:<$MaxEvents>"\n'
1015 >                txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1016 >                txt += 'echo "SkipEvents:<$SkipEvents>"\n'
1017 >                txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1018 >            else:  # pythia like job
1019 >                seedIndex=1
1020 >                if (self.firstRun):
1021 >                    txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
1022 >                    txt += 'echo "FirstRun: <$FirstRun>"\n'
1023 >                    txt += 'sed "s#\<INPUTFIRSTRUN\>#$FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1024 >                    seedIndex=seedIndex+1
1025 >
1026 >                if (self.sourceSeed):
1027 >                    txt += 'Seed=${args['+str(seedIndex)+']}\n'
1028 >                    txt += 'sed "s#\<INPUT\>#$Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1029 >                    seedIndex=seedIndex+1
1030 >                    ## the following seeds are not always present
1031 >                    if (self.sourceSeedVtx):
1032 >                        txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
1033 >                        txt += 'echo "VtxSeed: <$VtxSeed>"\n'
1034 >                        txt += 'sed "s#\<INPUTVTX\>#$VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1035 >                        seedIndex += 1
1036 >                    if (self.sourceSeedG4):
1037 >                        txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
1038 >                        txt += 'echo "G4Seed: <$G4Seed>"\n'
1039 >                        txt += 'sed "s#\<INPUTG4\>#$G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1040 >                        seedIndex += 1
1041 >                    if (self.sourceSeedMix):
1042 >                        txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
1043 >                        txt += 'echo "MixSeed: <$mixSeed>"\n'
1044 >                        txt += 'sed "s#\<INPUTMIX\>#$mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1045 >                        seedIndex += 1
1046 >                    pass
1047 >                pass
1048 >            txt += 'mv -f '+pset+' pset.cfg\n'
1049  
1050          if len(self.additional_inbox_files) > 0:
1051 <            for file in self.additional_inbox_files:
1052 <                txt += 'if [ -e $RUNTIME_AREA/'+file+' ] ; then\n'
1053 <                txt += '   cp $RUNTIME_AREA/'+file+' .\n'
603 <                txt += '   chmod +x '+file+'\n'
604 <                txt += 'fi\n'
1051 >            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
1052 >            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
1053 >            txt += 'fi\n'
1054              pass
1055  
1056 <        txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1057 <
1058 <        txt += '\n'
1059 <        txt += 'echo "***** cat pset.cfg *********"\n'
1060 <        txt += 'cat pset.cfg\n'
1061 <        txt += 'echo "****** end pset.cfg ********"\n'
1062 <        txt += '\n'
1063 <        # txt += 'echo "***** cat pset1.cfg *********"\n'
1064 <        # txt += 'cat pset1.cfg\n'
1065 <        # txt += 'echo "****** end pset1.cfg ********"\n'
1056 >        if self.pset != None: #CarlosDaniele
1057 >            txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1058 >        
1059 >            txt += '\n'
1060 >            txt += 'echo "***** cat pset.cfg *********"\n'
1061 >            txt += 'cat pset.cfg\n'
1062 >            txt += 'echo "****** end pset.cfg ********"\n'
1063 >            txt += '\n'
1064 >            ### FEDE FOR DBS OUTPUT PUBLICATION
1065 >            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
1066 >            txt += 'echo "PSETHASH = $PSETHASH" \n'
1067 >            ##############
1068 >            txt += '\n'
1069 >            # txt += 'echo "***** cat pset1.cfg *********"\n'
1070 >            # txt += 'cat pset1.cfg\n'
1071 >            # txt += 'echo "****** end pset1.cfg ********"\n'
1072          return txt
1073  
1074 <    def wsBuildExe(self, nj):
1074 >    def wsBuildExe(self, nj=0):
1075          """
1076          Put in the script the commands to build an executable
1077          or a library.
# Line 631 | Line 1086 | class Cmssw(JobType):
1086              txt += 'if [ $untar_status -ne 0 ]; then \n'
1087              txt += '   echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
1088              txt += '   echo "JOB_EXIT_STATUS = $untar_status" \n'
1089 <            txt += '   echo "SanityCheckCode = $untar_status" | tee -a $repo\n'
1089 >            txt += '   echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
1090              txt += '   if [ $middleware == OSG ]; then \n'
1091              txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1092              txt += '       cd $RUNTIME_AREA\n'
1093              txt += '       /bin/rm -rf $WORKING_DIR\n'
1094              txt += '       if [ -d $WORKING_DIR ] ;then\n'
1095 <            txt += '           echo "OSG WORKING DIR ==> $WORKING_DIR could not be deleted on on WN `hostname`"\n'
1095 >            txt += '           echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
1096 >            txt += '           echo "JOB_EXIT_STATUS = 50999"\n'
1097 >            txt += '           echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
1098 >            txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1099 >            txt += '           rm -f $RUNTIME_AREA/$repo \n'
1100 >            txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1101 >            txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1102              txt += '       fi\n'
1103              txt += '   fi \n'
1104              txt += '   \n'
1105 <            txt += '   exit $untar_status \n'
1105 >            txt += '   exit 1 \n'
1106              txt += 'else \n'
1107              txt += '   echo "Successful untar" \n'
1108              txt += 'fi \n'
1109 +            txt += '\n'
1110 +            txt += 'echo "Include ProdAgentApi and PRODCOMMON in PYTHONPATH"\n'
1111 +            txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1112 +            #### FEDE FOR DBS OUTPUT PUBLICATION
1113 +            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
1114 +            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon\n'
1115 +            #txt += '   export PYTHONPATH=ProdAgentApi\n'
1116 +            txt += 'else\n'
1117 +            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1118 +            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon:${PYTHONPATH}\n'
1119 +            #txt += '   export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
1120 +            txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1121 +            ###################  
1122 +            txt += 'fi\n'
1123 +            txt += '\n'
1124 +
1125              pass
1126          
1127          return txt
# Line 656 | Line 1133 | class Cmssw(JobType):
1133          """
1134          
1135      def executableName(self):
1136 <        return self.executable
1136 >        if self.scriptExe: #CarlosDaniele
1137 >            return "sh "
1138 >        else:
1139 >            return self.executable
1140  
1141      def executableArgs(self):
1142 <        return " -p pset.cfg"
1142 >        if self.scriptExe:#CarlosDaniele
1143 >            return   self.scriptExe + " $NJob"
1144 >        else:
1145 >            # if >= CMSSW_1_5_X, add -e
1146 >            version_array = self.scram.getSWVersion().split('_')
1147 >            major = 0
1148 >            minor = 0
1149 >            try:
1150 >                major = int(version_array[1])
1151 >                minor = int(version_array[2])
1152 >            except:
1153 >                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"  
1154 >                raise CrabException(msg)
1155 >            if major >= 1 and minor >= 5 :
1156 >                return " -e -p pset.cfg"
1157 >            else:
1158 >                return " -p pset.cfg"
1159  
1160      def inputSandbox(self, nj):
1161          """
1162          Returns a list of filenames to be put in JDL input sandbox.
1163          """
1164          inp_box = []
1165 <        # dict added to delete duplicate from input sandbox file list
1166 <        seen = {}
1165 >        # # dict added to delete duplicate from input sandbox file list
1166 >        # seen = {}
1167          ## code
1168          if os.path.isfile(self.tgzNameWithPath):
1169              inp_box.append(self.tgzNameWithPath)
1170 +        if os.path.isfile(self.MLtgzfile):
1171 +            inp_box.append(self.MLtgzfile)
1172          ## config
1173 <        inp_box.append(common.job_list[nj].configFilename())
1173 >        if not self.pset is None:
1174 >            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1175          ## additional input files
1176 <        #for file in self.additional_inbox_files:
1177 <        #    inp_box.append(common.work_space.cwdDir()+file)
1176 >        tgz = self.additionalInputFileTgz()
1177 >        inp_box.append(tgz)
1178          return inp_box
1179  
1180      def outputSandbox(self, nj):
# Line 684 | Line 1183 | class Cmssw(JobType):
1183          """
1184          out_box = []
1185  
687        stdout=common.job_list[nj].stdout()
688        stderr=common.job_list[nj].stderr()
689
1186          ## User Declared output files
1187 <        for out in self.output_file:
1187 >        for out in (self.output_file+self.output_file_sandbox):
1188              n_out = nj + 1
1189              out_box.append(self.numberFile_(out,str(n_out)))
1190          return out_box
695        return []
1191  
1192      def prepareSteeringCards(self):
1193          """
# Line 706 | Line 1201 | class Cmssw(JobType):
1201          """
1202  
1203          txt = '\n'
1204 <        file_list = ''
1205 <        check = len(self.output_file)
1206 <        i = 0
1207 <        for fileWithSuffix in self.output_file:
1208 <            i= i + 1
1204 >        txt += '# directory content\n'
1205 >        txt += 'ls \n'
1206 >
1207 >        txt += 'output_exit_status=0\n'
1208 >        
1209 >        for fileWithSuffix in (self.output_file_sandbox):
1210              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
715            file_list=file_list+output_file_num+''
1211              txt += '\n'
1212 <            txt += 'ls \n'
1212 >            txt += '# check output file\n'
1213 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1214 >            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1215 >            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1216 >            txt += 'else\n'
1217 >            txt += '    exit_status=60302\n'
1218 >            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1219 >            if common.scheduler.boss_scheduler_name == 'condor_g':
1220 >                txt += '    if [ $middleware == OSG ]; then \n'
1221 >                txt += '        echo "prepare dummy output file"\n'
1222 >                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1223 >                txt += '    fi \n'
1224 >            txt += 'fi\n'
1225 >        
1226 >        for fileWithSuffix in (self.output_file):
1227 >            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1228              txt += '\n'
1229 <            txt += 'ls '+fileWithSuffix+'\n'
1230 <            txt += 'exe_result=$?\n'
1231 <            txt += 'if [ $exe_result -ne 0 ] ; then\n'
1232 <            txt += '   echo "ERROR: No output file to manage"\n'
723 <            ### OLI_DANIELE
724 <            txt += '    if [ $middleware == OSG ]; then \n'
725 <            txt += '        echo "prepare dummy output file"\n'
726 <            txt += '        cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
727 <            txt += '    fi \n'
1229 >            txt += '# check output file\n'
1230 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1231 >            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1232 >            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1233              txt += 'else\n'
1234 <            txt += '   cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1234 >            txt += '    exit_status=60302\n'
1235 >            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1236 >            txt += '    echo "JOB_EXIT_STATUS = $exit_status"\n'
1237 >            txt += '    output_exit_status=$exit_status\n'
1238 >            if common.scheduler.boss_scheduler_name == 'condor_g':
1239 >                txt += '    if [ $middleware == OSG ]; then \n'
1240 >                txt += '        echo "prepare dummy output file"\n'
1241 >                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1242 >                txt += '    fi \n'
1243              txt += 'fi\n'
1244 <            if i == check:
1245 <                txt += 'cd $RUNTIME_AREA\n'
1246 <                pass      
1247 <            pass
1248 <      
1249 <        file_list=file_list[:-1]
737 <        txt += 'file_list="'+file_list+'"\n'
738 <        ### OLI_DANIELE
739 <        txt += 'if [ $middleware == OSG ]; then\n'  
740 <        txt += '    cd $RUNTIME_AREA\n'
741 <        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
742 <        txt += '    /bin/rm -rf $WORKING_DIR\n'
743 <        txt += '    if [ -d $WORKING_DIR ] ;then\n'
744 <        txt += '        echo "OSG WORKING DIR ==> $WORKING_DIR could not be deleted on on WN `hostname`"\n'
745 <        txt += '    fi\n'
746 <        txt += 'fi\n'
747 <        txt += '\n'
1244 >        file_list = []
1245 >        for fileWithSuffix in (self.output_file):
1246 >             file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1247 >            
1248 >        txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1249 >        txt += 'cd $RUNTIME_AREA\n'
1250          return txt
1251  
1252      def numberFile_(self, file, txt):
# Line 755 | Line 1257 | class Cmssw(JobType):
1257          # take away last extension
1258          name = p[0]
1259          for x in p[1:-1]:
1260 <           name=name+"."+x
1260 >            name=name+"."+x
1261          # add "_txt"
1262          if len(p)>1:
1263 <          ext = p[len(p)-1]
1264 <          #result = name + '_' + str(txt) + "." + ext
763 <          result = name + '_' + txt + "." + ext
1263 >            ext = p[len(p)-1]
1264 >            result = name + '_' + txt + "." + ext
1265          else:
1266 <          #result = name + '_' + str(txt)
766 <          result = name + '_' + txt
1266 >            result = name + '_' + txt
1267          
1268          return result
1269  
1270 <    def getRequirements(self):
1270 >    def getRequirements(self, nj=[]):
1271          """
1272          return job requirements to add to jdl files
1273          """
1274          req = ''
1275 <        if common.analisys_common_info['sites']:
1276 <            if common.analisys_common_info['sw_version']:
1277 <                req='Member("VO-cms-' + \
1278 <                     common.analisys_common_info['sw_version'] + \
1279 <                     '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1280 <            if len(common.analisys_common_info['sites'])>0:
1281 <                req = req + ' && ('
1282 <                for i in range(len(common.analisys_common_info['sites'])):
1283 <                    req = req + 'other.GlueCEInfoHostName == "' \
1284 <                         + common.analisys_common_info['sites'][i] + '"'
1285 <                    if ( i < (int(len(common.analisys_common_info['sites']) - 1)) ):
1286 <                        req = req + ' || '
1287 <            req = req + ')'
788 <        #print "req = ", req
1275 >        if self.version:
1276 >            req='Member("VO-cms-' + \
1277 >                 self.version + \
1278 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1279 >        ## SL add requirement for OS version only if SL4
1280 >        #reSL4 = re.compile( r'slc4' )
1281 >        if self.executable_arch: # and reSL4.search(self.executable_arch):
1282 >            req+=' && Member("VO-cms-' + \
1283 >                 self.executable_arch + \
1284 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1285 >
1286 >        req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1287 >
1288          return req
1289  
1290      def configFilename(self):
# Line 802 | Line 1301 | class Cmssw(JobType):
1301          txt += '   echo "### SETUP CMS OSG  ENVIRONMENT ###"\n'
1302          txt += '   if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1303          txt += '      # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1304 +        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1305          txt += '       source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1306 <        txt += '   elif [ -f $OSG_APP/cmssoft/cmsset_default.sh ] ;then\n'
1307 <        txt += '      # Use $OSG_APP/cmssoft/cmsset_default.sh to setup cms software\n'
1308 <        txt += '       source $OSG_APP/cmssoft/cmsset_default.sh '+self.version+'\n'
1306 >        txt += '   elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1307 >        txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1308 >        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1309 >        txt += '       source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1310          txt += '   else\n'
1311 <        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n'
1311 >        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1312          txt += '       echo "JOB_EXIT_STATUS = 10020"\n'
1313          txt += '       echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1314          txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1315 <        txt += '       exit\n'
1315 >        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1316 >        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1317 >        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1318 >        txt += '       exit 1\n'
1319          txt += '\n'
1320          txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1321          txt += '       cd $RUNTIME_AREA\n'
1322          txt += '       /bin/rm -rf $WORKING_DIR\n'
1323          txt += '       if [ -d $WORKING_DIR ] ;then\n'
1324 <        txt += '           echo "OSG WORKING DIR ==> $WORKING_DIR could not be deleted on on WN `hostname`"\n'
1324 >        txt += '           echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1325 >        txt += '           echo "JOB_EXIT_STATUS = 10017"\n'
1326 >        txt += '           echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1327 >        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1328 >        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1329 >        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1330 >        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1331          txt += '       fi\n'
1332          txt += '\n'
1333 <        txt += '       exit\n'
1333 >        txt += '       exit 1\n'
1334          txt += '   fi\n'
1335          txt += '\n'
1336          txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
# Line 836 | Line 1346 | class Cmssw(JobType):
1346          """
1347          txt  = '   \n'
1348          txt += '   echo " ### SETUP CMS LCG  ENVIRONMENT ### "\n'
839        txt += '      echo "JOB_EXIT_STATUS = 0"\n'
1349          txt += '   if [ ! $VO_CMS_SW_DIR ] ;then\n'
1350          txt += '       echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1351          txt += '       echo "JOB_EXIT_STATUS = 10031" \n'
1352          txt += '       echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1353          txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1354 <        txt += '       exit\n'
1354 >        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1355 >        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1356 >        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1357 >        txt += '       exit 1\n'
1358          txt += '   else\n'
1359          txt += '       echo "Sourcing environment... "\n'
1360          txt += '       if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
# Line 850 | Line 1362 | class Cmssw(JobType):
1362          txt += '           echo "JOB_EXIT_STATUS = 10020"\n'
1363          txt += '           echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1364          txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1365 <        txt += '           exit\n'
1365 >        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1366 >        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1367 >        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1368 >        txt += '           exit 1\n'
1369          txt += '       fi\n'
1370          txt += '       echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1371          txt += '       source $VO_CMS_SW_DIR/cmsset_default.sh\n'
# Line 860 | Line 1375 | class Cmssw(JobType):
1375          txt += '           echo "JOB_EXIT_STATUS = 10032"\n'
1376          txt += '           echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1377          txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1378 <        txt += '           exit\n'
1378 >        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1379 >        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1380 >        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1381 >        txt += '           exit 1\n'
1382          txt += '       fi\n'
1383          txt += '   fi\n'
1384          txt += '   \n'
867        txt += '   string=`cat /etc/redhat-release`\n'
868        txt += '   echo $string\n'
869        txt += '   if [[ $string = *alhalla* ]]; then\n'
870        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
871        txt += '   elif [[ $string = *Enterprise* ]] || [[ $string = *cientific* ]]; then\n'
872        txt += '       export SCRAM_ARCH=slc3_ia32_gcc323\n'
873        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
874        txt += '   else\n'
875        txt += '       echo "SET_CMS_ENV 1 ==> ERROR OS unknown, LCG environment not initialized"\n'
876        txt += '       echo "JOB_EXIT_STATUS = 10033"\n'
877        txt += '       echo "JobExitCode=10033" | tee -a $RUNTIME_AREA/$repo\n'
878        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
879        txt += '       exit\n'
880        txt += '   fi\n'
1385          txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1386          txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1387          return txt
1388 +
1389 +    ### FEDE FOR DBS OUTPUT PUBLICATION
1390 +    def modifyReport(self, nj):
1391 +        """
1392 +        insert the part of the script that modifies the FrameworkJob Report
1393 +        """
1394 +
1395 +        txt = ''
1396 +        try:
1397 +            publish_data = int(self.cfg_params['USER.publish_data'])          
1398 +        except KeyError:
1399 +            publish_data = 0
1400 +        if (publish_data == 1):  
1401 +            txt += 'echo "Modify Job Report" \n'
1402 +            #txt += 'chmod a+x $RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1403 +            ################ FEDE FOR DBS2 #############################################
1404 +            txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1405 +            #############################################################################
1406 +            #try:
1407 +            #    publish_data = int(self.cfg_params['USER.publish_data'])          
1408 +            #except KeyError:
1409 +            #    publish_data = 0
1410 +
1411 +            txt += 'if [ -z "$SE" ]; then\n'
1412 +            txt += '    SE="" \n'
1413 +            txt += 'fi \n'
1414 +            txt += 'if [ -z "$SE_PATH" ]; then\n'
1415 +            txt += '    SE_PATH="" \n'
1416 +            txt += 'fi \n'
1417 +            txt += 'echo "SE = $SE"\n'
1418 +            txt += 'echo "SE_PATH = $SE_PATH"\n'
1419 +
1420 +        #if (publish_data == 1):  
1421 +            #processedDataset = self.cfg_params['USER.processed_datasetname']
1422 +            processedDataset = self.cfg_params['USER.publish_data_name']
1423 +            txt += 'ProcessedDataset='+processedDataset+'\n'
1424 +            #### LFN=/store/user/<user>/processedDataset_PSETHASH
1425 +            txt += 'if [ "$SE_PATH" == "" ]; then\n'
1426 +            #### FEDE: added slash in LFN ##############
1427 +            txt += '    FOR_LFN=/copy_problems/ \n'
1428 +            txt += 'else \n'
1429 +            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1430 +            #####  FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1431 +            txt += '    FOR_LFN=/store$tmp \n'
1432 +            txt += 'fi \n'
1433 +            txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1434 +            txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1435 +            txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1436 +            #txt += 'echo "$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1437 +            txt += 'echo "$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1438 +            txt += '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1439 +            #txt += '$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1440 +      
1441 +            txt += 'modifyReport_result=$?\n'
1442 +            txt += 'echo modifyReport_result = $modifyReport_result\n'
1443 +            txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1444 +            txt += '    exit_status=1\n'
1445 +            txt += '    echo "ERROR: Problem with ModifyJobReport"\n'
1446 +            txt += 'else\n'
1447 +            txt += '    mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1448 +            txt += 'fi\n'
1449 +        else:
1450 +            txt += 'echo "no data publication required"\n'
1451 +            #txt += 'ProcessedDataset=no_data_to_publish \n'
1452 +            #### FEDE: added slash in LFN ##############
1453 +            #txt += 'FOR_LFN=/local/ \n'
1454 +            #txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1455 +            #txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1456 +        return txt
1457 +
1458 +    def cleanEnv(self):
1459 +        ### OLI_DANIELE
1460 +        txt = ''
1461 +        txt += 'if [ $middleware == OSG ]; then\n'  
1462 +        txt += '    cd $RUNTIME_AREA\n'
1463 +        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
1464 +        txt += '    /bin/rm -rf $WORKING_DIR\n'
1465 +        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1466 +        txt += '              echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1467 +        txt += '              echo "JOB_EXIT_STATUS = 60999"\n'
1468 +        txt += '              echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1469 +        txt += '              dumpStatus $RUNTIME_AREA/$repo\n'
1470 +        txt += '        rm -f $RUNTIME_AREA/$repo \n'
1471 +        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1472 +        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1473 +        txt += '    fi\n'
1474 +        txt += 'fi\n'
1475 +        txt += '\n'
1476 +        return txt
1477 +
1478 +    def setParam_(self, param, value):
1479 +        self._params[param] = value
1480 +
1481 +    def getParams(self):
1482 +        return self._params
1483 +
1484 +    def setTaskid_(self):
1485 +        self._taskId = self.cfg_params['taskId']
1486 +        
1487 +    def getTaskid(self):
1488 +        return self._taskId
1489 +
1490 +    def uniquelist(self, old):
1491 +        """
1492 +        remove duplicates from a list
1493 +        """
1494 +        nd={}
1495 +        for e in old:
1496 +            nd[e]=0
1497 +        return nd.keys()
1498 +
1499 +
1500 +    def checkOut(self, limit):
1501 +        """
1502 +        check the dimension of the output files
1503 +        """
1504 +        txt = 'echo "*****************************************"\n'
1505 +        txt += 'echo "** Starting output sandbox limit check **"\n'
1506 +        txt += 'echo "*****************************************"\n'
1507 +        allOutFiles = ""
1508 +        listOutFiles = []
1509 +        for fileOut in (self.output_file+self.output_file_sandbox):
1510 +             if fileOut.find('crab_fjr') == -1:
1511 +                 allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob')
1512 +                 listOutFiles.append(self.numberFile_(fileOut, '$NJob'))
1513 +        txt += 'echo "OUTPUT files: '+str(allOutFiles)+'";\n'
1514 +        txt += 'ls -gGhrta;\n'
1515 +        txt += 'sum=0;\n'
1516 +        txt += 'for file in '+str(allOutFiles)+' ; do\n'
1517 +        txt += '    if [ -e $file ]; then\n'
1518 +        txt += '        tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
1519 +        txt += '        sum=`expr $sum + $tt`\n'
1520 +        txt += '    else\n'
1521 +        txt += '        echo "WARNING: output file $file not found!"\n'
1522 +        txt += '    fi\n'
1523 +        txt += 'done\n'
1524 +        txt += 'echo "Total Output dimension: $sum";\n'
1525 +        txt += 'limit='+str(limit)+';\n'
1526 +        txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
1527 +        txt += 'if [ $limit -lt $sum ]; then\n'
1528 +        txt += '    echo "WARNING: output files have to big size - something will be lost;"\n'
1529 +        txt += '    echo "         checking the output file sizes..."\n'
1530 +        """
1531 +        txt += '    dim=0;\n'
1532 +        txt += '    exclude=0;\n'
1533 +        txt += '    for files in '+str(allOutFiles)+' ; do\n'
1534 +        txt += '        sumTemp=0;\n'
1535 +        txt += '        for file2 in '+str(allOutFiles)+' ; do\n'
1536 +        txt += '            if [ $file != $file2 ]; then\n'
1537 +        txt += '                tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1538 +        txt += '                sumTemp=`expr $sumTemp + $tt`;\n'
1539 +        txt += '            fi\n'
1540 +        txt += '        done\n'
1541 +        txt += '        if [ $sumTemp -lt $limit ]; then\n'
1542 +        txt += '            if [ $dim -lt $sumTemp ]; then\n'
1543 +        txt += '                dim=$sumTemp;\n'
1544 +        txt += '                exclude=$file;\n'
1545 +        txt += '            fi\n'
1546 +        txt += '        fi\n'
1547 +        txt += '    done\n'
1548 +        txt += '    echo "Dimension calculated: $dim"; echo "File to exclude: $exclude";\n'
1549 +        """
1550 +        txt += '    tot=0;\n'
1551 +        txt += '    for file2 in '+str(allOutFiles)+' ; do\n'
1552 +        txt += '        tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1553 +        txt += '        tot=`expr $tot + $tt`;\n'
1554 +        txt += '        if [ $limit -lt $tot ]; then\n'
1555 +        txt += '            tot=`expr $tot - $tt`;\n'
1556 +        txt += '            fileLast=$file;\n'
1557 +        txt += '            break;\n'
1558 +        txt += '        fi\n'
1559 +        txt += '    done\n'
1560 +        txt += '    echo "Dimension calculated: $tot"; echo "First file to exclude: $file";\n'
1561 +        txt += '    flag=0;\n'    
1562 +        txt += '    for filess in '+str(allOutFiles)+' ; do\n'
1563 +        txt += '        if [ $fileLast = $filess ]; then\n'
1564 +        txt += '            flag=1;\n'
1565 +        txt += '        fi\n'
1566 +        txt += '        if [ $flag -eq 1 ]; then\n'
1567 +        txt += '            rm -f $filess;\n'
1568 +        txt += '        fi\n'
1569 +        txt += '    done\n'
1570 +        txt += '    ls -agGhrt;\n'
1571 +        txt += '    echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
1572 +        txt += '    echo "JOB_EXIT_STATUS = 70000";\n'
1573 +        txt += '    exit_status=70000;\n'
1574 +        txt += 'else'
1575 +        txt += '    echo "Total Output dimension $sum is fine.";\n'
1576 +        txt += 'fi\n'
1577 +        txt += 'echo "*****************************************"\n'
1578 +        txt += 'echo "*** Ending output sandbox limit check ***"\n'
1579 +        txt += 'echo "*****************************************"\n'
1580 +        return txt

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines