ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.64 by slacapra, Thu Jan 18 18:29:51 2007 UTC vs.
Revision 1.136 by fanzago, Wed Nov 14 17:53:21 2007 UTC

# Line 2 | Line 2 | from JobType import JobType
2   from crab_logger import Logger
3   from crab_exceptions import *
4   from crab_util import *
5 + from BlackWhiteListParser import BlackWhiteListParser
6   import common
6 import PsetManipulator  
7
8 import DataDiscovery
9 import DataLocation
7   import Scram
8  
9 < import os, string, re, shutil
9 > import os, string, glob
10  
11   class Cmssw(JobType):
12      def __init__(self, cfg_params, ncjobs):
13          JobType.__init__(self, 'CMSSW')
14          common.logger.debug(3,'CMSSW::__init__')
15  
19        # Marco.
16          self._params = {}
17          self.cfg_params = cfg_params
18  
19 +        # init BlackWhiteListParser
20 +        self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
21 +
22 +        try:
23 +            self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24 +        except KeyError:
25 +            self.MaxTarBallSize = 9.5
26 +
27          # number of jobs requested to be created, limit obj splitting
28          self.ncjobs = ncjobs
29  
30          log = common.logger
31 <        
31 >
32          self.scram = Scram.Scram(cfg_params)
33          self.additional_inbox_files = []
34          self.scriptExe = ''
35          self.executable = ''
36 +        self.executable_arch = self.scram.getArch()
37          self.tgz_name = 'default.tgz'
38 +        self.additional_tgz_name = 'additional.tgz'
39          self.scriptName = 'CMSSW.sh'
40 <        self.pset = ''      #scrip use case Da  
40 >        self.pset = ''      #scrip use case Da
41          self.datasetPath = '' #scrip use case Da
42  
43          # set FJR file name
44          self.fjrFileName = 'crab_fjr.xml'
45  
46          self.version = self.scram.getSWVersion()
47 +
48 +        #
49 +        # Try to block creation in case of arch/version mismatch
50 +        #
51 +
52 +        a = string.split(self.version, "_")
53 +
54 +        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
55 +            msg = "Warning: You are using %s version of CMSSW  with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
56 +            common.logger.message(msg)
57 +        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58 +            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
59 +            raise CrabException(msg)
60 +
61          common.taskDB.setDict('codeVersion',self.version)
62          self.setParam_('application', self.version)
63  
64          ### collect Data cards
65 +
66 +        ## get DBS mode
67 +        try:
68 +            self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
69 +        except KeyError:
70 +            self.use_dbs_1 = 0
71 +
72          try:
73              tmp =  cfg_params['CMSSW.datasetpath']
74              log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
# Line 52 | Line 79 | class Cmssw(JobType):
79                  self.datasetPath = tmp
80                  self.selectNoInput = 0
81          except KeyError:
82 <            msg = "Error: datasetpath not defined "  
82 >            msg = "Error: datasetpath not defined "
83              raise CrabException(msg)
84  
85          # ML monitoring
# Line 61 | Line 88 | class Cmssw(JobType):
88              self.setParam_('dataset', 'None')
89              self.setParam_('owner', 'None')
90          else:
91 <            datasetpath_split = self.datasetPath.split("/")
92 <            self.setParam_('dataset', datasetpath_split[1])
93 <            self.setParam_('owner', datasetpath_split[-1])
91 >            try:
92 >                datasetpath_split = self.datasetPath.split("/")
93 >                # standard style
94 >                self.setParam_('datasetFull', self.datasetPath)
95 >                if self.use_dbs_1 == 1 :
96 >                    self.setParam_('dataset', datasetpath_split[1])
97 >                    self.setParam_('owner', datasetpath_split[-1])
98 >                else:
99 >                    self.setParam_('dataset', datasetpath_split[1])
100 >                    self.setParam_('owner', datasetpath_split[2])
101 >            except:
102 >                self.setParam_('dataset', self.datasetPath)
103 >                self.setParam_('owner', self.datasetPath)
104  
105          self.setTaskid_()
106          self.setParam_('taskId', self.cfg_params['taskId'])
# Line 87 | Line 124 | class Cmssw(JobType):
124          try:
125              self.pset = cfg_params['CMSSW.pset']
126              log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
127 <            if self.pset.lower() != 'none' :
127 >            if self.pset.lower() != 'none' :
128                  if (not os.path.exists(self.pset)):
129                      raise CrabException("User defined PSet file "+self.pset+" does not exist")
130              else:
# Line 114 | Line 151 | class Cmssw(JobType):
151                      self.output_file.append(tmp)
152                      pass
153              else:
154 <                log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available")
154 >                log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
155                  pass
156              pass
157          except KeyError:
158 <            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available")
158 >            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
159              pass
160  
161          # script_exe file as additional file in inputSandbox
# Line 131 | Line 168 | class Cmssw(JobType):
168                 self.additional_inbox_files.append(string.strip(self.scriptExe))
169          except KeyError:
170              self.scriptExe = ''
171 +
172          #CarlosDaniele
173          if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
174 <           msg ="WARNING. script_exe  not defined"
174 >           msg ="Error. script_exe  not defined"
175             raise CrabException(msg)
176  
177          ## additional input files
178          try:
179              tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
180 <            common.logger.debug(5,"Additional input files: "+str(tmpAddFiles))
181 <            for tmpFile in tmpAddFiles:
182 <                tmpFile = string.strip(tmpFile)
183 <                if not os.path.exists(tmpFile):
184 <                    raise CrabException("Additional input file not found: "+tmpFile)
180 >            for tmp in tmpAddFiles:
181 >                tmp = string.strip(tmp)
182 >                dirname = ''
183 >                if not tmp[0]=="/": dirname = "."
184 >                files = []
185 >                if string.find(tmp,"*")>-1:
186 >                    files = glob.glob(os.path.join(dirname, tmp))
187 >                    if len(files)==0:
188 >                        raise CrabException("No additional input file found with this pattern: "+tmp)
189 >                else:
190 >                    files.append(tmp)
191 >                for file in files:
192 >                    if not os.path.exists(file):
193 >                        raise CrabException("Additional input file not found: "+file)
194                      pass
195 <                storedFile = common.work_space.shareDir()+ tmpFile
196 <                shutil.copyfile(tmpFile, storedFile)
197 <                self.additional_inbox_files.append(string.strip(storedFile))
195 >                    # fname = string.split(file, '/')[-1]
196 >                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
197 >                    # shutil.copyfile(file, storedFile)
198 >                    self.additional_inbox_files.append(string.strip(file))
199                  pass
152            common.logger.debug(5,"Inbox files so far : "+str(self.additional_inbox_files))
200              pass
201 +            common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
202          except KeyError:
203              pass
204  
# Line 168 | Line 216 | class Cmssw(JobType):
216          except KeyError:
217              self.eventsPerJob = -1
218              self.selectEventsPerJob = 0
219 <    
219 >
220          ## number of jobs
221          try:
222              self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
# Line 184 | Line 232 | class Cmssw(JobType):
232              self.total_number_of_events = 0
233              self.selectTotalNumberEvents = 0
234  
235 <        if self.pset != None: #CarlosDaniele
235 >        if self.pset != None: #CarlosDaniele
236               if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
237                   msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
238                   raise CrabException(msg)
# Line 205 | Line 253 | class Cmssw(JobType):
253          except KeyError:
254              self.sourceSeedVtx = None
255              common.logger.debug(5,"No vertex seed given")
256 +
257 +        try:
258 +            self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
259 +        except KeyError:
260 +            self.sourceSeedG4 = None
261 +            common.logger.debug(5,"No g4 sim hits seed given")
262 +
263 +        try:
264 +            self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
265 +        except KeyError:
266 +            self.sourceSeedMix = None
267 +            common.logger.debug(5,"No mix seed given")
268 +
269          try:
270              self.firstRun = int(cfg_params['CMSSW.first_run'])
271          except KeyError:
272              self.firstRun = None
273              common.logger.debug(5,"No first run given")
274          if self.pset != None: #CarlosDaniele
275 <            self.PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset
275 >            import PsetManipulator as pp
276 >            PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
277  
278          #DBSDLS-start
279 <        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
279 >        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
280          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
281          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
282          self.jobDestination=[]  # Site destination(s) for each job (list of lists)
# Line 223 | Line 285 | class Cmssw(JobType):
285          blockSites = {}
286          if self.datasetPath:
287              blockSites = self.DataDiscoveryAndLocation(cfg_params)
288 <        #DBSDLS-end          
288 >        #DBSDLS-end
289  
290          self.tgzNameWithPath = self.getTarBall(self.executable)
291 <    
291 >
292          ## Select Splitting
293 <        if self.selectNoInput:
293 >        if self.selectNoInput:
294              if self.pset == None: #CarlosDaniele
295                  self.jobSplittingForScript()
296              else:
297                  self.jobSplittingNoInput()
298 <        else:
298 >        else:
299              self.jobSplittingByBlocks(blockSites)
300  
301          # modify Pset
# Line 241 | Line 303 | class Cmssw(JobType):
303              try:
304                  if (self.datasetPath): # standard job
305                      # allow to processa a fraction of events in a file
306 <                    self.PsetEdit.inputModule("INPUT")
307 <                    self.PsetEdit.maxEvent("INPUTMAXEVENTS")
308 <                    self.PsetEdit.skipEvent("INPUTSKIPEVENTS")
306 >                    PsetEdit.inputModule("INPUTFILE")
307 >                    PsetEdit.maxEvent(0)
308 >                    PsetEdit.skipEvent(0)
309                  else:  # pythia like job
310 <                    self.PsetEdit.maxEvent(self.eventsPerJob)
310 >                    PsetEdit.maxEvent(self.eventsPerJob)
311                      if (self.firstRun):
312 <                        self.PsetEdit.pythiaFirstRun("INPUTFIRSTRUN")  #First Run
312 >                        PsetEdit.pythiaFirstRun(0)  #First Run
313                      if (self.sourceSeed) :
314 <                        self.PsetEdit.pythiaSeed("INPUT")
314 >                        PsetEdit.pythiaSeed(0)
315                          if (self.sourceSeedVtx) :
316 <                            self.PsetEdit.pythiaSeedVtx("INPUTVTX")
316 >                            PsetEdit.vtxSeed(0)
317 >                        if (self.sourceSeedG4) :
318 >                            PsetEdit.g4Seed(0)
319 >                        if (self.sourceSeedMix) :
320 >                            PsetEdit.mixSeed(0)
321                  # add FrameworkJobReport to parameter-set
322 <                self.PsetEdit.addCrabFJR(self.fjrFileName)
323 <                self.PsetEdit.psetWriter(self.configFilename())
322 >                PsetEdit.addCrabFJR(self.fjrFileName)
323 >                PsetEdit.psetWriter(self.configFilename())
324              except:
325                  msg='Error while manipuliating ParameterSet: exiting...'
326                  raise CrabException(msg)
327  
328      def DataDiscoveryAndLocation(self, cfg_params):
329  
330 +        import DataDiscovery
331 +        import DataDiscovery_DBS2
332 +        import DataLocation
333          common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
334  
335          datasetPath=self.datasetPath
336  
268        ## TODO
269        dataTiersList = ""
270        dataTiers = dataTiersList.split(',')
271
337          ## Contact the DBS
338 <        common.logger.message("Contacting DBS...")
338 >        common.logger.message("Contacting Data Discovery Services ...")
339          try:
340 <            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, dataTiers, cfg_params)
340 >
341 >            if self.use_dbs_1 == 1 :
342 >                self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
343 >            else :
344 >                self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
345              self.pubdata.fetchDBSInfo()
346  
347          except DataDiscovery.NotExistingDatasetError, ex :
348              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
349              raise CrabException(msg)
281
350          except DataDiscovery.NoDataTierinProvenanceError, ex :
351              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
352              raise CrabException(msg)
353          except DataDiscovery.DataDiscoveryError, ex:
354 <            msg = 'ERROR ***: failed Data Discovery in DBS  %s'%ex.getErrorMessage()
354 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
355 >            raise CrabException(msg)
356 >        except DataDiscovery_DBS2.NotExistingDatasetError_DBS2, ex :
357 >            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
358 >            raise CrabException(msg)
359 >        except DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2, ex :
360 >            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
361 >            raise CrabException(msg)
362 >        except DataDiscovery_DBS2.DataDiscoveryError_DBS2, ex:
363 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
364              raise CrabException(msg)
288
289        ## get list of all required data in the form of dbs paths  (dbs path = /dataset/datatier/owner)
290        ## self.DBSPaths=self.pubdata.getDBSPaths()
291        common.logger.message("Required data are :"+self.datasetPath)
365  
366          self.filesbyblock=self.pubdata.getFiles()
367          self.eventsbyblock=self.pubdata.getEventsPerBlock()
368          self.eventsbyfile=self.pubdata.getEventsPerFile()
296        # print str(self.filesbyblock)
297        # print 'self.eventsbyfile',len(self.eventsbyfile)
298        # print str(self.eventsbyfile)
369  
370          ## get max number of events
371 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
302 <        common.logger.message("The number of available events is %s\n"%self.maxEvents)
371 >        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
372  
304        common.logger.message("Contacting DLS...")
373          ## Contact the DLS and build a list of sites hosting the fileblocks
374          try:
375              dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
# Line 309 | Line 377 | class Cmssw(JobType):
377          except DataLocation.DataLocationError , ex:
378              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
379              raise CrabException(msg)
380 <        
380 >
381  
382          sites = dataloc.getSites()
383          allSites = []
# Line 319 | Line 387 | class Cmssw(JobType):
387                  allSites.append(oneSite)
388          allSites = self.uniquelist(allSites)
389  
390 <        common.logger.message("Sites ("+str(len(allSites))+") hosting part/all of dataset: "+str(allSites))
391 <        common.logger.debug(6, "List of Sites: "+str(allSites))
390 >        # screen output
391 >        common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
392 >
393          return sites
394 <    
394 >
395      def jobSplittingByBlocks(self, blockSites):
396          """
397          Perform job splitting. Jobs run over an integer number of files
# Line 372 | Line 441 | class Cmssw(JobType):
441              totalNumberOfJobs = 999999999
442          else :
443              totalNumberOfJobs = self.ncjobs
444 <            
444 >
445  
446          blocks = blockSites.keys()
447          blockCount = 0
# Line 382 | Line 451 | class Cmssw(JobType):
451          jobCount = 0
452          list_of_lists = []
453  
454 +        # list tracking which jobs are in which jobs belong to which block
455 +        jobsOfBlock = {}
456 +
457          # ---- Iterate over the blocks in the dataset until ---- #
458          # ---- we've met the requested total # of events    ---- #
459          while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
460              block = blocks[blockCount]
461              blockCount += 1
462 <            
462 >            if block not in jobsOfBlock.keys() :
463 >                jobsOfBlock[block] = []
464  
465 <            numEventsInBlock = self.eventsbyblock[block]
466 <            common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
467 <            
468 <            files = self.filesbyblock[block]
469 <            numFilesInBlock = len(files)
470 <            if (numFilesInBlock <= 0):
471 <                continue
472 <            fileCount = 0
473 <
474 <            # ---- New block => New job ---- #
475 <            parString = "\\{"
476 <            # counter for number of events in files currently worked on
477 <            filesEventCount = 0
478 <            # flag if next while loop should touch new file
479 <            newFile = 1
480 <            # job event counter
481 <            jobSkipEventCount = 0
482 <            
483 <            # ---- Iterate over the files in the block until we've met the requested ---- #
484 <            # ---- total # of events or we've gone over all the files in this block  ---- #
485 <            while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
486 <                file = files[fileCount]
487 <                if newFile :
488 <                    try:
489 <                        numEventsInFile = self.eventsbyfile[file]
490 <                        common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
491 <                        # increase filesEventCount
492 <                        filesEventCount += numEventsInFile
493 <                        # Add file to current job
494 <                        parString += '\\\"' + file + '\\\"\,'
495 <                        newFile = 0
496 <                    except KeyError:
497 <                        common.logger.message("File "+str(file)+" has unknown number of events: skipping")
498 <                        
499 <
500 <                # if less events in file remain than eventsPerJobRequested
501 <                if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
502 <                    # if last file in block
503 <                    if ( fileCount == numFilesInBlock-1 ) :
504 <                        # end job using last file, use remaining events in block
465 >            if self.eventsbyblock.has_key(block) :
466 >                numEventsInBlock = self.eventsbyblock[block]
467 >                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
468 >
469 >                files = self.filesbyblock[block]
470 >                numFilesInBlock = len(files)
471 >                if (numFilesInBlock <= 0):
472 >                    continue
473 >                fileCount = 0
474 >
475 >                # ---- New block => New job ---- #
476 >                parString = ""
477 >                # counter for number of events in files currently worked on
478 >                filesEventCount = 0
479 >                # flag if next while loop should touch new file
480 >                newFile = 1
481 >                # job event counter
482 >                jobSkipEventCount = 0
483 >
484 >                # ---- Iterate over the files in the block until we've met the requested ---- #
485 >                # ---- total # of events or we've gone over all the files in this block  ---- #
486 >                while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
487 >                    file = files[fileCount]
488 >                    if newFile :
489 >                        try:
490 >                            numEventsInFile = self.eventsbyfile[file]
491 >                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
492 >                            # increase filesEventCount
493 >                            filesEventCount += numEventsInFile
494 >                            # Add file to current job
495 >                            parString += '\\\"' + file + '\\\"\,'
496 >                            newFile = 0
497 >                        except KeyError:
498 >                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
499 >
500 >
501 >                    # if less events in file remain than eventsPerJobRequested
502 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
503 >                        # if last file in block
504 >                        if ( fileCount == numFilesInBlock-1 ) :
505 >                            # end job using last file, use remaining events in block
506 >                            # close job and touch new file
507 >                            fullString = parString[:-2]
508 >                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
509 >                            common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
510 >                            self.jobDestination.append(blockSites[block])
511 >                            common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
512 >                            # fill jobs of block dictionary
513 >                            jobsOfBlock[block].append(jobCount+1)
514 >                            # reset counter
515 >                            jobCount = jobCount + 1
516 >                            totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
517 >                            eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
518 >                            jobSkipEventCount = 0
519 >                            # reset file
520 >                            parString = ""
521 >                            filesEventCount = 0
522 >                            newFile = 1
523 >                            fileCount += 1
524 >                        else :
525 >                            # go to next file
526 >                            newFile = 1
527 >                            fileCount += 1
528 >                    # if events in file equal to eventsPerJobRequested
529 >                    elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
530                          # close job and touch new file
531                          fullString = parString[:-2]
532 <                        fullString += '\\}'
533 <                        list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
436 <                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
532 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
533 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
534                          self.jobDestination.append(blockSites[block])
535                          common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
536 +                        jobsOfBlock[block].append(jobCount+1)
537                          # reset counter
538                          jobCount = jobCount + 1
539 <                        totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
540 <                        eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
539 >                        totalEventCount = totalEventCount + eventsPerJobRequested
540 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
541                          jobSkipEventCount = 0
542                          # reset file
543 <                        parString = "\\{"
543 >                        parString = ""
544                          filesEventCount = 0
545                          newFile = 1
546                          fileCount += 1
547 +
548 +                    # if more events in file remain than eventsPerJobRequested
549                      else :
550 <                        # go to next file
551 <                        newFile = 1
552 <                        fileCount += 1
553 <                # if events in file equal to eventsPerJobRequested
554 <                elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
555 <                    # close job and touch new file
556 <                    fullString = parString[:-2]
557 <                    fullString += '\\}'
558 <                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
559 <                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
560 <                    self.jobDestination.append(blockSites[block])
561 <                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
562 <                    # reset counter
563 <                    jobCount = jobCount + 1
564 <                    totalEventCount = totalEventCount + eventsPerJobRequested
565 <                    eventsRemaining = eventsRemaining - eventsPerJobRequested
566 <                    jobSkipEventCount = 0
567 <                    # reset file
568 <                    parString = "\\{"
569 <                    filesEventCount = 0
470 <                    newFile = 1
471 <                    fileCount += 1
472 <                    
473 <                # if more events in file remain than eventsPerJobRequested
474 <                else :
475 <                    # close job but don't touch new file
476 <                    fullString = parString[:-2]
477 <                    fullString += '\\}'
478 <                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
479 <                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
480 <                    self.jobDestination.append(blockSites[block])
481 <                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
482 <                    # increase counter
483 <                    jobCount = jobCount + 1
484 <                    totalEventCount = totalEventCount + eventsPerJobRequested
485 <                    eventsRemaining = eventsRemaining - eventsPerJobRequested
486 <                    # calculate skip events for last file
487 <                    # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
488 <                    jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
489 <                    # remove all but the last file
490 <                    filesEventCount = self.eventsbyfile[file]
491 <                    parString = "\\{"
492 <                    parString += '\\\"' + file + '\\\"\,'
493 <                pass # END if
494 <            pass # END while (iterate over files in the block)
550 >                        # close job but don't touch new file
551 >                        fullString = parString[:-2]
552 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
553 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
554 >                        self.jobDestination.append(blockSites[block])
555 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
556 >                        jobsOfBlock[block].append(jobCount+1)
557 >                        # increase counter
558 >                        jobCount = jobCount + 1
559 >                        totalEventCount = totalEventCount + eventsPerJobRequested
560 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
561 >                        # calculate skip events for last file
562 >                        # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
563 >                        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
564 >                        # remove all but the last file
565 >                        filesEventCount = self.eventsbyfile[file]
566 >                        parString = ""
567 >                        parString += '\\\"' + file + '\\\"\,'
568 >                    pass # END if
569 >                pass # END while (iterate over files in the block)
570          pass # END while (iterate over blocks in the dataset)
571          self.ncjobs = self.total_number_of_jobs = jobCount
572          if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
573              common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
574 <        common.logger.message("\n"+str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
575 <        
574 >        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
575 >
576 >        # screen output
577 >        screenOutput = "List of jobs and available destination sites:\n\n"
578 >
579 >        # keep trace of block with no sites to print a warning at the end
580 >        noSiteBlock = []
581 >        bloskNoSite = []
582 >
583 >        blockCounter = 0
584 >        for block in blocks:
585 >            if block in jobsOfBlock.keys() :
586 >                blockCounter += 1
587 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
588 >                if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
589 >                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
590 >                    bloskNoSite.append( blockCounter )
591 >
592 >        common.logger.message(screenOutput)
593 >        if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
594 >            msg = 'WARNING: No sites are hosting any part of data for block:\n                '
595 >            virgola = ""
596 >            if len(bloskNoSite) > 1:
597 >                virgola = ","
598 >            for block in bloskNoSite:
599 >                msg += ' ' + str(block) + virgola
600 >            msg += '\n               Related jobs:\n                 '
601 >            virgola = ""
602 >            if len(noSiteBlock) > 1:
603 >                virgola = ","
604 >            for range_jobs in noSiteBlock:
605 >                msg += str(range_jobs) + virgola
606 >            msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
607 >            common.logger.message(msg)
608 >
609          self.list_of_args = list_of_lists
610          return
611  
# Line 506 | Line 614 | class Cmssw(JobType):
614          Perform job splitting based on number of event per job
615          """
616          common.logger.debug(5,'Splitting per events')
617 <        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
618 <        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
619 <        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
617 >
618 >        if (self.selectEventsPerJob):
619 >            common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
620 >        if (self.selectNumberOfJobs):
621 >            common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
622 >        if (self.selectTotalNumberEvents):
623 >            common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
624  
625          if (self.total_number_of_events < 0):
626              msg='Cannot split jobs per Events with "-1" as total number of events'
627              raise CrabException(msg)
628  
629          if (self.selectEventsPerJob):
630 <            self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
630 >            if (self.selectTotalNumberEvents):
631 >                self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
632 >            elif(self.selectNumberOfJobs) :
633 >                self.total_number_of_jobs =self.theNumberOfJobs
634 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
635 >
636          elif (self.selectNumberOfJobs) :
637              self.total_number_of_jobs = self.theNumberOfJobs
638              self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
# Line 536 | Line 653 | class Cmssw(JobType):
653          for i in range(self.total_number_of_jobs):
654              ## Since there is no input, any site is good
655             # self.jobDestination.append(["Any"])
656 <            self.jobDestination.append([""]) #must be empty to write correctly the xml
657 <            args=''
656 >            self.jobDestination.append([""]) #must be empty to write correctly the xml
657 >            args=[]
658              if (self.firstRun):
659                      ## pythia first run
660                  #self.list_of_args.append([(str(self.firstRun)+str(i))])
661 <                args=args+(str(self.firstRun)+str(i))
661 >                args.append(str(self.firstRun)+str(i))
662              else:
663                  ## no first run
664                  #self.list_of_args.append([str(i)])
665 <                args=args+str(i)
665 >                args.append(str(i))
666              if (self.sourceSeed):
667 +                args.append(str(self.sourceSeed)+str(i))
668                  if (self.sourceSeedVtx):
669 <                    ## pythia + vtx random seed
670 <                    #self.list_of_args.append([
671 <                    #                          str(self.sourceSeed)+str(i),
672 <                    #                          str(self.sourceSeedVtx)+str(i)
673 <                    #                          ])
674 <                    args=args+str(',')+str(self.sourceSeed)+str(i)+str(',')+str(self.sourceSeedVtx)+str(i)
675 <                else:
676 <                    ## only pythia random seed
677 <                    #self.list_of_args.append([(str(self.sourceSeed)+str(i))])
678 <                    args=args +str(',')+str(self.sourceSeed)+str(i)
679 <            else:
680 <                ## no random seed
681 <                if str(args)=='': args=args+(str(self.firstRun)+str(i))
682 <            arguments=args.split(',')
565 <            if len(arguments)==3:self.list_of_args.append([str(arguments[0]),str(arguments[1]),str(arguments[2])])
566 <            elif len(arguments)==2:self.list_of_args.append([str(arguments[0]),str(arguments[1])])
567 <            else :self.list_of_args.append([str(arguments[0])])
568 <            
569 <     #   print self.list_of_args
669 >                    ## + vtx random seed
670 >                    args.append(str(self.sourceSeedVtx)+str(i))
671 >                if (self.sourceSeedG4):
672 >                    ## + G4 random seed
673 >                    args.append(str(self.sourceSeedG4)+str(i))
674 >                if (self.sourceSeedMix):
675 >                    ## + Mix random seed
676 >                    args.append(str(self.sourceSeedMix)+str(i))
677 >                pass
678 >            pass
679 >            self.list_of_args.append(args)
680 >        pass
681 >
682 >        # print self.list_of_args
683  
684          return
685  
# Line 595 | Line 708 | class Cmssw(JobType):
708          return
709  
710      def split(self, jobParams):
711 <
711 >
712          common.jobDB.load()
713          #### Fabio
714          njobs = self.total_number_of_jobs
# Line 603 | Line 716 | class Cmssw(JobType):
716          # create the empty structure
717          for i in range(njobs):
718              jobParams.append("")
719 <        
719 >
720          for job in range(njobs):
721              jobParams[job] = arglist[job]
722              # print str(arglist[job])
# Line 614 | Line 727 | class Cmssw(JobType):
727  
728          common.jobDB.save()
729          return
730 <    
730 >
731      def getJobTypeArguments(self, nj, sched):
732          result = ''
733          for i in common.jobDB.arguments(nj):
734              result=result+str(i)+" "
735          return result
736 <  
736 >
737      def numberOfJobs(self):
738          # Fabio
739          return self.total_number_of_jobs
# Line 629 | Line 742 | class Cmssw(JobType):
742          """
743          Return the TarBall with lib and exe
744          """
745 <        
745 >
746          # if it exist, just return it
747          #
748          # Marco. Let's start to use relative path for Boss XML files
# Line 652 | Line 765 | class Cmssw(JobType):
765          # print "swVersion = ", swVersion
766          swReleaseTop = self.scram.getReleaseTop_()
767          #print "swReleaseTop = ", swReleaseTop
768 <        
768 >
769          ## check if working area is release top
770          if swReleaseTop == '' or swArea == swReleaseTop:
771              return
# Line 665 | Line 778 | class Cmssw(JobType):
778                  exeWithPath = self.scram.findFile_(executable)
779                  if ( not exeWithPath ):
780                      raise CrabException('User executable '+executable+' not found')
781 <    
781 >
782                  ## then check if it's private or not
783                  if exeWithPath.find(swReleaseTop) == -1:
784                      # the exe is private, so we must ship
785                      common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
786                      path = swArea+'/'
787 <                    exe = string.replace(exeWithPath, path,'')
788 <                    tar.add(path+exe,exe)
787 >                    # distinguish case when script is in user project area or given by full path somewhere else
788 >                    if exeWithPath.find(path) >= 0 :
789 >                        exe = string.replace(exeWithPath, path,'')
790 >                        tar.add(path+exe,exe)
791 >                    else :
792 >                        tar.add(exeWithPath,os.path.basename(executable))
793                      pass
794                  else:
795                      # the exe is from release, we'll find it on WN
796                      pass
797 <    
797 >
798              ## Now get the libraries: only those in local working area
799              libDir = 'lib'
800              lib = swArea+'/' +libDir
801              common.logger.debug(5,"lib "+lib+" to be tarred")
802              if os.path.exists(lib):
803                  tar.add(lib,libDir)
804 <    
804 >
805              ## Now check if module dir is present
806              moduleDir = 'module'
807              module = swArea + '/' + moduleDir
# Line 703 | Line 820 | class Cmssw(JobType):
820              pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
821              if os.path.isdir(pa):
822                  tar.add(pa,paDir)
823 <        
823 >
824 >            ### FEDE FOR DBS PUBLICATION
825 >            ## Add PRODCOMMON dir to tar
826 >            prodcommonDir = 'ProdCommon'
827 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
828 >            if os.path.isdir(prodcommonPath):
829 >                tar.add(prodcommonPath,prodcommonDir)
830 >            #############################
831 >
832              common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
833              tar.close()
834          except :
835              raise CrabException('Could not create tar-ball')
836 <        
836 >
837 >        ## check for tarball size
838 >        tarballinfo = os.stat(self.tgzNameWithPath)
839 >        if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
840 >            raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
841 >
842          ## create tar-ball with ML stuff
843 <        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
843 >        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
844          try:
845              tar = tarfile.open(self.MLtgzfile, "w:gz")
846              path=os.environ['CRABDIR'] + '/python/'
# Line 720 | Line 850 | class Cmssw(JobType):
850              tar.close()
851          except :
852              raise CrabException('Could not create ML files tar-ball')
853 <        
853 >
854          return
855 <        
855 >
856 >    def additionalInputFileTgz(self):
857 >        """
858 >        Put all additional files into a tar ball and return its name
859 >        """
860 >        import tarfile
861 >        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
862 >        tar = tarfile.open(tarName, "w:gz")
863 >        for file in self.additional_inbox_files:
864 >            tar.add(file,string.split(file,'/')[-1])
865 >        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
866 >        tar.close()
867 >        return tarName
868 >
869      def wsSetupEnvironment(self, nj):
870          """
871          Returns part of a job script which prepares
872          the execution environment for the job 'nj'.
873          """
874          # Prepare JobType-independent part
875 <        txt = ''
876 <  
877 <        ## OLI_Daniele at this level  middleware already known
735 <
736 <        txt += 'if [ $middleware == LCG ]; then \n'
875 >        txt = ''
876 >        txt += 'echo ">>> setup environment"\n'
877 >        txt += 'if [ $middleware == LCG ]; then \n'
878          txt += self.wsSetupCMSLCGEnvironment_()
879          txt += 'elif [ $middleware == OSG ]; then\n'
880          txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
881 <        txt += '    echo "Created working directory: $WORKING_DIR"\n'
741 <        txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
881 >        txt += '    if [ ! $? == 0 ] ;then\n'
882          txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
883 <        txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
884 <        txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
885 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
746 <        txt += '        rm -f $RUNTIME_AREA/$repo \n'
747 <        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
748 <        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
883 >        txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
884 >        txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
885 >        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
886          txt += '        exit 1\n'
887          txt += '    fi\n'
888 +        txt += '    echo ">>> Created working directory: $WORKING_DIR"\n'
889          txt += '\n'
890          txt += '    echo "Change to working directory: $WORKING_DIR"\n'
891          txt += '    cd $WORKING_DIR\n'
892 <        txt += self.wsSetupCMSOSGEnvironment_()
892 >        txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
893 >        txt += self.wsSetupCMSOSGEnvironment_()
894 >        #txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
895 >        #txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
896          txt += 'fi\n'
897  
898          # Prepare JobType-specific part
899          scram = self.scram.commandName()
900          txt += '\n\n'
901 <        txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
901 >        txt += 'echo ">>> specific cmssw setup environment:"\n'
902 >        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
903          txt += scram+' project CMSSW '+self.version+'\n'
904          txt += 'status=$?\n'
905          txt += 'if [ $status != 0 ] ; then\n'
906 <        txt += '   echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
907 <        txt += '   echo "JOB_EXIT_STATUS = 10034"\n'
908 <        txt += '   echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
909 <        txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
768 <        txt += '   rm -f $RUNTIME_AREA/$repo \n'
769 <        txt += '   echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
770 <        txt += '   echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
771 <        ## OLI_Daniele
906 >        txt += '    echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
907 >        txt += '    echo "JOB_EXIT_STATUS = 10034"\n'
908 >        txt += '    echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
909 >        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
910          txt += '    if [ $middleware == OSG ]; then \n'
773        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
911          txt += '        cd $RUNTIME_AREA\n'
912 +        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
913 +        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
914          txt += '        /bin/rm -rf $WORKING_DIR\n'
915          txt += '        if [ -d $WORKING_DIR ] ;then\n'
916 <        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
917 <        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
918 <        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
919 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
781 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
782 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
783 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
916 >        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
917 >        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
918 >        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
919 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
920          txt += '        fi\n'
921          txt += '    fi \n'
922 <        txt += '   exit 1 \n'
922 >        txt += '    exit 1 \n'
923          txt += 'fi \n'
788        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
924          txt += 'cd '+self.version+'\n'
925 +        ########## FEDE FOR DBS2 ######################
926 +        txt += 'SOFTWARE_DIR=`pwd`\n'
927 +        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
928 +        ###############################################
929          ### needed grep for bug in scramv1 ###
930          txt += scram+' runtime -sh\n'
931          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
# Line 796 | Line 935 | class Cmssw(JobType):
935          txt += "\n"
936          txt += "## number of arguments (first argument always jobnumber)\n"
937          txt += "\n"
799 #        txt += "narg=$#\n"
938          txt += "if [ $nargs -lt 2 ]\n"
939          txt += "then\n"
940          txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
941          txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
942          txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
943          txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
806        txt += '    rm -f $RUNTIME_AREA/$repo \n'
807        txt += '    echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
808        txt += '    echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
809        ## OLI_Daniele
944          txt += '    if [ $middleware == OSG ]; then \n'
811        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
945          txt += '        cd $RUNTIME_AREA\n'
946 +        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
947 +        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
948          txt += '        /bin/rm -rf $WORKING_DIR\n'
949          txt += '        if [ -d $WORKING_DIR ] ;then\n'
950 <        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
951 <        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
952 <        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
953 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
819 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
820 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
821 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
950 >        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
951 >        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
952 >        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
953 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
954          txt += '        fi\n'
955          txt += '    fi \n'
956          txt += "    exit 1\n"
# Line 827 | Line 959 | class Cmssw(JobType):
959  
960          # Prepare job-specific part
961          job = common.job_list[nj]
962 +        ### FEDE FOR DBS OUTPUT PUBLICATION
963 +        if (self.datasetPath):
964 +            txt += '\n'
965 +            txt += 'DatasetPath='+self.datasetPath+'\n'
966 +
967 +            datasetpath_split = self.datasetPath.split("/")
968 +
969 +            txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
970 +            txt += 'DataTier='+datasetpath_split[2]+'\n'
971 +            txt += 'ApplicationFamily=cmsRun\n'
972 +
973 +        else:
974 +            txt += 'DatasetPath=MCDataTier\n'
975 +            txt += 'PrimaryDataset=null\n'
976 +            txt += 'DataTier=null\n'
977 +            txt += 'ApplicationFamily=MCDataTier\n'
978          if self.pset != None: #CarlosDaniele
979              pset = os.path.basename(job.configFilename())
980              txt += '\n'
981 +            txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
982              if (self.datasetPath): # standard job
834                #txt += 'InputFiles=$2\n'
983                  txt += 'InputFiles=${args[1]}\n'
984                  txt += 'MaxEvents=${args[2]}\n'
985                  txt += 'SkipEvents=${args[3]}\n'
986                  txt += 'echo "Inputfiles:<$InputFiles>"\n'
987 <                txt += 'sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' > pset_tmp_1.cfg\n'
987 >                txt += 'sed "s#\'INPUTFILE\'#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
988                  txt += 'echo "MaxEvents:<$MaxEvents>"\n'
989 <                txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" pset_tmp_1.cfg > pset_tmp_2.cfg\n'
989 >                txt += 'sed "s#int32 input = 0#int32 input = $MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
990                  txt += 'echo "SkipEvents:<$SkipEvents>"\n'
991 <                txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" pset_tmp_2.cfg > pset.cfg\n'
991 >                txt += 'sed "s#uint32 skipEvents = 0#uint32 skipEvents = $SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
992              else:  # pythia like job
993 <                if (self.sourceSeed):
994 <                    txt += 'FirstRun=${args[1]}\n'
993 >                seedIndex=1
994 >                if (self.firstRun):
995 >                    txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
996                      txt += 'echo "FirstRun: <$FirstRun>"\n'
997 <                    txt += 'sed "s#\<INPUTFIRSTRUN\>#$FirstRun#" $RUNTIME_AREA/'+pset+' > tmp_1.cfg\n'
998 <                else:
999 <                    txt += '# Copy untouched pset\n'
851 <                    txt += 'cp $RUNTIME_AREA/'+pset+' tmp_1.cfg\n'
997 >                    txt += 'sed "s#uint32 firstRun = 0#uint32 firstRun = $FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
998 >                    seedIndex=seedIndex+1
999 >
1000                  if (self.sourceSeed):
1001 < #                    txt += 'Seed=$2\n'
1002 <                    txt += 'Seed=${args[2]}\n'
1003 <                    txt += 'echo "Seed: <$Seed>"\n'
1004 <                    txt += 'sed "s#\<INPUT\>#$Seed#" tmp_1.cfg > tmp_2.cfg\n'
1001 >                    txt += 'Seed=${args['+str(seedIndex)+']}\n'
1002 >                    txt += 'sed "s#uint32 sourceSeed = 0#uint32 sourceSeed = $Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1003 >                    seedIndex=seedIndex+1
1004 >                    ## the following seeds are not always present
1005                      if (self.sourceSeedVtx):
1006 < #                        txt += 'VtxSeed=$3\n'
859 <                        txt += 'VtxSeed=${args[3]}\n'
1006 >                        txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
1007                          txt += 'echo "VtxSeed: <$VtxSeed>"\n'
1008 <                        txt += 'sed "s#INPUTVTX#$VtxSeed#" tmp_2.cfg > pset.cfg\n'
1009 <                    else:
1010 <                        txt += 'mv tmp_2.cfg pset.cfg\n'
1011 <                else:
1012 <                    txt += 'mv tmp_1.cfg pset.cfg\n'
1013 <                   # txt += '# Copy untouched pset\n'
1014 <                   # txt += 'cp $RUNTIME_AREA/'+pset+' pset.cfg\n'
1015 <
1008 >                        txt += 'sed "s#uint32 VtxSmeared = 0#uint32 VtxSmeared = $VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1009 >                        seedIndex += 1
1010 >                    if (self.sourceSeedG4):
1011 >                        txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
1012 >                        txt += 'echo "G4Seed: <$G4Seed>"\n'
1013 >                        txt += 'sed "s#uint32 g4SimHits = 0#uint32 g4SimHits = $G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1014 >                        seedIndex += 1
1015 >                    if (self.sourceSeedMix):
1016 >                        txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
1017 >                        txt += 'echo "MixSeed: <$mixSeed>"\n'
1018 >                        txt += 'sed "s#uint32 mix = 0#uint32 mix = $mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1019 >                        seedIndex += 1
1020 >                    pass
1021 >                pass
1022 >            txt += 'mv -f '+pset+' pset.cfg\n'
1023  
1024          if len(self.additional_inbox_files) > 0:
1025 <            for file in self.additional_inbox_files:
1026 <                relFile = file.split("/")[-1]
1027 <                txt += 'if [ -e $RUNTIME_AREA/'+relFile+' ] ; then\n'
1028 <                txt += '   cp $RUNTIME_AREA/'+relFile+' .\n'
875 <                txt += '   chmod +x '+relFile+'\n'
876 <                txt += 'fi\n'
877 <            pass
1025 >            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
1026 >            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
1027 >            txt += 'fi\n'
1028 >            pass
1029  
1030          if self.pset != None: #CarlosDaniele
880            txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
881        
1031              txt += '\n'
1032              txt += 'echo "***** cat pset.cfg *********"\n'
1033              txt += 'cat pset.cfg\n'
1034              txt += 'echo "****** end pset.cfg ********"\n'
1035              txt += '\n'
1036 <            # txt += 'echo "***** cat pset1.cfg *********"\n'
1037 <            # txt += 'cat pset1.cfg\n'
1038 <            # txt += 'echo "****** end pset1.cfg ********"\n'
1036 >            ### FEDE FOR DBS OUTPUT PUBLICATION
1037 >            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
1038 >            txt += 'echo "PSETHASH = $PSETHASH" \n'
1039 >            ##############
1040 >            txt += '\n'
1041          return txt
1042  
1043      def wsBuildExe(self, nj=0):
# Line 898 | Line 1049 | class Cmssw(JobType):
1049          txt = ""
1050  
1051          if os.path.isfile(self.tgzNameWithPath):
1052 <            txt += 'echo "tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'"\n'
1052 >            txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
1053              txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
1054              txt += 'untar_status=$? \n'
1055              txt += 'if [ $untar_status -ne 0 ]; then \n'
# Line 906 | Line 1057 | class Cmssw(JobType):
1057              txt += '   echo "JOB_EXIT_STATUS = $untar_status" \n'
1058              txt += '   echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
1059              txt += '   if [ $middleware == OSG ]; then \n'
909            txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1060              txt += '       cd $RUNTIME_AREA\n'
1061 +            txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1062 +            txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
1063              txt += '       /bin/rm -rf $WORKING_DIR\n'
1064              txt += '       if [ -d $WORKING_DIR ] ;then\n'
1065              txt += '           echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
1066              txt += '           echo "JOB_EXIT_STATUS = 50999"\n'
1067              txt += '           echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
1068              txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
917            txt += '           rm -f $RUNTIME_AREA/$repo \n'
918            txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
919            txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1069              txt += '       fi\n'
1070              txt += '   fi \n'
1071              txt += '   \n'
# Line 925 | Line 1074 | class Cmssw(JobType):
1074              txt += '   echo "Successful untar" \n'
1075              txt += 'fi \n'
1076              txt += '\n'
1077 <            txt += 'echo "Include ProdAgentApi in PYTHONPATH"\n'
1077 >            txt += 'echo ">>> Include ProdAgentApi and PRODCOMMON in PYTHONPATH:"\n'
1078              txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1079 <            txt += '   export PYTHONPATH=ProdAgentApi\n'
1079 >            #### FEDE FOR DBS OUTPUT PUBLICATION
1080 >            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
1081              txt += 'else\n'
1082 <            txt += '   export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
1082 >            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1083 >            txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1084 >            ###################
1085              txt += 'fi\n'
1086              txt += '\n'
1087  
1088              pass
1089 <        
1089 >
1090          return txt
1091  
1092      def modifySteeringCards(self, nj):
1093          """
1094 <        modify the card provided by the user,
1094 >        modify the card provided by the user,
1095          writing a new card into share dir
1096          """
1097 <        
1097 >
1098      def executableName(self):
1099 <        if self.pset == None: #CarlosDaniele
1099 >        if self.scriptExe: #CarlosDaniele
1100              return "sh "
1101          else:
1102              return self.executable
1103  
1104      def executableArgs(self):
1105 <        if self.pset == None:#CarlosDaniele
1105 >        if self.scriptExe:#CarlosDaniele
1106              return   self.scriptExe + " $NJob"
1107 <        else:
1108 <            return " -p pset.cfg"
1107 >        else:
1108 >            # if >= CMSSW_1_5_X, add -e
1109 >            version_array = self.scram.getSWVersion().split('_')
1110 >            major = 0
1111 >            minor = 0
1112 >            try:
1113 >                major = int(version_array[1])
1114 >                minor = int(version_array[2])
1115 >            except:
1116 >                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
1117 >                raise CrabException(msg)
1118 >            if major >= 1 and minor >= 5 :
1119 >                return " -e -p pset.cfg"
1120 >            else:
1121 >                return " -p pset.cfg"
1122  
1123      def inputSandbox(self, nj):
1124          """
# Line 968 | Line 1133 | class Cmssw(JobType):
1133          if os.path.isfile(self.MLtgzfile):
1134              inp_box.append(self.MLtgzfile)
1135          ## config
1136 <        if not self.pset is None: #CarlosDaniele
1136 >        if not self.pset is None:
1137              inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1138          ## additional input files
1139 <        #for file in self.additional_inbox_files:
1140 <        #    inp_box.append(common.work_space.cwdDir()+file)
1139 >        tgz = self.additionalInputFileTgz()
1140 >        inp_box.append(tgz)
1141          return inp_box
1142  
1143      def outputSandbox(self, nj):
# Line 983 | Line 1148 | class Cmssw(JobType):
1148  
1149          ## User Declared output files
1150          for out in (self.output_file+self.output_file_sandbox):
1151 <            n_out = nj + 1
1151 >            n_out = nj + 1
1152              out_box.append(self.numberFile_(out,str(n_out)))
1153          return out_box
1154  
# Line 999 | Line 1164 | class Cmssw(JobType):
1164          """
1165  
1166          txt = '\n'
1167 <        txt += '# directory content\n'
1167 >        txt += 'echo" >>> directory content:"\n'
1168          txt += 'ls \n'
1169 +        txt = '\n'
1170 +
1171 +        txt += 'output_exit_status=0\n'
1172  
1173 <        for fileWithSuffix in (self.output_file+self.output_file_sandbox):
1173 >        for fileWithSuffix in (self.output_file_sandbox):
1174              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1175              txt += '\n'
1176              txt += '# check output file\n'
1177 <            txt += 'ls '+fileWithSuffix+'\n'
1178 <            txt += 'ls_result=$?\n'
1179 <            txt += 'if [ $ls_result -ne 0 ] ; then\n'
1180 <            txt += '   echo "ERROR: Problem with output file"\n'
1177 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1178 >            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1179 >            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1180 >            txt += 'else\n'
1181 >            txt += '    exit_status=60302\n'
1182 >            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1183              if common.scheduler.boss_scheduler_name == 'condor_g':
1184                  txt += '    if [ $middleware == OSG ]; then \n'
1185                  txt += '        echo "prepare dummy output file"\n'
1186                  txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1187                  txt += '    fi \n'
1018            txt += 'else\n'
1019            txt += '   cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1188              txt += 'fi\n'
1021      
1022        txt += 'cd $RUNTIME_AREA\n'
1023        txt += 'cd $RUNTIME_AREA\n'
1024        ### OLI_DANIELE
1025        txt += 'if [ $middleware == OSG ]; then\n'  
1026        txt += '    cd $RUNTIME_AREA\n'
1027        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
1028        txt += '    /bin/rm -rf $WORKING_DIR\n'
1029        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1030        txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1031        txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
1032        txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1033        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1034        txt += '        rm -f $RUNTIME_AREA/$repo \n'
1035        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1036        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1037        txt += '    fi\n'
1038        txt += 'fi\n'
1039        txt += '\n'
1189  
1190 <        file_list = ''
1042 <        ## Add to filelist only files to be possibly copied to SE
1043 <        for fileWithSuffix in self.output_file:
1190 >        for fileWithSuffix in (self.output_file):
1191              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1192 <            file_list=file_list+output_file_num+' '
1193 <        file_list=file_list[:-1]
1194 <        txt += 'file_list="'+file_list+'"\n'
1192 >            txt += '\n'
1193 >            txt += '# check output file\n'
1194 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1195 >            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1196 >            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1197 >            txt += 'else\n'
1198 >            txt += '    exit_status=60302\n'
1199 >            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1200 >            txt += '    echo "JOB_EXIT_STATUS = $exit_status"\n'
1201 >            txt += '    output_exit_status=$exit_status\n'
1202 >            if common.scheduler.boss_scheduler_name == 'condor_g':
1203 >                txt += '    if [ $middleware == OSG ]; then \n'
1204 >                txt += '        echo "prepare dummy output file"\n'
1205 >                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1206 >                txt += '    fi \n'
1207 >            txt += 'fi\n'
1208 >        file_list = []
1209 >        for fileWithSuffix in (self.output_file):
1210 >             file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1211  
1212 +        txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1213 +        txt += 'cd $RUNTIME_AREA\n'
1214 +        txt += 'echo ">>> current directory (RUNTIME_AREA):  $RUNTIME_AREA"\n'
1215          return txt
1216  
1217      def numberFile_(self, file, txt):
# Line 1056 | Line 1222 | class Cmssw(JobType):
1222          # take away last extension
1223          name = p[0]
1224          for x in p[1:-1]:
1225 <           name=name+"."+x
1225 >            name=name+"."+x
1226          # add "_txt"
1227          if len(p)>1:
1228 <          ext = p[len(p)-1]
1229 <          result = name + '_' + txt + "." + ext
1228 >            ext = p[len(p)-1]
1229 >            result = name + '_' + txt + "." + ext
1230          else:
1231 <          result = name + '_' + txt
1232 <        
1231 >            result = name + '_' + txt
1232 >
1233          return result
1234  
1235      def getRequirements(self, nj=[]):
1236          """
1237 <        return job requirements to add to jdl files
1237 >        return job requirements to add to jdl files
1238          """
1239          req = ''
1240          if self.version:
1241              req='Member("VO-cms-' + \
1242                   self.version + \
1243                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1244 +        ## SL add requirement for OS version only if SL4
1245 +        #reSL4 = re.compile( r'slc4' )
1246 +        if self.executable_arch: # and reSL4.search(self.executable_arch):
1247 +            req+=' && Member("VO-cms-' + \
1248 +                 self.executable_arch + \
1249 +                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1250  
1251          req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1252  
# Line 1084 | Line 1256 | class Cmssw(JobType):
1256          """ return the config filename """
1257          return self.name()+'.cfg'
1258  
1087    ### OLI_DANIELE
1259      def wsSetupCMSOSGEnvironment_(self):
1260          """
1261          Returns part of a job script which is prepares
1262          the execution environment and which is common for all CMS jobs.
1263          """
1264 <        txt = '\n'
1265 <        txt += '   echo "### SETUP CMS OSG  ENVIRONMENT ###"\n'
1266 <        txt += '   if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1267 <        txt += '      # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1268 <        txt += '       source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1098 <        txt += '   elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1264 >        txt = '    echo ">>> setup CMS OSG environment:"\n'
1265 >        txt += '    echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
1266 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1267 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1268 >        txt += '    if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1269          txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1270 <        txt += '       source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1271 <        txt += '   else\n'
1272 <        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1273 <        txt += '       echo "JOB_EXIT_STATUS = 10020"\n'
1274 <        txt += '       echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1275 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1106 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1107 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1108 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1109 <        txt += '       exit 1\n'
1270 >        txt += '        source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1271 >        txt += '    else\n'
1272 >        txt += '        echo "SET_CMS_ENV 10020 ==> ERROR $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1273 >        txt += '        echo "JOB_EXIT_STATUS = 10020"\n'
1274 >        txt += '        echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1275 >        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1276          txt += '\n'
1277 <        txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1278 <        txt += '       cd $RUNTIME_AREA\n'
1279 <        txt += '       /bin/rm -rf $WORKING_DIR\n'
1280 <        txt += '       if [ -d $WORKING_DIR ] ;then\n'
1281 <        txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1282 <        txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
1283 <        txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1284 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1285 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
1286 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1121 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1122 <        txt += '       fi\n'
1277 >        txt += '        cd $RUNTIME_AREA\n'
1278 >        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1279 >        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
1280 >        txt += '        /bin/rm -rf $WORKING_DIR\n'
1281 >        txt += '        if [ -d $WORKING_DIR ] ;then\n'
1282 >        txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1283 >        txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
1284 >        txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1285 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1286 >        txt += '        fi\n'
1287          txt += '\n'
1288 <        txt += '       exit 1\n'
1289 <        txt += '   fi\n'
1288 >        txt += '        exit 1\n'
1289 >        txt += '    fi\n'
1290          txt += '\n'
1291 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1292 <        txt += '   echo " END SETUP CMS OSG  ENVIRONMENT "\n'
1291 >        txt += '    echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1292 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1293  
1294          return txt
1295 <
1295 >
1296      ### OLI_DANIELE
1297      def wsSetupCMSLCGEnvironment_(self):
1298          """
1299          Returns part of a job script which is prepares
1300          the execution environment and which is common for all CMS jobs.
1301          """
1302 <        txt  = '   \n'
1303 <        txt += '   echo " ### SETUP CMS LCG  ENVIRONMENT ### "\n'
1304 <        txt += '   if [ ! $VO_CMS_SW_DIR ] ;then\n'
1305 <        txt += '       echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1306 <        txt += '       echo "JOB_EXIT_STATUS = 10031" \n'
1307 <        txt += '       echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1308 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1309 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1310 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1311 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1312 <        txt += '       exit 1\n'
1313 <        txt += '   else\n'
1314 <        txt += '       echo "Sourcing environment... "\n'
1315 <        txt += '       if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1316 <        txt += '           echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1317 <        txt += '           echo "JOB_EXIT_STATUS = 10020"\n'
1318 <        txt += '           echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1319 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1320 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1321 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1322 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1323 <        txt += '           exit 1\n'
1324 <        txt += '       fi\n'
1325 <        txt += '       echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1326 <        txt += '       source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1327 <        txt += '       result=$?\n'
1328 <        txt += '       if [ $result -ne 0 ]; then\n'
1329 <        txt += '           echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1330 <        txt += '           echo "JOB_EXIT_STATUS = 10032"\n'
1331 <        txt += '           echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1332 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1333 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1334 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1335 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1336 <        txt += '           exit 1\n'
1337 <        txt += '       fi\n'
1338 <        txt += '   fi\n'
1339 <        txt += '   \n'
1340 <        txt += '   string=`cat /etc/redhat-release`\n'
1341 <        txt += '   echo $string\n'
1342 <        txt += '   if [[ $string = *alhalla* ]]; then\n'
1343 <        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1344 <        txt += '   elif [[ $string = *Enterprise* ]] || [[ $string = *cientific* ]]; then\n'
1345 <        txt += '       export SCRAM_ARCH=slc3_ia32_gcc323\n'
1346 <        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1347 <        txt += '   else\n'
1348 <        txt += '       echo "SET_CMS_ENV 10033 ==> ERROR OS unknown, LCG environment not initialized"\n'
1349 <        txt += '       echo "JOB_EXIT_STATUS = 10033"\n'
1350 <        txt += '       echo "JobExitCode=10033" | tee -a $RUNTIME_AREA/$repo\n'
1351 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1352 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1353 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1354 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1355 <        txt += '       exit 1\n'
1356 <        txt += '   fi\n'
1357 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1358 <        txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1302 >        txt = '    echo ">>> setup CMS LCG environment:"\n'
1303 >        txt += '    echo "set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
1304 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1305 >        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
1306 >        txt += '    if [ ! $VO_CMS_SW_DIR ] ;then\n'
1307 >        txt += '        echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1308 >        txt += '        echo "JOB_EXIT_STATUS = 10031" \n'
1309 >        txt += '        echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1310 >        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1311 >        txt += '        exit 1\n'
1312 >        txt += '    else\n'
1313 >        txt += '        echo "Sourcing environment... "\n'
1314 >        txt += '        if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1315 >        txt += '            echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1316 >        txt += '            echo "JOB_EXIT_STATUS = 10020"\n'
1317 >        txt += '            echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1318 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1319 >        txt += '            exit 1\n'
1320 >        txt += '        fi\n'
1321 >        txt += '        echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1322 >        txt += '        source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1323 >        txt += '        result=$?\n'
1324 >        txt += '        if [ $result -ne 0 ]; then\n'
1325 >        txt += '            echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1326 >        txt += '            echo "JOB_EXIT_STATUS = 10032"\n'
1327 >        txt += '            echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1328 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1329 >        txt += '            exit 1\n'
1330 >        txt += '        fi\n'
1331 >        txt += '    fi\n'
1332 >        txt += '    \n'
1333 >        txt += '    echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1334 >        return txt
1335 >
1336 >    ### FEDE FOR DBS OUTPUT PUBLICATION
1337 >    def modifyReport(self, nj):
1338 >        """
1339 >        insert the part of the script that modifies the FrameworkJob Report
1340 >        """
1341 >
1342 >        txt = ''
1343 >        try:
1344 >            publish_data = int(self.cfg_params['USER.publish_data'])
1345 >        except KeyError:
1346 >            publish_data = 0
1347 >        if (publish_data == 1):
1348 >            txt += 'echo ">>> Modify Job Report:" \n'
1349 >            ################ FEDE FOR DBS2 #############################################
1350 >            txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1351 >            #############################################################################
1352 >
1353 >            txt += 'if [ -z "$SE" ]; then\n'
1354 >            txt += '    SE="" \n'
1355 >            txt += 'fi \n'
1356 >            txt += 'if [ -z "$SE_PATH" ]; then\n'
1357 >            txt += '    SE_PATH="" \n'
1358 >            txt += 'fi \n'
1359 >            txt += 'echo "SE = $SE"\n'
1360 >            txt += 'echo "SE_PATH = $SE_PATH"\n'
1361 >
1362 >            processedDataset = self.cfg_params['USER.publish_data_name']
1363 >            txt += 'ProcessedDataset='+processedDataset+'\n'
1364 >            #### LFN=/store/user/<user>/processedDataset_PSETHASH
1365 >            txt += 'if [ "$SE_PATH" == "" ]; then\n'
1366 >            #### FEDE: added slash in LFN ##############
1367 >            txt += '    FOR_LFN=/copy_problems/ \n'
1368 >            txt += 'else \n'
1369 >            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1370 >            #####  FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1371 >            txt += '    FOR_LFN=/store$tmp \n'
1372 >            txt += 'fi \n'
1373 >            txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1374 >            txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1375 >            txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1376 >            #txt += 'echo "$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1377 >            txt += 'echo "$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1378 >            txt += '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1379 >            #txt += '$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1380 >
1381 >            txt += 'modifyReport_result=$?\n'
1382 >            txt += 'echo modifyReport_result = $modifyReport_result\n'
1383 >            txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1384 >            txt += '    exit_status=1\n'
1385 >            txt += '    echo "ERROR: Problem with ModifyJobReport"\n'
1386 >            txt += 'else\n'
1387 >            txt += '    mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1388 >            txt += 'fi\n'
1389 >        else:
1390 >            txt += 'echo "no data publication required"\n'
1391 >        return txt
1392 >
1393 >    def cleanEnv(self):
1394 >        txt = ''
1395 >        txt += 'if [ $middleware == OSG ]; then\n'
1396 >        txt += '    cd $RUNTIME_AREA\n'
1397 >        txt += '    echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1398 >        txt += '    echo ">>> Remove working directory: $WORKING_DIR"\n'
1399 >        txt += '    /bin/rm -rf $WORKING_DIR\n'
1400 >        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1401 >        txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1402 >        txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
1403 >        txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1404 >        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1405 >        txt += '    fi\n'
1406 >        txt += 'fi\n'
1407 >        txt += '\n'
1408          return txt
1409  
1410      def setParam_(self, param, value):
# Line 1202 | Line 1415 | class Cmssw(JobType):
1415  
1416      def setTaskid_(self):
1417          self._taskId = self.cfg_params['taskId']
1418 <        
1418 >
1419      def getTaskid(self):
1420          return self._taskId
1421  
1209 #######################################################################
1422      def uniquelist(self, old):
1423          """
1424          remove duplicates from a list
# Line 1215 | Line 1427 | class Cmssw(JobType):
1427          for e in old:
1428              nd[e]=0
1429          return nd.keys()
1430 +
1431 +
1432 +    def checkOut(self, limit):
1433 +        """
1434 +        check the dimension of the output files
1435 +        """
1436 +        txt += 'echo ">>> Starting output sandbox limit check :"\n'
1437 +        allOutFiles = ""
1438 +        listOutFiles = []
1439 +        for fileOut in (self.output_file+self.output_file_sandbox):
1440 +             if fileOut.find('crab_fjr') == -1:
1441 +                 allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob')
1442 +                 listOutFiles.append(self.numberFile_(fileOut, '$NJob'))
1443 +        txt += 'echo "OUTPUT files: '+str(allOutFiles)+'";\n'
1444 +        txt += 'ls -gGhrta;\n'
1445 +        txt += 'sum=0;\n'
1446 +        txt += 'for file in '+str(allOutFiles)+' ; do\n'
1447 +        txt += '    if [ -e $file ]; then\n'
1448 +        txt += '        tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
1449 +        txt += '        sum=`expr $sum + $tt`\n'
1450 +        txt += '    else\n'
1451 +        txt += '        echo "WARNING: output file $file not found!"\n'
1452 +        txt += '    fi\n'
1453 +        txt += 'done\n'
1454 +        txt += 'echo "Total Output dimension: $sum";\n'
1455 +        txt += 'limit='+str(limit)+';\n'
1456 +        txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
1457 +        txt += 'if [ $limit -lt $sum ]; then\n'
1458 +        txt += '    echo "WARNING: output files have to big size - something will be lost;"\n'
1459 +        txt += '    echo "         checking the output file sizes..."\n'
1460 +        """
1461 +        txt += '    dim=0;\n'
1462 +        txt += '    exclude=0;\n'
1463 +        txt += '    for files in '+str(allOutFiles)+' ; do\n'
1464 +        txt += '        sumTemp=0;\n'
1465 +        txt += '        for file2 in '+str(allOutFiles)+' ; do\n'
1466 +        txt += '            if [ $file != $file2 ]; then\n'
1467 +        txt += '                tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1468 +        txt += '                sumTemp=`expr $sumTemp + $tt`;\n'
1469 +        txt += '            fi\n'
1470 +        txt += '        done\n'
1471 +        txt += '        if [ $sumTemp -lt $limit ]; then\n'
1472 +        txt += '            if [ $dim -lt $sumTemp ]; then\n'
1473 +        txt += '                dim=$sumTemp;\n'
1474 +        txt += '                exclude=$file;\n'
1475 +        txt += '            fi\n'
1476 +        txt += '        fi\n'
1477 +        txt += '    done\n'
1478 +        txt += '    echo "Dimension calculated: $dim"; echo "File to exclude: $exclude";\n'
1479 +        """
1480 +        txt += '    tot=0;\n'
1481 +        txt += '    for file2 in '+str(allOutFiles)+' ; do\n'
1482 +        txt += '        tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1483 +        txt += '        tot=`expr $tot + $tt`;\n'
1484 +        txt += '        if [ $limit -lt $tot ]; then\n'
1485 +        txt += '            tot=`expr $tot - $tt`;\n'
1486 +        txt += '            fileLast=$file;\n'
1487 +        txt += '            break;\n'
1488 +        txt += '        fi\n'
1489 +        txt += '    done\n'
1490 +        txt += '    echo "Dimension calculated: $tot"; echo "First file to exclude: $file";\n'
1491 +        txt += '    flag=0;\n'
1492 +        txt += '    for filess in '+str(allOutFiles)+' ; do\n'
1493 +        txt += '        if [ $fileLast = $filess ]; then\n'
1494 +        txt += '            flag=1;\n'
1495 +        txt += '        fi\n'
1496 +        txt += '        if [ $flag -eq 1 ]; then\n'
1497 +        txt += '            rm -f $filess;\n'
1498 +        txt += '        fi\n'
1499 +        txt += '    done\n'
1500 +        txt += '    ls -agGhrt;\n'
1501 +        txt += '    echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
1502 +        txt += '    echo "JOB_EXIT_STATUS = 70000";\n'
1503 +        txt += '    exit_status=70000;\n'
1504 +        txt += 'else'
1505 +        txt += '    echo "Total Output dimension $sum is fine.";\n'
1506 +        txt += 'fi\n'
1507 +        txt += 'echo "Ending output sandbox limit check"\n'
1508 +        return txt

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines