ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.55 by slacapra, Wed Nov 22 14:57:26 2006 UTC vs.
Revision 1.139 by ewv, Tue Nov 20 17:06:50 2007 UTC

# Line 2 | Line 2 | from JobType import JobType
2   from crab_logger import Logger
3   from crab_exceptions import *
4   from crab_util import *
5 < import math
5 > from BlackWhiteListParser import BlackWhiteListParser
6   import common
7 import PsetManipulator  
8
9 import DBSInfo
10 import DataDiscovery
11 import DataLocation
7   import Scram
8  
9 < import glob, os, string, re
9 > import os, string, glob
10  
11   class Cmssw(JobType):
12      def __init__(self, cfg_params, ncjobs):
13          JobType.__init__(self, 'CMSSW')
14          common.logger.debug(3,'CMSSW::__init__')
15  
21        # Marco.
16          self._params = {}
17          self.cfg_params = cfg_params
18  
19 +        # init BlackWhiteListParser
20 +        self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
21 +
22 +        try:
23 +            self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24 +        except KeyError:
25 +            self.MaxTarBallSize = 9.5
26 +
27          # number of jobs requested to be created, limit obj splitting
28          self.ncjobs = ncjobs
29  
30          log = common.logger
31 <        
31 >
32          self.scram = Scram.Scram(cfg_params)
31        scramArea = ''
33          self.additional_inbox_files = []
34          self.scriptExe = ''
35          self.executable = ''
36 +        self.executable_arch = self.scram.getArch()
37          self.tgz_name = 'default.tgz'
38 <        self.pset = ''      #scrip use case Da  
38 >        self.additional_tgz_name = 'additional.tgz'
39 >        self.scriptName = 'CMSSW.sh'
40 >        self.pset = ''      #scrip use case Da
41          self.datasetPath = '' #scrip use case Da
42  
43          # set FJR file name
44          self.fjrFileName = 'crab_fjr.xml'
45  
46          self.version = self.scram.getSWVersion()
47 +
48 +        #
49 +        # Try to block creation in case of arch/version mismatch
50 +        #
51 +
52 +        a = string.split(self.version, "_")
53 +
54 +        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
55 +            msg = "Warning: You are using %s version of CMSSW  with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
56 +            common.logger.message(msg)
57 +        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58 +            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
59 +            raise CrabException(msg)
60 +
61          common.taskDB.setDict('codeVersion',self.version)
62          self.setParam_('application', self.version)
63  
64          ### collect Data cards
65 +
66          try:
67              tmp =  cfg_params['CMSSW.datasetpath']
68              log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
# Line 54 | Line 73 | class Cmssw(JobType):
73                  self.datasetPath = tmp
74                  self.selectNoInput = 0
75          except KeyError:
76 <            msg = "Error: datasetpath not defined "  
76 >            msg = "Error: datasetpath not defined "
77              raise CrabException(msg)
78  
79          # ML monitoring
# Line 63 | Line 82 | class Cmssw(JobType):
82              self.setParam_('dataset', 'None')
83              self.setParam_('owner', 'None')
84          else:
85 <            datasetpath_split = self.datasetPath.split("/")
86 <            self.setParam_('dataset', datasetpath_split[1])
87 <            self.setParam_('owner', datasetpath_split[-1])
85 >            try:
86 >                datasetpath_split = self.datasetPath.split("/")
87 >                # standard style
88 >                self.setParam_('datasetFull', self.datasetPath)
89 >                self.setParam_('dataset', datasetpath_split[1])
90 >                self.setParam_('owner', datasetpath_split[2])
91 >            except:
92 >                self.setParam_('dataset', self.datasetPath)
93 >                self.setParam_('owner', self.datasetPath)
94  
95          self.setTaskid_()
96          self.setParam_('taskId', self.cfg_params['taskId'])
# Line 89 | Line 114 | class Cmssw(JobType):
114          try:
115              self.pset = cfg_params['CMSSW.pset']
116              log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
117 <            if self.pset.lower() != 'none' :
117 >            if self.pset.lower() != 'none' :
118                  if (not os.path.exists(self.pset)):
119                      raise CrabException("User defined PSet file "+self.pset+" does not exist")
120              else:
# Line 107 | Line 132 | class Cmssw(JobType):
132          # other output files to be returned via sandbox or copied to SE
133          try:
134              self.output_file = []
110
111
135              tmp = cfg_params['CMSSW.output_file']
136              if tmp != '':
137                  tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
# Line 118 | Line 141 | class Cmssw(JobType):
141                      self.output_file.append(tmp)
142                      pass
143              else:
144 <                log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available")
144 >                log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
145                  pass
146              pass
147          except KeyError:
148 <            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available")
148 >            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
149              pass
150  
151          # script_exe file as additional file in inputSandbox
# Line 130 | Line 153 | class Cmssw(JobType):
153              self.scriptExe = cfg_params['USER.script_exe']
154              if self.scriptExe != '':
155                 if not os.path.isfile(self.scriptExe):
156 <                  msg ="WARNING. file "+self.scriptExe+" not found"
156 >                  msg ="ERROR. file "+self.scriptExe+" not found"
157                    raise CrabException(msg)
158                 self.additional_inbox_files.append(string.strip(self.scriptExe))
159          except KeyError:
160              self.scriptExe = ''
161 +
162          #CarlosDaniele
163          if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
164 <           msg ="WARNING. script_exe  not defined"
164 >           msg ="Error. script_exe  not defined"
165             raise CrabException(msg)
166  
167          ## additional input files
# Line 147 | Line 171 | class Cmssw(JobType):
171                  tmp = string.strip(tmp)
172                  dirname = ''
173                  if not tmp[0]=="/": dirname = "."
174 <                files = glob.glob(os.path.join(dirname, tmp))
174 >                files = []
175 >                if string.find(tmp,"*")>-1:
176 >                    files = glob.glob(os.path.join(dirname, tmp))
177 >                    if len(files)==0:
178 >                        raise CrabException("No additional input file found with this pattern: "+tmp)
179 >                else:
180 >                    files.append(tmp)
181                  for file in files:
182                      if not os.path.exists(file):
183                          raise CrabException("Additional input file not found: "+file)
184                      pass
185 +                    # fname = string.split(file, '/')[-1]
186 +                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
187 +                    # shutil.copyfile(file, storedFile)
188                      self.additional_inbox_files.append(string.strip(file))
189                  pass
190              pass
# Line 173 | Line 206 | class Cmssw(JobType):
206          except KeyError:
207              self.eventsPerJob = -1
208              self.selectEventsPerJob = 0
209 <    
209 >
210          ## number of jobs
211          try:
212              self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
# Line 189 | Line 222 | class Cmssw(JobType):
222              self.total_number_of_events = 0
223              self.selectTotalNumberEvents = 0
224  
225 <        if self.pset != None: #CarlosDaniele
225 >        if self.pset != None: #CarlosDaniele
226               if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
227                   msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
228                   raise CrabException(msg)
# Line 210 | Line 243 | class Cmssw(JobType):
243          except KeyError:
244              self.sourceSeedVtx = None
245              common.logger.debug(5,"No vertex seed given")
246 +
247 +        try:
248 +            self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
249 +        except KeyError:
250 +            self.sourceSeedG4 = None
251 +            common.logger.debug(5,"No g4 sim hits seed given")
252 +
253 +        try:
254 +            self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
255 +        except KeyError:
256 +            self.sourceSeedMix = None
257 +            common.logger.debug(5,"No mix seed given")
258 +
259 +        try:
260 +            self.firstRun = int(cfg_params['CMSSW.first_run'])
261 +        except KeyError:
262 +            self.firstRun = None
263 +            common.logger.debug(5,"No first run given")
264          if self.pset != None: #CarlosDaniele
265 <            self.PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset
265 >            import PsetManipulator as pp
266 >            PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
267  
268          #DBSDLS-start
269 <        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
269 >        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
270          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
271          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
272          self.jobDestination=[]  # Site destination(s) for each job (list of lists)
# Line 223 | Line 275 | class Cmssw(JobType):
275          blockSites = {}
276          if self.datasetPath:
277              blockSites = self.DataDiscoveryAndLocation(cfg_params)
278 <        #DBSDLS-end          
278 >        #DBSDLS-end
279  
280          self.tgzNameWithPath = self.getTarBall(self.executable)
281 <    
281 >
282          ## Select Splitting
283 <        if self.selectNoInput:
283 >        if self.selectNoInput:
284              if self.pset == None: #CarlosDaniele
285                  self.jobSplittingForScript()
286              else:
287                  self.jobSplittingNoInput()
288 <        else: self.jobSplittingByBlocks(blockSites)
288 >        else:
289 >            self.jobSplittingByBlocks(blockSites)
290  
291          # modify Pset
292          if self.pset != None: #CarlosDaniele
293              try:
294                  if (self.datasetPath): # standard job
295                      # allow processing a fraction of the events in a file
296 <                    self.PsetEdit.inputModule("INPUT")
297 <                    self.PsetEdit.maxEvent("INPUTMAXEVENTS")
298 <                    self.PsetEdit.skipEvent("INPUTSKIPEVENTS")
296 >                    PsetEdit.inputModule("INPUTFILE")
297 >                    PsetEdit.maxEvent(0)
298 >                    PsetEdit.skipEvent(0)
299                  else:  # pythia like job
300 <                    self.PsetEdit.maxEvent(self.eventsPerJob)
300 >                    PsetEdit.maxEvent(self.eventsPerJob)
301 >                    if (self.firstRun):
302 >                        PsetEdit.pythiaFirstRun(0)  #First Run
303                      if (self.sourceSeed) :
304 <                        self.PsetEdit.pythiaSeed("INPUT")
304 >                        PsetEdit.pythiaSeed(0)
305                          if (self.sourceSeedVtx) :
306 <                            self.PsetEdit.pythiaSeedVtx("INPUTVTX")
306 >                            PsetEdit.vtxSeed(0)
307 >                        if (self.sourceSeedG4) :
308 >                            PsetEdit.g4Seed(0)
309 >                        if (self.sourceSeedMix) :
310 >                            PsetEdit.mixSeed(0)
311                  # add FrameworkJobReport to parameter-set
312 <                self.PsetEdit.addCrabFJR(self.fjrFileName)
313 <                self.PsetEdit.psetWriter(self.configFilename())
312 >                PsetEdit.addCrabFJR(self.fjrFileName)
313 >                PsetEdit.psetWriter(self.configFilename())
314              except:
315                  msg='Error while manipuliating ParameterSet: exiting...'
316                  raise CrabException(msg)
317  
318      def DataDiscoveryAndLocation(self, cfg_params):
319  
320 +        import DataDiscovery
321 +        import DataLocation
322          common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
323  
324          datasetPath=self.datasetPath
325  
265        ## TODO
266        dataTiersList = ""
267        dataTiers = dataTiersList.split(',')
268
326          ## Contact the DBS
327 <        common.logger.message("Contacting DBS...")
327 >        common.logger.message("Contacting Data Discovery Services ...")
328          try:
329 <            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, dataTiers, cfg_params)
329 >
330 >            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
331              self.pubdata.fetchDBSInfo()
332  
333          except DataDiscovery.NotExistingDatasetError, ex :
334              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
335              raise CrabException(msg)
278
336          except DataDiscovery.NoDataTierinProvenanceError, ex :
337              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
338              raise CrabException(msg)
339          except DataDiscovery.DataDiscoveryError, ex:
340 <            msg = 'ERROR ***: failed Data Discovery in DBS  %s'%ex.getErrorMessage()
340 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
341              raise CrabException(msg)
342  
286        ## get list of all required data in the form of dbs paths  (dbs path = /dataset/datatier/owner)
287        ## self.DBSPaths=self.pubdata.getDBSPaths()
288        common.logger.message("Required data are :"+self.datasetPath)
289
343          self.filesbyblock=self.pubdata.getFiles()
344          self.eventsbyblock=self.pubdata.getEventsPerBlock()
345          self.eventsbyfile=self.pubdata.getEventsPerFile()
293        # print str(self.filesbyblock)
294        # print 'self.eventsbyfile',len(self.eventsbyfile)
295        # print str(self.eventsbyfile)
346  
347          ## get max number of events
348 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
299 <        common.logger.message("The number of available events is %s\n"%self.maxEvents)
348 >        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
349  
301        common.logger.message("Contacting DLS...")
350          ## Contact the DLS and build a list of sites hosting the fileblocks
351          try:
352              dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
# Line 306 | Line 354 | class Cmssw(JobType):
354          except DataLocation.DataLocationError , ex:
355              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
356              raise CrabException(msg)
357 <        
357 >
358  
359          sites = dataloc.getSites()
360          allSites = []
361          listSites = sites.values()
362 <        for list in listSites:
363 <            for oneSite in list:
362 >        for listSite in listSites:
363 >            for oneSite in listSite:
364                  allSites.append(oneSite)
365          allSites = self.uniquelist(allSites)
366  
367 <        common.logger.message("Sites ("+str(len(allSites))+") hosting part/all of dataset: "+str(allSites))
368 <        common.logger.debug(6, "List of Sites: "+str(allSites))
367 >        # screen output
368 >        common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
369 >
370          return sites
371 <    
371 >
372      def jobSplittingByBlocks(self, blockSites):
373          """
374          Perform job splitting. Jobs run over an integer number of files
# Line 369 | Line 418 | class Cmssw(JobType):
418              totalNumberOfJobs = 999999999
419          else :
420              totalNumberOfJobs = self.ncjobs
421 <            
421 >
422  
423          blocks = blockSites.keys()
424          blockCount = 0
# Line 379 | Line 428 | class Cmssw(JobType):
428          jobCount = 0
429          list_of_lists = []
430  
431 +        # dictionary tracking which jobs belong to which block
432 +        jobsOfBlock = {}
433 +
434          # ---- Iterate over the blocks in the dataset until ---- #
435          # ---- we've met the requested total # of events    ---- #
436          while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
437              block = blocks[blockCount]
438              blockCount += 1
439 <            
439 >            if block not in jobsOfBlock.keys() :
440 >                jobsOfBlock[block] = []
441  
442 <            numEventsInBlock = self.eventsbyblock[block]
443 <            common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
444 <            
445 <            files = self.filesbyblock[block]
446 <            numFilesInBlock = len(files)
447 <            if (numFilesInBlock <= 0):
448 <                continue
449 <            fileCount = 0
450 <
451 <            # ---- New block => New job ---- #
452 <            parString = "\\{"
453 <            # counter for number of events in files currently worked on
454 <            filesEventCount = 0
455 <            # flag if next while loop should touch new file
456 <            newFile = 1
457 <            # job event counter
458 <            jobSkipEventCount = 0
459 <            
460 <            # ---- Iterate over the files in the block until we've met the requested ---- #
461 <            # ---- total # of events or we've gone over all the files in this block  ---- #
462 <            while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
463 <                file = files[fileCount]
464 <                if newFile :
465 <                    try:
466 <                        numEventsInFile = self.eventsbyfile[file]
467 <                        common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
468 <                        # increase filesEventCount
469 <                        filesEventCount += numEventsInFile
470 <                        # Add file to current job
471 <                        parString += '\\\"' + file + '\\\"\,'
472 <                        newFile = 0
473 <                    except KeyError:
474 <                        common.logger.message("File "+str(file)+" has unknown number of events: skipping")
475 <                        
476 <
477 <                # if less events in file remain than eventsPerJobRequested
478 <                if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
479 <                    # if last file in block
480 <                    if ( fileCount == numFilesInBlock-1 ) :
481 <                        # end job using last file, use remaining events in block
442 >            if self.eventsbyblock.has_key(block) :
443 >                numEventsInBlock = self.eventsbyblock[block]
444 >                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
445 >
446 >                files = self.filesbyblock[block]
447 >                numFilesInBlock = len(files)
448 >                if (numFilesInBlock <= 0):
449 >                    continue
450 >                fileCount = 0
451 >
452 >                # ---- New block => New job ---- #
453 >                parString = ""
454 >                # counter for number of events in files currently worked on
455 >                filesEventCount = 0
456 >                # flag if next while loop should touch new file
457 >                newFile = 1
458 >                # job event counter
459 >                jobSkipEventCount = 0
460 >
461 >                # ---- Iterate over the files in the block until we've met the requested ---- #
462 >                # ---- total # of events or we've gone over all the files in this block  ---- #
463 >                while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
464 >                    file = files[fileCount]
465 >                    if newFile :
466 >                        try:
467 >                            numEventsInFile = self.eventsbyfile[file]
468 >                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
469 >                            # increase filesEventCount
470 >                            filesEventCount += numEventsInFile
471 >                            # Add file to current job
472 >                            parString += '\\\"' + file + '\\\"\,'
473 >                            newFile = 0
474 >                        except KeyError:
475 >                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
476 >
477 >
478 >                    # if less events in file remain than eventsPerJobRequested
479 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
480 >                        # if last file in block
481 >                        if ( fileCount == numFilesInBlock-1 ) :
482 >                            # end job using last file, use remaining events in block
483 >                            # close job and touch new file
484 >                            fullString = parString[:-2]
485 >                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
486 >                            common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
487 >                            self.jobDestination.append(blockSites[block])
488 >                            common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
489 >                            # fill jobs of block dictionary
490 >                            jobsOfBlock[block].append(jobCount+1)
491 >                            # reset counter
492 >                            jobCount = jobCount + 1
493 >                            totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
494 >                            eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
495 >                            jobSkipEventCount = 0
496 >                            # reset file
497 >                            parString = ""
498 >                            filesEventCount = 0
499 >                            newFile = 1
500 >                            fileCount += 1
501 >                        else :
502 >                            # go to next file
503 >                            newFile = 1
504 >                            fileCount += 1
505 >                    # if events in file equal to eventsPerJobRequested
506 >                    elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
507                          # close job and touch new file
508                          fullString = parString[:-2]
509 <                        fullString += '\\}'
510 <                        list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
433 <                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
509 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
510 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
511                          self.jobDestination.append(blockSites[block])
512                          common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
513 +                        jobsOfBlock[block].append(jobCount+1)
514                          # reset counter
515                          jobCount = jobCount + 1
516 <                        totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
517 <                        eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
516 >                        totalEventCount = totalEventCount + eventsPerJobRequested
517 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
518                          jobSkipEventCount = 0
519                          # reset file
520 <                        parString = "\\{"
520 >                        parString = ""
521                          filesEventCount = 0
522                          newFile = 1
523                          fileCount += 1
524 +
525 +                    # if more events in file remain than eventsPerJobRequested
526                      else :
527 <                        # go to next file
528 <                        newFile = 1
529 <                        fileCount += 1
530 <                # if events in file equal to eventsPerJobRequested
531 <                elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
532 <                    # close job and touch new file
533 <                    fullString = parString[:-2]
534 <                    fullString += '\\}'
535 <                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
536 <                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
537 <                    self.jobDestination.append(blockSites[block])
538 <                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
539 <                    # reset counter
540 <                    jobCount = jobCount + 1
541 <                    totalEventCount = totalEventCount + eventsPerJobRequested
542 <                    eventsRemaining = eventsRemaining - eventsPerJobRequested
543 <                    jobSkipEventCount = 0
544 <                    # reset file
545 <                    parString = "\\{"
546 <                    filesEventCount = 0
467 <                    newFile = 1
468 <                    fileCount += 1
469 <                    
470 <                # if more events in file remain than eventsPerJobRequested
471 <                else :
472 <                    # close job but don't touch new file
473 <                    fullString = parString[:-2]
474 <                    fullString += '\\}'
475 <                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
476 <                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
477 <                    self.jobDestination.append(blockSites[block])
478 <                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
479 <                    # increase counter
480 <                    jobCount = jobCount + 1
481 <                    totalEventCount = totalEventCount + eventsPerJobRequested
482 <                    eventsRemaining = eventsRemaining - eventsPerJobRequested
483 <                    # calculate skip events for last file
484 <                    # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
485 <                    jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
486 <                    # remove all but the last file
487 <                    filesEventCount = self.eventsbyfile[file]
488 <                    parString = "\\{"
489 <                    parString += '\\\"' + file + '\\\"\,'
490 <                pass # END if
491 <            pass # END while (iterate over files in the block)
527 >                        # close job but don't touch new file
528 >                        fullString = parString[:-2]
529 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
530 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
531 >                        self.jobDestination.append(blockSites[block])
532 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
533 >                        jobsOfBlock[block].append(jobCount+1)
534 >                        # increase counter
535 >                        jobCount = jobCount + 1
536 >                        totalEventCount = totalEventCount + eventsPerJobRequested
537 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
538 >                        # calculate skip events for last file
539 >                        # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
540 >                        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
541 >                        # remove all but the last file
542 >                        filesEventCount = self.eventsbyfile[file]
543 >                        parString = ""
544 >                        parString += '\\\"' + file + '\\\"\,'
545 >                    pass # END if
546 >                pass # END while (iterate over files in the block)
547          pass # END while (iterate over blocks in the dataset)
548          self.ncjobs = self.total_number_of_jobs = jobCount
549          if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
550              common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
551 <        common.logger.message("\n"+str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
552 <        
551 >        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
552 >
553 >        # screen output
554 >        screenOutput = "List of jobs and available destination sites:\n\n"
555 >
556 >        # keep track of blocks with no sites to print a warning at the end
557 >        noSiteBlock = []
558 >        bloskNoSite = []
559 >
560 >        blockCounter = 0
561 >        for block in blocks:
562 >            if block in jobsOfBlock.keys() :
563 >                blockCounter += 1
564 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
565 >                if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
566 >                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
567 >                    bloskNoSite.append( blockCounter )
568 >
569 >        common.logger.message(screenOutput)
570 >        if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
571 >            msg = 'WARNING: No sites are hosting any part of data for block:\n                '
572 >            virgola = ""
573 >            if len(bloskNoSite) > 1:
574 >                virgola = ","
575 >            for block in bloskNoSite:
576 >                msg += ' ' + str(block) + virgola
577 >            msg += '\n               Related jobs:\n                 '
578 >            virgola = ""
579 >            if len(noSiteBlock) > 1:
580 >                virgola = ","
581 >            for range_jobs in noSiteBlock:
582 >                msg += str(range_jobs) + virgola
583 >            msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
584 >            common.logger.message(msg)
585 >
586          self.list_of_args = list_of_lists
587          return
588  
# Line 503 | Line 591 | class Cmssw(JobType):
591          Perform job splitting based on number of event per job
592          """
593          common.logger.debug(5,'Splitting per events')
594 <        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
595 <        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
596 <        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
594 >
595 >        if (self.selectEventsPerJob):
596 >            common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
597 >        if (self.selectNumberOfJobs):
598 >            common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
599 >        if (self.selectTotalNumberEvents):
600 >            common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
601  
602          if (self.total_number_of_events < 0):
603              msg='Cannot split jobs per Events with "-1" as total number of events'
604              raise CrabException(msg)
605  
606          if (self.selectEventsPerJob):
607 <            self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
607 >            if (self.selectTotalNumberEvents):
608 >                self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
609 >            elif(self.selectNumberOfJobs) :
610 >                self.total_number_of_jobs =self.theNumberOfJobs
611 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
612 >
613          elif (self.selectNumberOfJobs) :
614              self.total_number_of_jobs = self.theNumberOfJobs
615              self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
# Line 532 | Line 629 | class Cmssw(JobType):
629          self.list_of_args = []
630          for i in range(self.total_number_of_jobs):
631              ## Since there is no input, any site is good
632 <           # self.jobDestination.append(["Any"])
633 <            self.jobDestination.append([""]) #must be empty to write correctly the xml
632 >            self.jobDestination.append([""]) #must be empty to write correctly the xml
633 >            args=[]
634 >            if (self.firstRun):
635 >                ## pythia first run
636 >                args.append(str(self.firstRun)+str(i))
637              if (self.sourceSeed):
638 +                args.append(str(self.sourceSeed)+str(i))
639                  if (self.sourceSeedVtx):
640 <                    ## pythia + vtx random seed
641 <                    self.list_of_args.append([
642 <                                              str(self.sourceSeed)+str(i),
643 <                                              str(self.sourceSeedVtx)+str(i)
644 <                                              ])
645 <                else:
646 <                    ## only pythia random seed
647 <                    self.list_of_args.append([(str(self.sourceSeed)+str(i))])
648 <            else:
649 <                ## no random seed
650 <                self.list_of_args.append([str(i)])
651 <        #print self.list_of_args
640 >                    ## + vtx random seed
641 >                    args.append(str(self.sourceSeedVtx)+str(i))
642 >                if (self.sourceSeedG4):
643 >                    ## + G4 random seed
644 >                    args.append(str(self.sourceSeedG4)+str(i))
645 >                if (self.sourceSeedMix):
646 >                    ## + Mix random seed
647 >                    args.append(str(self.sourceSeedMix)+str(i))
648 >                pass
649 >            pass
650 >            self.list_of_args.append(args)
651 >        pass
652 >
653 >        # print self.list_of_args
654  
655          return
656  
# Line 576 | Line 679 | class Cmssw(JobType):
679          return
680  
681      def split(self, jobParams):
682 <
682 >
683          common.jobDB.load()
684          #### Fabio
685          njobs = self.total_number_of_jobs
# Line 584 | Line 687 | class Cmssw(JobType):
687          # create the empty structure
688          for i in range(njobs):
689              jobParams.append("")
690 <        
690 >
691          for job in range(njobs):
692              jobParams[job] = arglist[job]
693              # print str(arglist[job])
# Line 595 | Line 698 | class Cmssw(JobType):
698  
699          common.jobDB.save()
700          return
701 <    
701 >
702      def getJobTypeArguments(self, nj, sched):
703          result = ''
704          for i in common.jobDB.arguments(nj):
705              result=result+str(i)+" "
706          return result
707 <  
707 >
708      def numberOfJobs(self):
709          # Fabio
710          return self.total_number_of_jobs
# Line 610 | Line 713 | class Cmssw(JobType):
713          """
714          Return the TarBall with lib and exe
715          """
716 <        
716 >
717          # if it exist, just return it
718 <        self.tgzNameWithPath = common.work_space.shareDir()+self.tgz_name
718 >        #
719 >        # Marco. Let's start to use relative path for Boss XML files
720 >        #
721 >        self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
722          if os.path.exists(self.tgzNameWithPath):
723              return self.tgzNameWithPath
724  
# Line 626 | Line 732 | class Cmssw(JobType):
732          # First of all declare the user Scram area
733          swArea = self.scram.getSWArea_()
734          #print "swArea = ", swArea
735 <        swVersion = self.scram.getSWVersion()
736 <        #print "swVersion = ", swVersion
735 >        # swVersion = self.scram.getSWVersion()
736 >        # print "swVersion = ", swVersion
737          swReleaseTop = self.scram.getReleaseTop_()
738          #print "swReleaseTop = ", swReleaseTop
739 <        
739 >
740          ## check if working area is release top
741          if swReleaseTop == '' or swArea == swReleaseTop:
742              return
743  
744 <        filesToBeTarred = []
745 <        ## First find the executable
746 <        if (self.executable != ''):
747 <            exeWithPath = self.scram.findFile_(executable)
748 < #           print exeWithPath
749 <            if ( not exeWithPath ):
750 <                raise CrabException('User executable '+executable+' not found')
751 <
752 <            ## then check if it's private or not
753 <            if exeWithPath.find(swReleaseTop) == -1:
754 <                # the exe is private, so we must ship
755 <                common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
756 <                path = swArea+'/'
757 <                exe = string.replace(exeWithPath, path,'')
758 <                filesToBeTarred.append(exe)
759 <                pass
760 <            else:
761 <                # the exe is from release, we'll find it on WN
762 <                pass
763 <
764 <        ## Now get the libraries: only those in local working area
765 <        libDir = 'lib'
766 <        lib = swArea+'/' +libDir
767 <        common.logger.debug(5,"lib "+lib+" to be tarred")
768 <        if os.path.exists(lib):
769 <            filesToBeTarred.append(libDir)
770 <
771 <        ## Now check if module dir is present
772 <        moduleDir = 'module'
773 <        if os.path.isdir(swArea+'/'+moduleDir):
774 <            filesToBeTarred.append(moduleDir)
775 <
776 <        ## Now check if the Data dir is present
777 <        dataDir = 'src/Data/'
778 <        if os.path.isdir(swArea+'/'+dataDir):
779 <            filesToBeTarred.append(dataDir)
780 <
781 <        ## copy ProdAgent dir to swArea
782 <        cmd = '\cp -rf ' + os.environ['CRABDIR'] + '/ProdAgentApi ' + swArea
783 <        cmd_out = runCommand(cmd)
784 <        if cmd_out != '':
785 <            common.logger.message('ProdAgentApi directory could not be copied to local CMSSW project directory.')
786 <            common.logger.message('No FrameworkJobreport parsing is possible on the WorkerNode.')
787 <
788 <        ## Now check if the Data dir is present
789 <        paDir = 'ProdAgentApi'
790 <        if os.path.isdir(swArea+'/'+paDir):
791 <            filesToBeTarred.append(paDir)
792 <
793 <        ## Create the tar-ball
794 <        if len(filesToBeTarred)>0:
795 <            cwd = os.getcwd()
796 <            os.chdir(swArea)
797 <            tarcmd = 'tar zcvf ' + self.tgzNameWithPath + ' '
798 <            for line in filesToBeTarred:
799 <                tarcmd = tarcmd + line + ' '
800 <            cout = runCommand(tarcmd)
801 <            if not cout:
802 <                raise CrabException('Could not create tar-ball')
803 <            os.chdir(cwd)
804 <        else:
805 <            common.logger.debug(5,"No files to be to be tarred")
806 <        
744 >        import tarfile
745 >        try: # create tar ball
746 >            tar = tarfile.open(self.tgzNameWithPath, "w:gz")
747 >            ## First find the executable
748 >            if (self.executable != ''):
749 >                exeWithPath = self.scram.findFile_(executable)
750 >                if ( not exeWithPath ):
751 >                    raise CrabException('User executable '+executable+' not found')
752 >
753 >                ## then check if it's private or not
754 >                if exeWithPath.find(swReleaseTop) == -1:
755 >                    # the exe is private, so we must ship
756 >                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
757 >                    path = swArea+'/'
758 >                    # distinguish case when script is in user project area or given by full path somewhere else
759 >                    if exeWithPath.find(path) >= 0 :
760 >                        exe = string.replace(exeWithPath, path,'')
761 >                        tar.add(path+exe,exe)
762 >                    else :
763 >                        tar.add(exeWithPath,os.path.basename(executable))
764 >                    pass
765 >                else:
766 >                    # the exe is from release, we'll find it on WN
767 >                    pass
768 >
769 >            ## Now get the libraries: only those in local working area
770 >            libDir = 'lib'
771 >            lib = swArea+'/' +libDir
772 >            common.logger.debug(5,"lib "+lib+" to be tarred")
773 >            if os.path.exists(lib):
774 >                tar.add(lib,libDir)
775 >
776 >            ## Now check if module dir is present
777 >            moduleDir = 'module'
778 >            module = swArea + '/' + moduleDir
779 >            if os.path.isdir(module):
780 >                tar.add(module,moduleDir)
781 >
782 >            ## Now check if any data dir(s) is present
783 >            swAreaLen=len(swArea)
784 >            for root, dirs, files in os.walk(swArea):
785 >                if "data" in dirs:
786 >                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
787 >                    tar.add(root+"/data",root[swAreaLen:]+"/data")
788 >
789 >            ## Add ProdAgent dir to tar
790 >            paDir = 'ProdAgentApi'
791 >            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
792 >            if os.path.isdir(pa):
793 >                tar.add(pa,paDir)
794 >
795 >            ### FEDE FOR DBS PUBLICATION
796 >            ## Add PRODCOMMON dir to tar
797 >            prodcommonDir = 'ProdCommon'
798 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
799 >            if os.path.isdir(prodcommonPath):
800 >                tar.add(prodcommonPath,prodcommonDir)
801 >            #############################
802 >
803 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
804 >            tar.close()
805 >        except :
806 >            raise CrabException('Could not create tar-ball')
807 >
808 >        ## check for tarball size
809 >        tarballinfo = os.stat(self.tgzNameWithPath)
810 >        if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
811 >            raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
812 >
813 >        ## create tar-ball with ML stuff
814 >        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
815 >        try:
816 >            tar = tarfile.open(self.MLtgzfile, "w:gz")
817 >            path=os.environ['CRABDIR'] + '/python/'
818 >            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
819 >                tar.add(path+file,file)
820 >            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
821 >            tar.close()
822 >        except :
823 >            raise CrabException('Could not create ML files tar-ball')
824 >
825          return
826 <        
826 >
827 >    def additionalInputFileTgz(self):
828 >        """
829 >        Put all additional files into a tar ball and return its name
830 >        """
831 >        import tarfile
832 >        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
833 >        tar = tarfile.open(tarName, "w:gz")
834 >        for file in self.additional_inbox_files:
835 >            tar.add(file,string.split(file,'/')[-1])
836 >        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
837 >        tar.close()
838 >        return tarName
839 >
840      def wsSetupEnvironment(self, nj):
841          """
842          Returns part of a job script which prepares
843          the execution environment for the job 'nj'.
844          """
845          # Prepare JobType-independent part
846 <        txt = ''
847 <  
848 <        ## OLI_Daniele at this level  middleware already known
712 <
713 <        txt += 'if [ $middleware == LCG ]; then \n'
846 >        txt = ''
847 >        txt += 'echo ">>> setup environment"\n'
848 >        txt += 'if [ $middleware == LCG ]; then \n'
849          txt += self.wsSetupCMSLCGEnvironment_()
850          txt += 'elif [ $middleware == OSG ]; then\n'
851          txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
852 <        txt += '    echo "Created working directory: $WORKING_DIR"\n'
718 <        txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
852 >        txt += '    if [ ! $? == 0 ] ;then\n'
853          txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
854 <        txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
855 <        txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
856 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
723 <        txt += '        rm -f $RUNTIME_AREA/$repo \n'
724 <        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
725 <        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
854 >        txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
855 >        txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
856 >        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
857          txt += '        exit 1\n'
858          txt += '    fi\n'
859 +        txt += '    echo ">>> Created working directory: $WORKING_DIR"\n'
860          txt += '\n'
861          txt += '    echo "Change to working directory: $WORKING_DIR"\n'
862          txt += '    cd $WORKING_DIR\n'
863 <        txt += self.wsSetupCMSOSGEnvironment_()
863 >        txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
864 >        txt += self.wsSetupCMSOSGEnvironment_()
865 >        #txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
866 >        #txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
867          txt += 'fi\n'
868  
869          # Prepare JobType-specific part
870          scram = self.scram.commandName()
871          txt += '\n\n'
872 <        txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
872 >        txt += 'echo ">>> specific cmssw setup environment:"\n'
873 >        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
874          txt += scram+' project CMSSW '+self.version+'\n'
875          txt += 'status=$?\n'
876          txt += 'if [ $status != 0 ] ; then\n'
877 <        txt += '   echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
878 <        txt += '   echo "JOB_EXIT_STATUS = 10034"\n'
879 <        txt += '   echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
880 <        txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
745 <        txt += '   rm -f $RUNTIME_AREA/$repo \n'
746 <        txt += '   echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
747 <        txt += '   echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
748 <        ## OLI_Daniele
877 >        txt += '    echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
878 >        txt += '    echo "JOB_EXIT_STATUS = 10034"\n'
879 >        txt += '    echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
880 >        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
881          txt += '    if [ $middleware == OSG ]; then \n'
750        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
882          txt += '        cd $RUNTIME_AREA\n'
883 +        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
884 +        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
885          txt += '        /bin/rm -rf $WORKING_DIR\n'
886          txt += '        if [ -d $WORKING_DIR ] ;then\n'
887 <        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
888 <        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
889 <        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
890 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
758 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
759 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
760 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
887 >        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
888 >        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
889 >        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
890 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
891          txt += '        fi\n'
892          txt += '    fi \n'
893 <        txt += '   exit 1 \n'
893 >        txt += '    exit 1 \n'
894          txt += 'fi \n'
765        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
895          txt += 'cd '+self.version+'\n'
896 +        ########## FEDE FOR DBS2 ######################
897 +        txt += 'SOFTWARE_DIR=`pwd`\n'
898 +        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
899 +        ###############################################
900          ### needed grep for bug in scramv1 ###
901 +        txt += scram+' runtime -sh\n'
902          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
903 +        txt += 'echo $PATH\n'
904  
905          # Handle the arguments:
906          txt += "\n"
907          txt += "## number of arguments (first argument always jobnumber)\n"
908          txt += "\n"
774 #        txt += "narg=$#\n"
909          txt += "if [ $nargs -lt 2 ]\n"
910          txt += "then\n"
911          txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
912          txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
913          txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
914          txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
781        txt += '    rm -f $RUNTIME_AREA/$repo \n'
782        txt += '    echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
783        txt += '    echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
784        ## OLI_Daniele
915          txt += '    if [ $middleware == OSG ]; then \n'
786        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
916          txt += '        cd $RUNTIME_AREA\n'
917 +        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
918 +        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
919          txt += '        /bin/rm -rf $WORKING_DIR\n'
920          txt += '        if [ -d $WORKING_DIR ] ;then\n'
921 <        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
922 <        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
923 <        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
924 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
794 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
795 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
796 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
921 >        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
922 >        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
923 >        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
924 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
925          txt += '        fi\n'
926          txt += '    fi \n'
927          txt += "    exit 1\n"
# Line 802 | Line 930 | class Cmssw(JobType):
930  
931          # Prepare job-specific part
932          job = common.job_list[nj]
933 +        ### FEDE FOR DBS OUTPUT PUBLICATION
934 +        if (self.datasetPath):
935 +            txt += '\n'
936 +            txt += 'DatasetPath='+self.datasetPath+'\n'
937 +
938 +            datasetpath_split = self.datasetPath.split("/")
939 +
940 +            txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
941 +            txt += 'DataTier='+datasetpath_split[2]+'\n'
942 +            txt += 'ApplicationFamily=cmsRun\n'
943 +
944 +        else:
945 +            txt += 'DatasetPath=MCDataTier\n'
946 +            txt += 'PrimaryDataset=null\n'
947 +            txt += 'DataTier=null\n'
948 +            txt += 'ApplicationFamily=MCDataTier\n'
949          if self.pset != None: #CarlosDaniele
950              pset = os.path.basename(job.configFilename())
951              txt += '\n'
952 +            txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
953              if (self.datasetPath): # standard job
809                #txt += 'InputFiles=$2\n'
954                  txt += 'InputFiles=${args[1]}\n'
955                  txt += 'MaxEvents=${args[2]}\n'
956                  txt += 'SkipEvents=${args[3]}\n'
957                  txt += 'echo "Inputfiles:<$InputFiles>"\n'
958 <                txt += 'sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' > pset_tmp_1.cfg\n'
958 >                txt += 'sed "s#\'INPUTFILE\'#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
959                  txt += 'echo "MaxEvents:<$MaxEvents>"\n'
960 <                txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" pset_tmp_1.cfg > pset_tmp_2.cfg\n'
960 >                txt += 'sed "s#int32 input = 0#int32 input = $MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
961                  txt += 'echo "SkipEvents:<$SkipEvents>"\n'
962 <                txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" pset_tmp_2.cfg > pset.cfg\n'
962 >                txt += 'sed "s#uint32 skipEvents = 0#uint32 skipEvents = $SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
963              else:  # pythia like job
964 +                seedIndex=1
965 +                if (self.firstRun):
966 +                    txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
967 +                    txt += 'echo "FirstRun: <$FirstRun>"\n'
968 +                    txt += 'sed "s#uint32 firstRun = 0#uint32 firstRun = $FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
969 +                    seedIndex=seedIndex+1
970 +
971                  if (self.sourceSeed):
972 < #                    txt += 'Seed=$2\n'
973 <                    txt += 'Seed=${args[1]}\n'
974 <                    txt += 'echo "Seed: <$Seed>"\n'
975 <                    txt += 'sed "s#\<INPUT\>#$Seed#" $RUNTIME_AREA/'+pset+' > tmp.cfg\n'
972 >                    txt += 'Seed=${args['+str(seedIndex)+']}\n'
973 >                    txt += 'sed "s#uint32 sourceSeed = 0#uint32 sourceSeed = $Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
974 >                    seedIndex=seedIndex+1
975 >                    ## the following seeds are not always present
976                      if (self.sourceSeedVtx):
977 < #                        txt += 'VtxSeed=$3\n'
827 <                        txt += 'VtxSeed=${args[2]}\n'
977 >                        txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
978                          txt += 'echo "VtxSeed: <$VtxSeed>"\n'
979 <                        txt += 'sed "s#INPUTVTX#$VtxSeed#" tmp.cfg > pset.cfg\n'
980 <                    else:
981 <                        txt += 'mv tmp.cfg pset.cfg\n'
982 <                else:
983 <                    txt += '# Copy untouched pset\n'
984 <                    txt += 'cp $RUNTIME_AREA/'+pset+' pset.cfg\n'
985 <
979 >                        txt += 'sed "s#uint32 VtxSmeared = 0#uint32 VtxSmeared = $VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
980 >                        seedIndex += 1
981 >                    if (self.sourceSeedG4):
982 >                        txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
983 >                        txt += 'echo "G4Seed: <$G4Seed>"\n'
984 >                        txt += 'sed "s#uint32 g4SimHits = 0#uint32 g4SimHits = $G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
985 >                        seedIndex += 1
986 >                    if (self.sourceSeedMix):
987 >                        txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
988 >                        txt += 'echo "MixSeed: <$mixSeed>"\n'
989 >                        txt += 'sed "s#uint32 mix = 0#uint32 mix = $mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
990 >                        seedIndex += 1
991 >                    pass
992 >                pass
993 >            txt += 'mv -f '+pset+' pset.cfg\n'
994  
995          if len(self.additional_inbox_files) > 0:
996 <            for file in self.additional_inbox_files:
997 <                relFile = file.split("/")[-1]
998 <                txt += 'if [ -e $RUNTIME_AREA/'+relFile+' ] ; then\n'
999 <                txt += '   cp $RUNTIME_AREA/'+relFile+' .\n'
842 <                txt += '   chmod +x '+relFile+'\n'
843 <                txt += 'fi\n'
844 <            pass
996 >            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
997 >            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
998 >            txt += 'fi\n'
999 >            pass
1000  
1001          if self.pset != None: #CarlosDaniele
847            txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
848        
1002              txt += '\n'
1003              txt += 'echo "***** cat pset.cfg *********"\n'
1004              txt += 'cat pset.cfg\n'
1005              txt += 'echo "****** end pset.cfg ********"\n'
1006              txt += '\n'
1007 <            # txt += 'echo "***** cat pset1.cfg *********"\n'
1008 <            # txt += 'cat pset1.cfg\n'
1009 <            # txt += 'echo "****** end pset1.cfg ********"\n'
1007 >            ### FEDE FOR DBS OUTPUT PUBLICATION
1008 >            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
1009 >            txt += 'echo "PSETHASH = $PSETHASH" \n'
1010 >            ##############
1011 >            txt += '\n'
1012          return txt
1013  
1014 <    def wsBuildExe(self, nj):
1014 >    def wsBuildExe(self, nj=0):
1015          """
1016          Put in the script the commands to build an executable
1017          or a library.
# Line 865 | Line 1020 | class Cmssw(JobType):
1020          txt = ""
1021  
1022          if os.path.isfile(self.tgzNameWithPath):
1023 <            txt += 'echo "tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'"\n'
1023 >            txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
1024              txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
1025              txt += 'untar_status=$? \n'
1026              txt += 'if [ $untar_status -ne 0 ]; then \n'
# Line 873 | Line 1028 | class Cmssw(JobType):
1028              txt += '   echo "JOB_EXIT_STATUS = $untar_status" \n'
1029              txt += '   echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
1030              txt += '   if [ $middleware == OSG ]; then \n'
876            txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1031              txt += '       cd $RUNTIME_AREA\n'
1032 +            txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1033 +            txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
1034              txt += '       /bin/rm -rf $WORKING_DIR\n'
1035              txt += '       if [ -d $WORKING_DIR ] ;then\n'
1036              txt += '           echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
1037              txt += '           echo "JOB_EXIT_STATUS = 50999"\n'
1038              txt += '           echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
1039              txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
884            txt += '           rm -f $RUNTIME_AREA/$repo \n'
885            txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
886            txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1040              txt += '       fi\n'
1041              txt += '   fi \n'
1042              txt += '   \n'
# Line 892 | Line 1045 | class Cmssw(JobType):
1045              txt += '   echo "Successful untar" \n'
1046              txt += 'fi \n'
1047              txt += '\n'
1048 <            txt += 'echo "Include ProdAgentApi in PYTHONPATH"\n'
1048 >            txt += 'echo ">>> Include ProdAgentApi and PRODCOMMON in PYTHONPATH:"\n'
1049              txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1050 <            txt += '   export PYTHONPATH=ProdAgentApi\n'
1050 >            #### FEDE FOR DBS OUTPUT PUBLICATION
1051 >            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
1052              txt += 'else\n'
1053 <            txt += '   export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
1053 >            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1054 >            txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1055 >            ###################
1056              txt += 'fi\n'
1057              txt += '\n'
1058  
1059              pass
1060 <        
1060 >
1061          return txt
1062  
    def modifySteeringCards(self, nj):
        """
        modify the card provided by the user,
        writing a new card into share dir

        The body of this method is empty: it takes ``nj`` (presumably the
        job number - not used here), does nothing and implicitly
        returns None.
        """

1069      def executableName(self):
1070 <        if self.pset == None: #CarlosDaniele
1070 >        if self.scriptExe: #CarlosDaniele
1071              return "sh "
1072          else:
1073              return self.executable
1074  
1075      def executableArgs(self):
1076 <        if self.pset == None:#CarlosDaniele
1076 >        if self.scriptExe:#CarlosDaniele
1077              return   self.scriptExe + " $NJob"
1078 <        else:
1079 <            return " -p pset.cfg"
1078 >        else:
1079 >            # if >= CMSSW_1_5_X, add -j crab_fjr.xml
1080 >            version_array = self.scram.getSWVersion().split('_')
1081 >            major = 0
1082 >            minor = 0
1083 >            try:
1084 >                major = int(version_array[1])
1085 >                minor = int(version_array[2])
1086 >            except:
1087 >                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
1088 >                raise CrabException(msg)
1089 >            if major >= 1 and minor >= 5 :
1090 >                return " -j " + self.fjrFileName + " -p pset.cfg"
1091 >            else:
1092 >                return " -p pset.cfg"
1093  
1094      def inputSandbox(self, nj):
1095          """
# Line 932 | Line 1101 | class Cmssw(JobType):
1101          ## code
1102          if os.path.isfile(self.tgzNameWithPath):
1103              inp_box.append(self.tgzNameWithPath)
1104 +        if os.path.isfile(self.MLtgzfile):
1105 +            inp_box.append(self.MLtgzfile)
1106          ## config
1107 <        if not self.pset is None: #CarlosDaniele
1108 <            inp_box.append(common.job_list[nj].configFilename())
1107 >        if not self.pset is None:
1108 >            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1109          ## additional input files
1110 <        #for file in self.additional_inbox_files:
1111 <        #    inp_box.append(common.work_space.cwdDir()+file)
1110 >        tgz = self.additionalInputFileTgz()
1111 >        inp_box.append(tgz)
1112          return inp_box
1113  
1114      def outputSandbox(self, nj):
# Line 948 | Line 1119 | class Cmssw(JobType):
1119  
1120          ## User Declared output files
1121          for out in (self.output_file+self.output_file_sandbox):
1122 <            n_out = nj + 1
1122 >            n_out = nj + 1
1123              out_box.append(self.numberFile_(out,str(n_out)))
1124          return out_box
1125  
# Line 964 | Line 1135 | class Cmssw(JobType):
1135          """
1136  
1137          txt = '\n'
1138 <        txt += '# directory content\n'
1138 >        txt += 'echo" >>> directory content:"\n'
1139          txt += 'ls \n'
1140 +        txt = '\n'
1141 +
1142 +        txt += 'output_exit_status=0\n'
1143  
1144 <        for fileWithSuffix in (self.output_file+self.output_file_sandbox):
1144 >        for fileWithSuffix in (self.output_file_sandbox):
1145              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1146              txt += '\n'
1147              txt += '# check output file\n'
1148 <            txt += 'ls '+fileWithSuffix+'\n'
1149 <            txt += 'ls_result=$?\n'
1150 <            txt += 'if [ $ls_result -ne 0 ] ; then\n'
1151 <            txt += '   echo "ERROR: Problem with output file"\n'
1148 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1149 >            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1150 >            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1151 >            txt += 'else\n'
1152 >            txt += '    exit_status=60302\n'
1153 >            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1154              if common.scheduler.boss_scheduler_name == 'condor_g':
1155                  txt += '    if [ $middleware == OSG ]; then \n'
1156                  txt += '        echo "prepare dummy output file"\n'
1157                  txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1158                  txt += '    fi \n'
983            txt += 'else\n'
984            txt += '   cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1159              txt += 'fi\n'
986      
987        txt += 'cd $RUNTIME_AREA\n'
988        txt += 'cd $RUNTIME_AREA\n'
989        ### OLI_DANIELE
990        txt += 'if [ $middleware == OSG ]; then\n'  
991        txt += '    cd $RUNTIME_AREA\n'
992        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
993        txt += '    /bin/rm -rf $WORKING_DIR\n'
994        txt += '    if [ -d $WORKING_DIR ] ;then\n'
995        txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
996        txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
997        txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
998        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
999        txt += '        rm -f $RUNTIME_AREA/$repo \n'
1000        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1001        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1002        txt += '    fi\n'
1003        txt += 'fi\n'
1004        txt += '\n'
1160  
1161 <        file_list = ''
1007 <        ## Add to filelist only files to be possibly copied to SE
1008 <        for fileWithSuffix in self.output_file:
1161 >        for fileWithSuffix in (self.output_file):
1162              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1163 <            file_list=file_list+output_file_num+' '
1164 <        file_list=file_list[:-1]
1165 <        txt += 'file_list="'+file_list+'"\n'
1163 >            txt += '\n'
1164 >            txt += '# check output file\n'
1165 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1166 >            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1167 >            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1168 >            txt += 'else\n'
1169 >            txt += '    exit_status=60302\n'
1170 >            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1171 >            txt += '    echo "JOB_EXIT_STATUS = $exit_status"\n'
1172 >            txt += '    output_exit_status=$exit_status\n'
1173 >            if common.scheduler.boss_scheduler_name == 'condor_g':
1174 >                txt += '    if [ $middleware == OSG ]; then \n'
1175 >                txt += '        echo "prepare dummy output file"\n'
1176 >                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1177 >                txt += '    fi \n'
1178 >            txt += 'fi\n'
1179 >        file_list = []
1180 >        for fileWithSuffix in (self.output_file):
1181 >             file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1182  
1183 +        txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1184 +        txt += 'cd $RUNTIME_AREA\n'
1185 +        txt += 'echo ">>> current directory (RUNTIME_AREA):  $RUNTIME_AREA"\n'
1186          return txt
1187  
1188      def numberFile_(self, file, txt):
# Line 1021 | Line 1193 | class Cmssw(JobType):
1193          # take away last extension
1194          name = p[0]
1195          for x in p[1:-1]:
1196 <           name=name+"."+x
1196 >            name=name+"."+x
1197          # add "_txt"
1198          if len(p)>1:
1199 <          ext = p[len(p)-1]
1200 <          result = name + '_' + txt + "." + ext
1199 >            ext = p[len(p)-1]
1200 >            result = name + '_' + txt + "." + ext
1201          else:
1202 <          result = name + '_' + txt
1203 <        
1202 >            result = name + '_' + txt
1203 >
1204          return result
1205  
1206 <    def getRequirements(self):
1206 >    def getRequirements(self, nj=[]):
1207          """
1208 <        return job requirements to add to jdl files
1208 >        return job requirements to add to jdl files
1209          """
1210          req = ''
1211          if self.version:
1212              req='Member("VO-cms-' + \
1213                   self.version + \
1214                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1215 +        ## SL add requirement for OS version only if SL4
1216 +        #reSL4 = re.compile( r'slc4' )
1217 +        if self.executable_arch: # and reSL4.search(self.executable_arch):
1218 +            req+=' && Member("VO-cms-' + \
1219 +                 self.executable_arch + \
1220 +                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1221  
1222          req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1223  
# Line 1049 | Line 1227 | class Cmssw(JobType):
1227          """ return the config filename """
1228          return self.name()+'.cfg'
1229  
1052    ### OLI_DANIELE
1230      def wsSetupCMSOSGEnvironment_(self):
1231          """
1232          Returns part of a job script which is prepares
1233          the execution environment and which is common for all CMS jobs.
1234          """
1235 <        txt = '\n'
1236 <        txt += '   echo "### SETUP CMS OSG  ENVIRONMENT ###"\n'
1237 <        txt += '   if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1238 <        txt += '      # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1239 <        txt += '       source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1063 <        txt += '   elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1235 >        txt = '    echo ">>> setup CMS OSG environment:"\n'
1236 >        txt += '    echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
1237 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1238 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1239 >        txt += '    if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1240          txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1241 <        txt += '       source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1242 <        txt += '   else\n'
1243 <        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1244 <        txt += '       echo "JOB_EXIT_STATUS = 10020"\n'
1245 <        txt += '       echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1246 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1071 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1072 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1073 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1074 <        txt += '       exit 1\n'
1241 >        txt += '        source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1242 >        txt += '    else\n'
1243 >        txt += '        echo "SET_CMS_ENV 10020 ==> ERROR $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1244 >        txt += '        echo "JOB_EXIT_STATUS = 10020"\n'
1245 >        txt += '        echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1246 >        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1247          txt += '\n'
1248 <        txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1249 <        txt += '       cd $RUNTIME_AREA\n'
1250 <        txt += '       /bin/rm -rf $WORKING_DIR\n'
1251 <        txt += '       if [ -d $WORKING_DIR ] ;then\n'
1252 <        txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1253 <        txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
1254 <        txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1255 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1256 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
1257 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1086 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1087 <        txt += '       fi\n'
1248 >        txt += '        cd $RUNTIME_AREA\n'
1249 >        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1250 >        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
1251 >        txt += '        /bin/rm -rf $WORKING_DIR\n'
1252 >        txt += '        if [ -d $WORKING_DIR ] ;then\n'
1253 >        txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1254 >        txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
1255 >        txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1256 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1257 >        txt += '        fi\n'
1258          txt += '\n'
1259 <        txt += '       exit 1\n'
1260 <        txt += '   fi\n'
1259 >        txt += '        exit 1\n'
1260 >        txt += '    fi\n'
1261          txt += '\n'
1262 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1263 <        txt += '   echo " END SETUP CMS OSG  ENVIRONMENT "\n'
1262 >        txt += '    echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1263 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1264  
1265          return txt
1266 <
1266 >
1267      ### OLI_DANIELE
1268      def wsSetupCMSLCGEnvironment_(self):
1269          """
1270          Returns part of a job script which is prepares
1271          the execution environment and which is common for all CMS jobs.
1272          """
1273 <        txt  = '   \n'
1274 <        txt += '   echo " ### SETUP CMS LCG  ENVIRONMENT ### "\n'
1275 <        txt += '   if [ ! $VO_CMS_SW_DIR ] ;then\n'
1276 <        txt += '       echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1277 <        txt += '       echo "JOB_EXIT_STATUS = 10031" \n'
1278 <        txt += '       echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1279 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1280 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1281 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1282 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1283 <        txt += '       exit 1\n'
1284 <        txt += '   else\n'
1285 <        txt += '       echo "Sourcing environment... "\n'
1286 <        txt += '       if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1287 <        txt += '           echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1288 <        txt += '           echo "JOB_EXIT_STATUS = 10020"\n'
1289 <        txt += '           echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1290 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1291 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1292 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1293 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1294 <        txt += '           exit 1\n'
1295 <        txt += '       fi\n'
1296 <        txt += '       echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1297 <        txt += '       source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1298 <        txt += '       result=$?\n'
1299 <        txt += '       if [ $result -ne 0 ]; then\n'
1300 <        txt += '           echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1301 <        txt += '           echo "JOB_EXIT_STATUS = 10032"\n'
1302 <        txt += '           echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1303 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1304 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1305 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1306 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1307 <        txt += '           exit 1\n'
1308 <        txt += '       fi\n'
1309 <        txt += '   fi\n'
1310 <        txt += '   \n'
1311 <        txt += '   string=`cat /etc/redhat-release`\n'
1312 <        txt += '   echo $string\n'
1313 <        txt += '   if [[ $string = *alhalla* ]]; then\n'
1314 <        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1315 <        txt += '   elif [[ $string = *Enterprise* ]] || [[ $string = *cientific* ]]; then\n'
1316 <        txt += '       export SCRAM_ARCH=slc3_ia32_gcc323\n'
1317 <        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1318 <        txt += '   else\n'
1319 <        txt += '       echo "SET_CMS_ENV 10033 ==> ERROR OS unknown, LCG environment not initialized"\n'
1320 <        txt += '       echo "JOB_EXIT_STATUS = 10033"\n'
1321 <        txt += '       echo "JobExitCode=10033" | tee -a $RUNTIME_AREA/$repo\n'
1322 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1323 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1324 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1325 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1326 <        txt += '       exit 1\n'
1327 <        txt += '   fi\n'
1328 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1329 <        txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1273 >        txt = '    echo ">>> setup CMS LCG environment:"\n'
1274 >        txt += '    echo "set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
1275 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1276 >        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
1277 >        txt += '    if [ ! $VO_CMS_SW_DIR ] ;then\n'
1278 >        txt += '        echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1279 >        txt += '        echo "JOB_EXIT_STATUS = 10031" \n'
1280 >        txt += '        echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1281 >        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1282 >        txt += '        exit 1\n'
1283 >        txt += '    else\n'
1284 >        txt += '        echo "Sourcing environment... "\n'
1285 >        txt += '        if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1286 >        txt += '            echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1287 >        txt += '            echo "JOB_EXIT_STATUS = 10020"\n'
1288 >        txt += '            echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1289 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1290 >        txt += '            exit 1\n'
1291 >        txt += '        fi\n'
1292 >        txt += '        echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1293 >        txt += '        source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1294 >        txt += '        result=$?\n'
1295 >        txt += '        if [ $result -ne 0 ]; then\n'
1296 >        txt += '            echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1297 >        txt += '            echo "JOB_EXIT_STATUS = 10032"\n'
1298 >        txt += '            echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1299 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1300 >        txt += '            exit 1\n'
1301 >        txt += '        fi\n'
1302 >        txt += '    fi\n'
1303 >        txt += '    \n'
1304 >        txt += '    echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1305 >        return txt
1306 >
1307 >    ### FEDE FOR DBS OUTPUT PUBLICATION
1308 >    def modifyReport(self, nj):
1309 >        """
1310 >        insert the part of the script that modifies the FrameworkJob Report
1311 >        """
1312 >
1313 >        txt = ''
1314 >        try:
1315 >            publish_data = int(self.cfg_params['USER.publish_data'])
1316 >        except KeyError:
1317 >            publish_data = 0
1318 >        if (publish_data == 1):
1319 >            txt += 'echo ">>> Modify Job Report:" \n'
1320 >            ################ FEDE FOR DBS2 #############################################
1321 >            txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1322 >            #############################################################################
1323 >
1324 >            txt += 'if [ -z "$SE" ]; then\n'
1325 >            txt += '    SE="" \n'
1326 >            txt += 'fi \n'
1327 >            txt += 'if [ -z "$SE_PATH" ]; then\n'
1328 >            txt += '    SE_PATH="" \n'
1329 >            txt += 'fi \n'
1330 >            txt += 'echo "SE = $SE"\n'
1331 >            txt += 'echo "SE_PATH = $SE_PATH"\n'
1332 >
1333 >            processedDataset = self.cfg_params['USER.publish_data_name']
1334 >            txt += 'ProcessedDataset='+processedDataset+'\n'
1335 >            #### LFN=/store/user/<user>/processedDataset_PSETHASH
1336 >            txt += 'if [ "$SE_PATH" == "" ]; then\n'
1337 >            #### FEDE: added slash in LFN ##############
1338 >            txt += '    FOR_LFN=/copy_problems/ \n'
1339 >            txt += 'else \n'
1340 >            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1341 >            #####  FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1342 >            txt += '    FOR_LFN=/store$tmp \n'
1343 >            txt += 'fi \n'
1344 >            txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1345 >            txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1346 >            txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1347 >            #txt += 'echo "$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1348 >            txt += 'echo "$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1349 >            txt += '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1350 >            #txt += '$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1351 >
1352 >            txt += 'modifyReport_result=$?\n'
1353 >            txt += 'echo modifyReport_result = $modifyReport_result\n'
1354 >            txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1355 >            txt += '    exit_status=1\n'
1356 >            txt += '    echo "ERROR: Problem with ModifyJobReport"\n'
1357 >            txt += 'else\n'
1358 >            txt += '    mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1359 >            txt += 'fi\n'
1360 >        else:
1361 >            txt += 'echo "no data publication required"\n'
1362 >        return txt
1363 >
1364 >    def cleanEnv(self):
1365 >        txt = ''
1366 >        txt += 'if [ $middleware == OSG ]; then\n'
1367 >        txt += '    cd $RUNTIME_AREA\n'
1368 >        txt += '    echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1369 >        txt += '    echo ">>> Remove working directory: $WORKING_DIR"\n'
1370 >        txt += '    /bin/rm -rf $WORKING_DIR\n'
1371 >        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1372 >        txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1373 >        txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
1374 >        txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1375 >        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1376 >        txt += '    fi\n'
1377 >        txt += 'fi\n'
1378 >        txt += '\n'
1379          return txt
1380  
1381      def setParam_(self, param, value):
# Line 1167 | Line 1386 | class Cmssw(JobType):
1386  
    def setTaskid_(self):
        """
        Copy the 'taskId' entry of the configuration parameters
        into self._taskId.
        """
        self._taskId = self.cfg_params['taskId']
1389 <        
1389 >
    def getTaskid(self):
        """
        Return the value held in self._taskId.
        """
        return self._taskId
1392  
1174 #######################################################################
1393      def uniquelist(self, old):
1394          """
1395          remove duplicates from a list
# Line 1180 | Line 1398 | class Cmssw(JobType):
1398          for e in old:
1399              nd[e]=0
1400          return nd.keys()
1401 +
1402 +
1403 +    def checkOut(self, limit):
1404 +        """
1405 +        check the dimension of the output files
1406 +        """
1407 +        txt += 'echo ">>> Starting output sandbox limit check :"\n'
1408 +        allOutFiles = ""
1409 +        listOutFiles = []
1410 +        for fileOut in (self.output_file+self.output_file_sandbox):
1411 +             if fileOut.find('crab_fjr') == -1:
1412 +                 allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob')
1413 +                 listOutFiles.append(self.numberFile_(fileOut, '$NJob'))
1414 +        txt += 'echo "OUTPUT files: '+str(allOutFiles)+'";\n'
1415 +        txt += 'ls -gGhrta;\n'
1416 +        txt += 'sum=0;\n'
1417 +        txt += 'for file in '+str(allOutFiles)+' ; do\n'
1418 +        txt += '    if [ -e $file ]; then\n'
1419 +        txt += '        tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
1420 +        txt += '        sum=`expr $sum + $tt`\n'
1421 +        txt += '    else\n'
1422 +        txt += '        echo "WARNING: output file $file not found!"\n'
1423 +        txt += '    fi\n'
1424 +        txt += 'done\n'
1425 +        txt += 'echo "Total Output dimension: $sum";\n'
1426 +        txt += 'limit='+str(limit)+';\n'
1427 +        txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
1428 +        txt += 'if [ $limit -lt $sum ]; then\n'
1429 +        txt += '    echo "WARNING: output files have to big size - something will be lost;"\n'
1430 +        txt += '    echo "         checking the output file sizes..."\n'
1431 +        """
1432 +        txt += '    dim=0;\n'
1433 +        txt += '    exclude=0;\n'
1434 +        txt += '    for files in '+str(allOutFiles)+' ; do\n'
1435 +        txt += '        sumTemp=0;\n'
1436 +        txt += '        for file2 in '+str(allOutFiles)+' ; do\n'
1437 +        txt += '            if [ $file != $file2 ]; then\n'
1438 +        txt += '                tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1439 +        txt += '                sumTemp=`expr $sumTemp + $tt`;\n'
1440 +        txt += '            fi\n'
1441 +        txt += '        done\n'
1442 +        txt += '        if [ $sumTemp -lt $limit ]; then\n'
1443 +        txt += '            if [ $dim -lt $sumTemp ]; then\n'
1444 +        txt += '                dim=$sumTemp;\n'
1445 +        txt += '                exclude=$file;\n'
1446 +        txt += '            fi\n'
1447 +        txt += '        fi\n'
1448 +        txt += '    done\n'
1449 +        txt += '    echo "Dimension calculated: $dim"; echo "File to exclude: $exclude";\n'
1450 +        """
1451 +        txt += '    tot=0;\n'
1452 +        txt += '    for file2 in '+str(allOutFiles)+' ; do\n'
1453 +        txt += '        tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1454 +        txt += '        tot=`expr $tot + $tt`;\n'
1455 +        txt += '        if [ $limit -lt $tot ]; then\n'
1456 +        txt += '            tot=`expr $tot - $tt`;\n'
1457 +        txt += '            fileLast=$file;\n'
1458 +        txt += '            break;\n'
1459 +        txt += '        fi\n'
1460 +        txt += '    done\n'
1461 +        txt += '    echo "Dimension calculated: $tot"; echo "First file to exclude: $file";\n'
1462 +        txt += '    flag=0;\n'
1463 +        txt += '    for filess in '+str(allOutFiles)+' ; do\n'
1464 +        txt += '        if [ $fileLast = $filess ]; then\n'
1465 +        txt += '            flag=1;\n'
1466 +        txt += '        fi\n'
1467 +        txt += '        if [ $flag -eq 1 ]; then\n'
1468 +        txt += '            rm -f $filess;\n'
1469 +        txt += '        fi\n'
1470 +        txt += '    done\n'
1471 +        txt += '    ls -agGhrt;\n'
1472 +        txt += '    echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
1473 +        txt += '    echo "JOB_EXIT_STATUS = 70000";\n'
1474 +        txt += '    exit_status=70000;\n'
1475 +        txt += 'else'
1476 +        txt += '    echo "Total Output dimension $sum is fine.";\n'
1477 +        txt += 'fi\n'
1478 +        txt += 'echo "Ending output sandbox limit check"\n'
1479 +        return txt

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines