ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.155 by slacapra, Tue Feb 12 15:20:47 2008 UTC vs.
Revision 1.226 by ewv, Thu Jul 3 19:30:07 2008 UTC

# Line 5 | Line 5 | from crab_util import *
5   from BlackWhiteListParser import BlackWhiteListParser
6   import common
7   import Scram
8 + from LFNBaseName import *
9  
10   import os, string, glob
11  
12   class Cmssw(JobType):
13 <    def __init__(self, cfg_params, ncjobs):
13 >    def __init__(self, cfg_params, ncjobs,skip_blocks, isNew):
14          JobType.__init__(self, 'CMSSW')
15          common.logger.debug(3,'CMSSW::__init__')
16 +        self.skip_blocks = skip_blocks
17  
18          self.argsList = []
19  
# Line 33 | Line 35 | class Cmssw(JobType):
35          self.executable = ''
36          self.executable_arch = self.scram.getArch()
37          self.tgz_name = 'default.tgz'
36        self.additional_tgz_name = 'additional.tgz'
38          self.scriptName = 'CMSSW.sh'
39 <        self.pset = ''      #scrip use case Da
40 <        self.datasetPath = '' #scrip use case Da
39 >        self.pset = ''
40 >        self.datasetPath = ''
41  
42          # set FJR file name
43          self.fjrFileName = 'crab_fjr.xml'
44  
45          self.version = self.scram.getSWVersion()
46 <
47 <        #
48 <        # Try to block creation in case of arch/version mismatch
49 <        #
50 <
51 <        a = string.split(self.version, "_")
52 <
53 <        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
54 <            msg = "Warning: You are using %s version of CMSSW  with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
55 <            common.logger.message(msg)
55 <        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
56 <            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
46 >        version_array = self.version.split('_')
47 >        self.CMSSW_major = 0
48 >        self.CMSSW_minor = 0
49 >        self.CMSSW_patch = 0
50 >        try:
51 >            self.CMSSW_major = int(version_array[1])
52 >            self.CMSSW_minor = int(version_array[2])
53 >            self.CMSSW_patch = int(version_array[3])
54 >        except:
55 >            msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
56              raise CrabException(msg)
57  
59        common.taskDB.setDict('codeVersion',self.version)
60        self.setParam_('application', self.version)
61
58          ### collect Data cards
59  
60          if not cfg_params.has_key('CMSSW.datasetpath'):
61              msg = "Error: datasetpath not defined "
62              raise CrabException(msg)
63 +
64 +        ### Temporary: added to remove input file control in the case of PU
65 +        self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)
66 +
67          tmp =  cfg_params['CMSSW.datasetpath']
68          log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
69          if string.lower(tmp)=='none':
# Line 73 | Line 73 | class Cmssw(JobType):
73              self.datasetPath = tmp
74              self.selectNoInput = 0
75  
76        # ML monitoring
77        # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
78        if not self.datasetPath:
79            self.setParam_('dataset', 'None')
80            self.setParam_('owner', 'None')
81        else:
82            ## SL what is supposed to fail here?
83            try:
84                datasetpath_split = self.datasetPath.split("/")
85                # standard style
86                self.setParam_('datasetFull', self.datasetPath)
87                self.setParam_('dataset', datasetpath_split[1])
88                self.setParam_('owner', datasetpath_split[2])
89            except:
90                self.setParam_('dataset', self.datasetPath)
91                self.setParam_('owner', self.datasetPath)
92
93        self.setParam_('taskId', common.taskDB.dict('taskId'))
94
76          self.dataTiers = []
77 <
77 >        self.debugWrap = ''
78 >        self.debug_wrapper = cfg_params.get('USER.debug_wrapper',False)
79 >        if self.debug_wrapper: self.debugWrap='--debug'
80          ## now the application
81          self.executable = cfg_params.get('CMSSW.executable','cmsRun')
99        self.setParam_('exe', self.executable)
82          log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
83  
84          if not cfg_params.has_key('CMSSW.pset'):
# Line 117 | Line 99 | class Cmssw(JobType):
99          self.output_file_sandbox.append(self.fjrFileName)
100  
101          # other output files to be returned via sandbox or copied to SE
102 +        outfileflag = False
103          self.output_file = []
104          tmp = cfg_params.get('CMSSW.output_file',None)
105          if tmp :
106 <            tmpOutFiles = string.split(tmp,',')
107 <            log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
108 <            for tmp in tmpOutFiles:
109 <                tmp=string.strip(tmp)
127 <                self.output_file.append(tmp)
128 <                pass
129 <        else:
130 <            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
131 <        pass
106 >            self.output_file = [x.strip() for x in tmp.split(',')]
107 >            outfileflag = True #output found
108 >        #else:
109 >        #    log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
110  
111          # script_exe file as additional file in inputSandbox
112          self.scriptExe = cfg_params.get('USER.script_exe',None)
113          if self.scriptExe :
114 <           if not os.path.isfile(self.scriptExe):
115 <              msg ="ERROR. file "+self.scriptExe+" not found"
116 <              raise CrabException(msg)
117 <           self.additional_inbox_files.append(string.strip(self.scriptExe))
114 >            if not os.path.isfile(self.scriptExe):
115 >                msg ="ERROR. file "+self.scriptExe+" not found"
116 >                raise CrabException(msg)
117 >            self.additional_inbox_files.append(string.strip(self.scriptExe))
118  
141        #CarlosDaniele
119          if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
120 <           msg ="Error. script_exe  not defined"
121 <           raise CrabException(msg)
120 >            msg ="Error. script_exe  not defined"
121 >            raise CrabException(msg)
122 >
123 >        # use parent files...
124 >        self.useParent = self.cfg_params.get('CMSSW.use_parent',False)
125  
126          ## additional input files
127          if cfg_params.has_key('USER.additional_input_files'):
# Line 161 | Line 141 | class Cmssw(JobType):
141                      if not os.path.exists(file):
142                          raise CrabException("Additional input file not found: "+file)
143                      pass
164                    # fname = string.split(file, '/')[-1]
165                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
166                    # shutil.copyfile(file, storedFile)
144                      self.additional_inbox_files.append(string.strip(file))
145                  pass
146              pass
# Line 189 | Line 166 | class Cmssw(JobType):
166          if cfg_params.has_key('CMSSW.total_number_of_events'):
167              self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
168              self.selectTotalNumberEvents = 1
169 +            if self.selectNumberOfJobs  == 1:
170 +                if (self.total_number_of_events != -1) and int(self.total_number_of_events) < int(self.theNumberOfJobs):
171 +                    msg = 'Must specify at least one event per job. total_number_of_events > number_of_jobs '
172 +                    raise CrabException(msg)
173          else:
174              self.total_number_of_events = 0
175              self.selectTotalNumberEvents = 0
176  
177 <        if self.pset != None: #CarlosDaniele
177 >        if self.pset != None:
178               if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
179                   msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
180                   raise CrabException(msg)
# Line 202 | Line 183 | class Cmssw(JobType):
183                   msg = 'Must specify  number_of_jobs.'
184                   raise CrabException(msg)
185  
186 <        ## source seed for pythia
186 >        ## New method of dealing with seeds
187 >        self.incrementSeeds = []
188 >        self.preserveSeeds = []
189 >        if cfg_params.has_key('CMSSW.preserve_seeds'):
190 >            tmpList = cfg_params['CMSSW.preserve_seeds'].split(',')
191 >            for tmp in tmpList:
192 >                tmp.strip()
193 >                self.preserveSeeds.append(tmp)
194 >        if cfg_params.has_key('CMSSW.increment_seeds'):
195 >            tmpList = cfg_params['CMSSW.increment_seeds'].split(',')
196 >            for tmp in tmpList:
197 >                tmp.strip()
198 >                self.incrementSeeds.append(tmp)
199 >
200 >        ## Old method of dealing with seeds
201 >        ## FUTURE: This is for old CMSSW and old CRAB. Can throw exceptions after a couple of CRAB releases and then
202 >        ## remove
203          self.sourceSeed = cfg_params.get('CMSSW.pythia_seed',None)
204 +        if self.sourceSeed:
205 +            print "pythia_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
206 +            self.incrementSeeds.append('sourceSeed')
207 +            self.incrementSeeds.append('theSource')
208  
209          self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
210 +        if self.sourceSeedVtx:
211 +            print "vtx_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
212 +            self.incrementSeeds.append('VtxSmeared')
213  
214          self.sourceSeedG4 = cfg_params.get('CMSSW.g4_seed',None)
215 +        if self.sourceSeedG4:
216 +            print "g4_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
217 +            self.incrementSeeds.append('g4SimHits')
218  
219          self.sourceSeedMix = cfg_params.get('CMSSW.mix_seed',None)
220 +        if self.sourceSeedMix:
221 +            print "mix_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
222 +            self.incrementSeeds.append('mix')
223  
224          self.firstRun = cfg_params.get('CMSSW.first_run',None)
225  
216        if self.pset != None: #CarlosDaniele
217            import PsetManipulator as pp
218            PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
226  
227          # Copy/return
221
228          self.copy_data = int(cfg_params.get('USER.copy_data',0))
229          self.return_data = int(cfg_params.get('USER.return_data',0))
230  
# Line 234 | Line 240 | class Cmssw(JobType):
240              blockSites = self.DataDiscoveryAndLocation(cfg_params)
241          #DBSDLS-end
242  
237        self.tgzNameWithPath = self.getTarBall(self.executable)
238
243          ## Select Splitting
244          if self.selectNoInput:
245 <            if self.pset == None: #CarlosDaniele
245 >            if self.pset == None:
246                  self.jobSplittingForScript()
247              else:
248                  self.jobSplittingNoInput()
249          else:
250              self.jobSplittingByBlocks(blockSites)
251  
252 <        # modify Pset
253 <        if self.pset != None: #CarlosDaniele
254 <            try:
255 <                if (self.datasetPath): # standard job
256 <                    # allow to processa a fraction of events in a file
257 <                    PsetEdit.inputModule("INPUTFILE")
258 <                    PsetEdit.maxEvent(0)
259 <                    PsetEdit.skipEvent(0)
260 <                else:  # pythia like job
252 >        # modify Pset only the first time
253 >        if isNew:
254 >            if self.pset != None:
255 >                import PsetManipulator as pp
256 >                PsetEdit = pp.PsetManipulator(self.pset)
257 >                try:
258 >                    # Add FrameworkJobReport to parameter-set, set max events.
259 >                    # Reset later for data jobs by writeCFG which does all modifications
260 >                    PsetEdit.addCrabFJR(self.fjrFileName) # FUTURE: Job report addition not needed by CMSSW>1.5
261                      PsetEdit.maxEvent(self.eventsPerJob)
262 <                    if (self.firstRun):
263 <                        PsetEdit.pythiaFirstRun(0)  #First Run
264 <                    if (self.sourceSeed) :
265 <                        PsetEdit.pythiaSeed(0)
266 <                        if (self.sourceSeedVtx) :
267 <                            PsetEdit.vtxSeed(0)
268 <                        if (self.sourceSeedG4) :
269 <                            PsetEdit.g4Seed(0)
270 <                        if (self.sourceSeedMix) :
271 <                            PsetEdit.mixSeed(0)
272 <                # add FrameworkJobReport to parameter-set
273 <                PsetEdit.addCrabFJR(self.fjrFileName)
274 <                PsetEdit.psetWriter(self.configFilename())
275 <            except:
276 <                msg='Error while manipuliating ParameterSet: exiting...'
277 <                raise CrabException(msg)
262 >                    PsetEdit.psetWriter(self.configFilename())
263 >                    ## If present, add TFileService to output files
264 >                    if not int(cfg_params.get('CMSSW.skip_TFileService_output',0)):
265 >                        tfsOutput = PsetEdit.getTFileService()
266 >                        if tfsOutput:
267 >                            if tfsOutput in self.output_file:
268 >                                common.logger.debug(5,"Output from TFileService "+tfsOutput+" already in output files")
269 >                            else:
270 >                                outfileflag = True #output found
271 >                                self.output_file.append(tfsOutput)
272 >                                common.logger.message("Adding "+tfsOutput+" to output files (from TFileService)")
273 >                            pass
274 >                        pass
275 >                    ## If present and requested, add PoolOutputModule to output files
276 >                    if int(cfg_params.get('CMSSW.get_edm_output',0)):
277 >                        edmOutput = PsetEdit.getPoolOutputModule()
278 >                        if edmOutput:
279 >                            if edmOutput in self.output_file:
280 >                                common.logger.debug(5,"Output from PoolOutputModule "+edmOutput+" already in output files")
281 >                            else:
282 >                                self.output_file.append(edmOutput)
283 >                                common.logger.message("Adding "+edmOutput+" to output files (from PoolOutputModule)")
284 >                            pass
285 >                        pass
286 >                except CrabException:
287 >                    msg='Error while manipulating ParameterSet: exiting...'
288 >                    raise CrabException(msg)
289 >            ## Prepare inputSandbox TarBall (only the first time)
290 >            self.tgzNameWithPath = self.getTarBall(self.executable)
291  
292      def DataDiscoveryAndLocation(self, cfg_params):
293  
# Line 283 | Line 300 | class Cmssw(JobType):
300          ## Contact the DBS
301          common.logger.message("Contacting Data Discovery Services ...")
302          try:
303 <            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
303 >            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
304              self.pubdata.fetchDBSInfo()
305  
306          except DataDiscovery.NotExistingDatasetError, ex :
# Line 299 | Line 316 | class Cmssw(JobType):
316          self.filesbyblock=self.pubdata.getFiles()
317          self.eventsbyblock=self.pubdata.getEventsPerBlock()
318          self.eventsbyfile=self.pubdata.getEventsPerFile()
319 +        self.parentFiles=self.pubdata.getParent()
320  
321          ## get max number of events
322 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
322 >        self.maxEvents=self.pubdata.getMaxEvents()
323  
324          ## Contact the DLS and build a list of sites hosting the fileblocks
325          try:
# Line 325 | Line 343 | class Cmssw(JobType):
343  
344          return sites
345  
328    def setArgsList(self, argsList):
329        self.argsList = argsList
330
346      def jobSplittingByBlocks(self, blockSites):
347          """
348          Perform job splitting. Jobs run over an integer number of files
# Line 378 | Line 393 | class Cmssw(JobType):
393          else :
394              totalNumberOfJobs = self.ncjobs
395  
381
396          blocks = blockSites.keys()
397          blockCount = 0
398          # Backup variable in case self.maxEvents counted events in a non-included block
# Line 419 | Line 433 | class Cmssw(JobType):
433  
434                  # ---- Iterate over the files in the block until we've met the requested ---- #
435                  # ---- total # of events or we've gone over all the files in this block  ---- #
436 +                pString=''
437                  while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
438                      file = files[fileCount]
439 +                    if self.useParent:
440 +                        parent = self.parentFiles[file]
441 +                        for f in parent :
442 +                            pString += '\\\"' + f + '\\\"\,'
443 +                        common.logger.debug(6, "File "+str(file)+" has the following parents: "+str(parent))
444 +                        common.logger.write("File "+str(file)+" has the following parents: "+str(parent))
445                      if newFile :
446                          try:
447                              numEventsInFile = self.eventsbyfile[file]
# Line 433 | Line 454 | class Cmssw(JobType):
454                          except KeyError:
455                              common.logger.message("File "+str(file)+" has unknown number of events: skipping")
456  
457 <
457 >                    eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining)
458                      # if less events in file remain than eventsPerJobRequested
459 <                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
459 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested):
460                          # if last file in block
461                          if ( fileCount == numFilesInBlock-1 ) :
462                              # end job using last file, use remaining events in block
463                              # close job and touch new file
464                              fullString = parString[:-2]
465 <                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
465 >                            if self.useParent:
466 >                                fullParentString = pString[:-2]
467 >                                list_of_lists.append([fullString,fullParentString,str(-1),str(jobSkipEventCount)])
468 >                            else:
469 >                                list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
470                              common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
471                              self.jobDestination.append(blockSites[block])
472                              common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
# Line 453 | Line 478 | class Cmssw(JobType):
478                              eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
479                              jobSkipEventCount = 0
480                              # reset file
481 +                            pString = ""
482                              parString = ""
483                              filesEventCount = 0
484                              newFile = 1
# Line 465 | Line 491 | class Cmssw(JobType):
491                      elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
492                          # close job and touch new file
493                          fullString = parString[:-2]
494 <                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
494 >                        if self.useParent:
495 >                            fullParentString = pString[:-2]
496 >                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
497 >                        else:
498 >                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
499                          common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
500                          self.jobDestination.append(blockSites[block])
501                          common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
# Line 476 | Line 506 | class Cmssw(JobType):
506                          eventsRemaining = eventsRemaining - eventsPerJobRequested
507                          jobSkipEventCount = 0
508                          # reset file
509 +                        pString = ""
510                          parString = ""
511                          filesEventCount = 0
512                          newFile = 1
# Line 485 | Line 516 | class Cmssw(JobType):
516                      else :
517                          # close job but don't touch new file
518                          fullString = parString[:-2]
519 <                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
519 >                        if self.useParent:
520 >                            fullParentString = pString[:-2]
521 >                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
522 >                        else:
523 >                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
524                          common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
525                          self.jobDestination.append(blockSites[block])
526                          common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
# Line 499 | Line 534 | class Cmssw(JobType):
534                          jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
535                          # remove all but the last file
536                          filesEventCount = self.eventsbyfile[file]
537 <                        parString = ""
538 <                        parString += '\\\"' + file + '\\\"\,'
537 >                        if self.useParent:
538 >                            for f in parent : pString += '\\\"' + f + '\\\"\,'
539 >                        parString = '\\\"' + file + '\\\"\,'
540                      pass # END if
541                  pass # END while (iterate over files in the block)
542          pass # END while (iterate over blocks in the dataset)
# Line 520 | Line 556 | class Cmssw(JobType):
556          for block in blocks:
557              if block in jobsOfBlock.keys() :
558                  blockCounter += 1
559 <                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
559 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),
560 >                    ','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
561                  if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
562                      noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
563                      bloskNoSite.append( blockCounter )
# Line 602 | Line 639 | class Cmssw(JobType):
639              if (self.firstRun):
640                  ## pythia first run
641                  args.append(str(self.firstRun)+str(i))
605            if (self.sourceSeed):
606                args.append(str(self.sourceSeed)+str(i))
607                if (self.sourceSeedVtx):
608                    ## + vtx random seed
609                    args.append(str(self.sourceSeedVtx)+str(i))
610                if (self.sourceSeedG4):
611                    ## + G4 random seed
612                    args.append(str(self.sourceSeedG4)+str(i))
613                if (self.sourceSeedMix):
614                    ## + Mix random seed
615                    args.append(str(self.sourceSeedMix)+str(i))
616                pass
617            pass
642              self.list_of_args.append(args)
619        pass
643  
644          return
645  
646  
647 <    def jobSplittingForScript(self):#CarlosDaniele
647 >    def jobSplittingForScript(self):
648          """
649          Perform job splitting based on number of job
650          """
# Line 637 | Line 660 | class Cmssw(JobType):
660          # argument is seed number.$i
661          self.list_of_args = []
662          for i in range(self.total_number_of_jobs):
640            ## Since there is no input, any site is good
641           # self.jobDestination.append(["Any"])
663              self.jobDestination.append([""])
643            ## no random seed
664              self.list_of_args.append([str(i)])
665          return
666  
667 <    def split(self, jobParams):
667 >    def split(self, jobParams,firstJobID):
668  
649        common.jobDB.load()
650        #### Fabio
669          njobs = self.total_number_of_jobs
670          arglist = self.list_of_args
671          # create the empty structure
672          for i in range(njobs):
673              jobParams.append("")
674  
675 <        for job in range(njobs):
676 <            jobParams[job] = arglist[job]
677 <            # print str(arglist[job])
678 <            # print jobParams[job]
679 <            common.jobDB.setArguments(job, jobParams[job])
680 <            common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
681 <            common.jobDB.setDestination(job, self.jobDestination[job])
675 >        listID=[]
676 >        listField=[]
677 >        for id in range(njobs):
678 >            job = id + int(firstJobID)
679 >            jobParams[id] = arglist[id]
680 >            listID.append(job+1)
681 >            job_ToSave ={}
682 >            concString = ' '
683 >            argu=''
684 >            if len(jobParams[id]):
685 >                argu +=   concString.join(jobParams[id] )
686 >            job_ToSave['arguments']= str(job+1)+' '+argu
687 >            job_ToSave['dlsDestination']= self.jobDestination[id]
688 >            listField.append(job_ToSave)
689 >            msg="Job "+str(job)+" Arguments:   "+str(job+1)+" "+argu+"\n"  \
690 >            +"                     Destination: "+str(self.jobDestination[id])
691 >            common.logger.debug(5,msg)
692 >        common._db.updateJob_(listID,listField)
693 >        self.argsList = (len(jobParams[0])+1)
694  
665        common.jobDB.save()
695          return
696  
668    def getJobTypeArguments(self, nj, sched):
669        result = ''
670        for i in common.jobDB.arguments(nj):
671            result=result+str(i)+" "
672        return result
673
697      def numberOfJobs(self):
675        # Fabio
698          return self.total_number_of_jobs
699  
700      def getTarBall(self, exe):
701          """
702          Return the TarBall with lib and exe
703          """
682
683        # if it exist, just return it
684        #
685        # Marco. Let's start to use relative path for Boss XML files
686        #
704          self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
705          if os.path.exists(self.tgzNameWithPath):
706              return self.tgzNameWithPath
# Line 697 | Line 714 | class Cmssw(JobType):
714  
715          # First of all declare the user Scram area
716          swArea = self.scram.getSWArea_()
700        #print "swArea = ", swArea
701        # swVersion = self.scram.getSWVersion()
702        # print "swVersion = ", swVersion
717          swReleaseTop = self.scram.getReleaseTop_()
704        #print "swReleaseTop = ", swReleaseTop
718  
719          ## check if working area is release top
720          if swReleaseTop == '' or swArea == swReleaseTop:
721 +            common.logger.debug(3,"swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
722              return
723  
724          import tarfile
# Line 746 | Line 760 | class Cmssw(JobType):
760                  tar.add(module,moduleDir)
761  
762              ## Now check if any data dir(s) is present
763 <            swAreaLen=len(swArea)
764 <            for root, dirs, files in os.walk(swArea):
765 <                if "data" in dirs:
766 <                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
767 <                    tar.add(root+"/data",root[swAreaLen:]+"/data")
768 <
769 <            ### Removed ProdAgent Api dependencies ###
770 <            ### Add ProdAgent dir to tar
771 <            #paDir = 'ProdAgentApi'
772 <            #pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
773 <            #if os.path.isdir(pa):
774 <            #    tar.add(pa,paDir)
763 >            self.dataExist = False
764 >            todo_list = [(i, i) for i in  os.listdir(swArea+"/src")]
765 >            while len(todo_list):
766 >                entry, name = todo_list.pop()
767 >                if name.startswith('crab_0_') or  name.startswith('.') or name == 'CVS':
768 >                    continue
769 >                if os.path.isdir(swArea+"/src/"+entry):
770 >                    entryPath = entry + '/'
771 >                    todo_list += [(entryPath + i, i) for i in  os.listdir(swArea+"/src/"+entry)]
772 >                    if name == 'data':
773 >                        self.dataExist=True
774 >                        common.logger.debug(5,"data "+entry+" to be tarred")
775 >                        tar.add(swArea+"/src/"+entry,"src/"+entry)
776 >                    pass
777 >                pass
778 >
779 >            ### CMSSW ParameterSet
780 >            if not self.pset is None:
781 >                cfg_file = common.work_space.jobDir()+self.configFilename()
782 >                tar.add(cfg_file,self.configFilename())
783 >                common.logger.debug(5,"File added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
784 >
785  
786              ## Add ProdCommon dir to tar
787 <            prodcommonDir = 'ProdCommon'
788 <            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
789 <            if os.path.isdir(prodcommonPath):
790 <                tar.add(prodcommonPath,prodcommonDir)
787 >            prodcommonDir = './'
788 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
789 >            neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools','ProdCommon/Core','ProdCommon/MCPayloads', 'IMProv']
790 >            for file in neededStuff:
791 >                tar.add(prodcommonPath+file,prodcommonDir+file)
792 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
793  
794 +            ##### ML stuff
795 +            ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
796 +            path=os.environ['CRABDIR'] + '/python/'
797 +            for file in ML_file_list:
798 +                tar.add(path+file,file)
799              common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
800 +
801 +            ##### Utils
802 +            Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py']
803 +            for file in Utils_file_list:
804 +                tar.add(path+file,file)
805 +            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
806 +
807 +            ##### AdditionalFiles
808 +            for file in self.additional_inbox_files:
809 +                tar.add(file,string.split(file,'/')[-1])
810 +            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
811 +
812              tar.close()
813 <        except :
814 <            raise CrabException('Could not create tar-ball')
813 >        except IOError:
814 >            raise CrabException('Could not create tar-ball '+self.tgzNameWithPath)
815 >        except tarfile.TarError:
816 >            raise CrabException('Could not create tar-ball '+self.tgzNameWithPath)
817  
818          ## check for tarball size
819          tarballinfo = os.stat(self.tgzNameWithPath)
# Line 776 | Line 821 | class Cmssw(JobType):
821              raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
822  
823          ## create tar-ball with ML stuff
779        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
780        try:
781            tar = tarfile.open(self.MLtgzfile, "w:gz")
782            path=os.environ['CRABDIR'] + '/python/'
783            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
784                tar.add(path+file,file)
785            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
786            tar.close()
787        except :
788            raise CrabException('Could not create ML files tar-ball')
824  
825 <        return
791 <
792 <    def additionalInputFileTgz(self):
793 <        """
794 <        Put all additional files into a tar ball and return its name
795 <        """
796 <        import tarfile
797 <        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
798 <        tar = tarfile.open(tarName, "w:gz")
799 <        for file in self.additional_inbox_files:
800 <            tar.add(file,string.split(file,'/')[-1])
801 <        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
802 <        tar.close()
803 <        return tarName
804 <
805 <    def wsSetupEnvironment(self, nj):
825 >    def wsSetupEnvironment(self, nj=0):
826          """
827          Returns part of a job script which prepares
828          the execution environment for the job 'nj'.
829          """
830 +        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
831 +            psetName = 'pset.py'
832 +        else:
833 +            psetName = 'pset.cfg'
834          # Prepare JobType-independent part
835 <        txt = ''
835 >        txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
836          txt += 'echo ">>> setup environment"\n'
837          txt += 'if [ $middleware == LCG ]; then \n'
838          txt += self.wsSetupCMSLCGEnvironment_()
839          txt += 'elif [ $middleware == OSG ]; then\n'
840          txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
841          txt += '    if [ ! $? == 0 ] ;then\n'
842 <        txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
843 <        txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
844 <        txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
821 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
822 <        txt += '        exit 1\n'
842 >        txt += '        echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
843 >        txt += '        job_exit_code=10016\n'
844 >        txt += '        func_exit\n'
845          txt += '    fi\n'
846          txt += '    echo ">>> Created working directory: $WORKING_DIR"\n'
847          txt += '\n'
# Line 827 | Line 849 | class Cmssw(JobType):
849          txt += '    cd $WORKING_DIR\n'
850          txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
851          txt += self.wsSetupCMSOSGEnvironment_()
830        #txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
831        #txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
852          txt += 'fi\n'
853  
854          # Prepare JobType-specific part
# Line 839 | Line 859 | class Cmssw(JobType):
859          txt += scram+' project CMSSW '+self.version+'\n'
860          txt += 'status=$?\n'
861          txt += 'if [ $status != 0 ] ; then\n'
862 <        txt += '    echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
863 <        txt += '    echo "JOB_EXIT_STATUS = 10034"\n'
864 <        txt += '    echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
845 <        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
846 <        txt += '    if [ $middleware == OSG ]; then \n'
847 <        txt += '        cd $RUNTIME_AREA\n'
848 <        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
849 <        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
850 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
851 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
852 <        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
853 <        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
854 <        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
855 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
856 <        txt += '        fi\n'
857 <        txt += '    fi \n'
858 <        txt += '    exit 1 \n'
862 >        txt += '    echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
863 >        txt += '    job_exit_code=10034\n'
864 >        txt += '    func_exit\n'
865          txt += 'fi \n'
866          txt += 'cd '+self.version+'\n'
861        ########## FEDE FOR DBS2 ######################
867          txt += 'SOFTWARE_DIR=`pwd`\n'
868          txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
864        ###############################################
865        ### needed grep for bug in scramv1 ###
869          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
870 +        txt += 'if [ $? != 0 ] ; then\n'
871 +        txt += '    echo "ERROR ==> Problem with the command: "\n'
872 +        txt += '    echo "eval \`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \` at `hostname`"\n'
873 +        txt += '    job_exit_code=10034\n'
874 +        txt += '    func_exit\n'
875 +        txt += 'fi \n'
876          # Handle the arguments:
877          txt += "\n"
878          txt += "## number of arguments (first argument always jobnumber)\n"
879          txt += "\n"
880 <        txt += "if [ $nargs -lt "+str(len(self.argsList[nj].split()))+" ]\n"
880 >        txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n"
881          txt += "then\n"
882 <        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
883 <        txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
884 <        txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
876 <        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
877 <        txt += '    if [ $middleware == OSG ]; then \n'
878 <        txt += '        cd $RUNTIME_AREA\n'
879 <        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
880 <        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
881 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
882 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
883 <        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
884 <        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
885 <        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
886 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
887 <        txt += '        fi\n'
888 <        txt += '    fi \n'
889 <        txt += "    exit 1\n"
882 >        txt += "    echo 'ERROR ==> Too few arguments' +$nargs+ \n"
883 >        txt += '    job_exit_code=50113\n'
884 >        txt += "    func_exit\n"
885          txt += "fi\n"
886          txt += "\n"
887  
888          # Prepare job-specific part
889          job = common.job_list[nj]
895        ### FEDE FOR DBS OUTPUT PUBLICATION
890          if (self.datasetPath):
891              txt += '\n'
892              txt += 'DatasetPath='+self.datasetPath+'\n'
# Line 908 | Line 902 | class Cmssw(JobType):
902              txt += 'PrimaryDataset=null\n'
903              txt += 'DataTier=null\n'
904              txt += 'ApplicationFamily=MCDataTier\n'
905 <        if self.pset != None: #CarlosDaniele
905 >        if self.pset != None:
906              pset = os.path.basename(job.configFilename())
907              txt += '\n'
908              txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
909              if (self.datasetPath): # standard job
910 <                txt += 'InputFiles=${args[1]}\n'
911 <                txt += 'MaxEvents=${args[2]}\n'
912 <                txt += 'SkipEvents=${args[3]}\n'
910 >                txt += 'InputFiles=${args[1]}; export InputFiles\n'
911 >                if (self.useParent):
912 >                    txt += 'ParentFiles=${args[2]}; export ParentFiles\n'
913 >                    txt += 'MaxEvents=${args[3]}; export MaxEvents\n'
914 >                    txt += 'SkipEvents=${args[4]}; export SkipEvents\n'
915 >                else:
916 >                    txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
917 >                    txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
918                  txt += 'echo "Inputfiles:<$InputFiles>"\n'
919 <                txt += 'sed "s#\'INPUTFILE\'#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
919 >                if (self.useParent): txt += 'echo "ParentFiles:<$ParentFiles>"\n'
920                  txt += 'echo "MaxEvents:<$MaxEvents>"\n'
922                txt += 'sed "s#int32 input = 0#int32 input = $MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
921                  txt += 'echo "SkipEvents:<$SkipEvents>"\n'
924                txt += 'sed "s#uint32 skipEvents = 0#uint32 skipEvents = $SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
922              else:  # pythia like job
923 <                seedIndex=1
923 >                txt += 'PreserveSeeds='  + ','.join(self.preserveSeeds)  + '; export PreserveSeeds\n'
924 >                txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
925 >                txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
926 >                txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
927                  if (self.firstRun):
928 <                    txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
928 >                    txt += 'FirstRun=${args[1]}; export FirstRun\n'
929                      txt += 'echo "FirstRun: <$FirstRun>"\n'
930                    txt += 'sed "s#uint32 firstRun = 0#uint32 firstRun = $FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
931                    seedIndex=seedIndex+1
930  
931 <                if (self.sourceSeed):
934 <                    txt += 'Seed=${args['+str(seedIndex)+']}\n'
935 <                    txt += 'sed "s#uint32 sourceSeed = 0#uint32 sourceSeed = $Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
936 <                    seedIndex=seedIndex+1
937 <                    ## the following seeds are not always present
938 <                    if (self.sourceSeedVtx):
939 <                        txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
940 <                        txt += 'echo "VtxSeed: <$VtxSeed>"\n'
941 <                        txt += 'sed "s#uint32 VtxSmeared = 0#uint32 VtxSmeared = $VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
942 <                        seedIndex += 1
943 <                    if (self.sourceSeedG4):
944 <                        txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
945 <                        txt += 'echo "G4Seed: <$G4Seed>"\n'
946 <                        txt += 'sed "s#uint32 g4SimHits = 0#uint32 g4SimHits = $G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
947 <                        seedIndex += 1
948 <                    if (self.sourceSeedMix):
949 <                        txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
950 <                        txt += 'echo "MixSeed: <$mixSeed>"\n'
951 <                        txt += 'sed "s#uint32 mix = 0#uint32 mix = $mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
952 <                        seedIndex += 1
953 <                    pass
954 <                pass
955 <            txt += 'mv -f '+pset+' pset.cfg\n'
931 >            txt += 'mv -f ' + pset + ' ' + psetName + '\n'
932  
957        if len(self.additional_inbox_files) > 0:
958            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
959            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
960            txt += 'fi\n'
961            pass
933  
934 <        if self.pset != None: #CarlosDaniele
934 >        if self.pset != None:
935 >            # FUTURE: Can simplify for 2_1_x and higher
936              txt += '\n'
937 <            txt += 'echo "***** cat pset.cfg *********"\n'
938 <            txt += 'cat pset.cfg\n'
939 <            txt += 'echo "****** end pset.cfg ********"\n'
940 <            txt += '\n'
941 <            ### FEDE FOR DBS OUTPUT PUBLICATION
942 <            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
937 >            if self.debug_wrapper==True:
938 >                txt += 'echo "***** cat ' + psetName + ' *********"\n'
939 >                txt += 'cat ' + psetName + '\n'
940 >                txt += 'echo "****** end ' + psetName + ' ********"\n'
941 >                txt += '\n'
942 >            if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
943 >                txt += 'PSETHASH=`edmConfigHash ' + psetName + '` \n'
944 >            else:
945 >                txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n'
946              txt += 'echo "PSETHASH = $PSETHASH" \n'
972            ##############
947              txt += '\n'
948          return txt
949  
950 <    def wsBuildExe(self, nj=0):
950 >    def wsUntarSoftware(self, nj=0):
951          """
952          Put in the script the commands to build an executable
953          or a library.
954          """
955  
956 <        txt = ""
956 >        txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'
957  
958          if os.path.isfile(self.tgzNameWithPath):
959              txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
960              txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
961 +            if  self.debug_wrapper:
962 +                txt += 'ls -Al \n'
963              txt += 'untar_status=$? \n'
964              txt += 'if [ $untar_status -ne 0 ]; then \n'
965 <            txt += '   echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
966 <            txt += '   echo "JOB_EXIT_STATUS = $untar_status" \n'
967 <            txt += '   echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
992 <            txt += '   if [ $middleware == OSG ]; then \n'
993 <            txt += '       cd $RUNTIME_AREA\n'
994 <            txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
995 <            txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
996 <            txt += '       /bin/rm -rf $WORKING_DIR\n'
997 <            txt += '       if [ -d $WORKING_DIR ] ;then\n'
998 <            txt += '           echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
999 <            txt += '           echo "JOB_EXIT_STATUS = 50999"\n'
1000 <            txt += '           echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
1001 <            txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1002 <            txt += '       fi\n'
1003 <            txt += '   fi \n'
1004 <            txt += '   \n'
1005 <            txt += '   exit 1 \n'
965 >            txt += '   echo "ERROR ==> Untarring .tgz file failed"\n'
966 >            txt += '   job_exit_code=$untar_status\n'
967 >            txt += '   func_exit\n'
968              txt += 'else \n'
969              txt += '   echo "Successful untar" \n'
970              txt += 'fi \n'
971              txt += '\n'
972 <            #### Removed ProdAgent API dependencies
1011 <            txt += 'echo ">>> Include ProdCommon in PYTHONPATH:"\n'
972 >            txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
973              txt += 'if [ -z "$PYTHONPATH" ]; then\n'
974 <            #### FEDE FOR DBS OUTPUT PUBLICATION
1014 <            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdCommon\n'
974 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/\n'
975              txt += 'else\n'
976 <            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
976 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
977              txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1018            ###################
978              txt += 'fi\n'
979              txt += '\n'
980  
# Line 1023 | Line 982 | class Cmssw(JobType):
982  
983          return txt
984  
985 <    def modifySteeringCards(self, nj):
985 >    def wsBuildExe(self, nj=0):
986          """
987 <        modify the card provided by the user,
988 <        writing a new card into share dir
987 >        Put in the script the commands to build an executable
988 >        or a library.
989          """
990  
991 +        txt = '\n#Written by cms_cmssw::wsBuildExe\n'
992 +        txt += 'echo ">>> moving CMSSW software directories in `pwd`" \n'
993 +
994 +        txt += 'rm -r lib/ module/ \n'
995 +        txt += 'mv $RUNTIME_AREA/lib/ . \n'
996 +        txt += 'mv $RUNTIME_AREA/module/ . \n'
997 +        if self.dataExist == True:
998 +            txt += 'rm -r src/ \n'
999 +            txt += 'mv $RUNTIME_AREA/src/ . \n'
1000 +        if len(self.additional_inbox_files)>0:
1001 +            for file in self.additional_inbox_files:
1002 +                txt += 'mv $RUNTIME_AREA/'+os.path.basename(file)+' . \n'
1003 +        # txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n'
1004 +        # txt += 'mv $RUNTIME_AREA/IMProv/ . \n'
1005 +
1006 +        txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
1007 +        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1008 +        txt += '   export PYTHONPATH=$RUNTIME_AREA/\n'
1009 +        txt += 'else\n'
1010 +        txt += '   export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
1011 +        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1012 +        txt += 'fi\n'
1013 +        txt += '\n'
1014 +
1015 +        return txt
1016 +
1017 +
1018      def executableName(self):
1019 <        if self.scriptExe: #CarlosDaniele
1019 >        if self.scriptExe:
1020              return "sh "
1021          else:
1022              return self.executable
1023  
1024      def executableArgs(self):
1025 +        # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
1026          if self.scriptExe:#CarlosDaniele
1027              return   self.scriptExe + " $NJob"
1028          else:
1029 <            # if >= CMSSW_1_5_X, add -j crab_fjr.xml
1030 <            version_array = self.scram.getSWVersion().split('_')
1031 <            major = 0
1032 <            minor = 0
1033 <            try:
1034 <                major = int(version_array[1])
1035 <                minor = int(version_array[2])
1036 <            except:
1050 <                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
1051 <                raise CrabException(msg)
1052 <            if major >= 1 and minor >= 5 :
1053 <                return " -j " + self.fjrFileName + " -p pset.cfg"
1029 >            ex_args = ""
1030 >            # FUTURE: This tests the CMSSW version. Can remove code as versions deprecated
1031 >            # Framework job report
1032 >            if (self.CMSSW_major >= 1 and self.CMSSW_minor >= 5) or (self.CMSSW_major >= 2):
1033 >                ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
1034 >            # Type of config file
1035 >            if self.CMSSW_major >= 2 :
1036 >                ex_args += " -p pset.py"
1037              else:
1038 <                return " -p pset.cfg"
1038 >                ex_args += " -p pset.cfg"
1039 >            return ex_args
1040  
1041      def inputSandbox(self, nj):
1042          """
1043          Returns a list of filenames to be put in JDL input sandbox.
1044          """
1045          inp_box = []
1062        # # dict added to delete duplicate from input sandbox file list
1063        # seen = {}
1064        ## code
1046          if os.path.isfile(self.tgzNameWithPath):
1047              inp_box.append(self.tgzNameWithPath)
1048 <        if os.path.isfile(self.MLtgzfile):
1049 <            inp_box.append(self.MLtgzfile)
1069 <        ## config
1070 <        if not self.pset is None:
1071 <            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1072 <        ## additional input files
1073 <        tgz = self.additionalInputFileTgz()
1074 <        inp_box.append(tgz)
1048 >        wrapper = os.path.basename(str(common._db.queryTask('scriptName')))
1049 >        inp_box.append(common.work_space.pathForTgz() +'job/'+ wrapper)
1050          return inp_box
1051  
1052      def outputSandbox(self, nj):
# Line 1083 | Line 1058 | class Cmssw(JobType):
1058          ## User Declared output files
1059          for out in (self.output_file+self.output_file_sandbox):
1060              n_out = nj + 1
1061 <            out_box.append(self.numberFile_(out,str(n_out)))
1061 >            out_box.append(numberFile(out,str(n_out)))
1062          return out_box
1063  
1089    def prepareSteeringCards(self):
1090        """
1091        Make initial modifications of the user's steering card file.
1092        """
1093        return
1064  
1065      def wsRenameOutput(self, nj):
1066          """
1067          Returns part of a job script which renames the produced files.
1068          """
1069  
1070 <        txt = '\n'
1070 >        txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
1071          txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1072          txt += 'echo ">>> current directory content:"\n'
1073 <        txt += 'ls \n'
1073 >        if self.debug_wrapper:
1074 >            txt += 'ls -Al\n'
1075          txt += '\n'
1076  
1106        txt += 'output_exit_status=0\n'
1107
1108        for fileWithSuffix in (self.output_file_sandbox):
1109            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1110            txt += '\n'
1111            txt += '# check output file\n'
1112            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1113            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1114            txt += '    ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1115            txt += 'else\n'
1116            txt += '    exit_status=60302\n'
1117            txt += '    echo "ERROR: Output file '+fileWithSuffix+' not found"\n'
1118            if common.scheduler.name() == 'CONDOR_G':
1119                txt += '    if [ $middleware == OSG ]; then \n'
1120                txt += '        echo "prepare dummy output file"\n'
1121                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1122                txt += '    fi \n'
1123            txt += 'fi\n'
1124
1077          for fileWithSuffix in (self.output_file):
1078 <            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1078 >            output_file_num = numberFile(fileWithSuffix, '$NJob')
1079              txt += '\n'
1080              txt += '# check output file\n'
1081              txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
# Line 1134 | Line 1086 | class Cmssw(JobType):
1086                  txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1087                  txt += '    ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1088              txt += 'else\n'
1089 <            txt += '    exit_status=60302\n'
1090 <            txt += '    echo "ERROR: Output file '+fileWithSuffix+' not found"\n'
1091 <            txt += '    echo "JOB_EXIT_STATUS = $exit_status"\n'
1140 <            txt += '    output_exit_status=$exit_status\n'
1141 <            if common.scheduler.name() == 'CONDOR_G':
1089 >            txt += '    job_exit_code=60302\n'
1090 >            txt += '    echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
1091 >            if common.scheduler.name().upper() == 'CONDOR_G':
1092                  txt += '    if [ $middleware == OSG ]; then \n'
1093                  txt += '        echo "prepare dummy output file"\n'
1094                  txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
# Line 1146 | Line 1096 | class Cmssw(JobType):
1096              txt += 'fi\n'
1097          file_list = []
1098          for fileWithSuffix in (self.output_file):
1099 <             file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1099 >             file_list.append(numberFile(fileWithSuffix, '$NJob'))
1100  
1101          txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1102          txt += '\n'
1103          txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1104          txt += 'echo ">>> current directory content:"\n'
1105 <        txt += 'ls \n'
1105 >        if self.debug_wrapper:
1106 >            txt += 'ls -Al\n'
1107          txt += '\n'
1108          txt += 'cd $RUNTIME_AREA\n'
1109          txt += 'echo ">>> current directory (RUNTIME_AREA):  $RUNTIME_AREA"\n'
1110          return txt
1111  
1161    def numberFile_(self, file, txt):
1162        """
1163        append _'txt' before last extension of a file
1164        """
1165        p = string.split(file,".")
1166        # take away last extension
1167        name = p[0]
1168        for x in p[1:-1]:
1169            name=name+"."+x
1170        # add "_txt"
1171        if len(p)>1:
1172            ext = p[len(p)-1]
1173            result = name + '_' + txt + "." + ext
1174        else:
1175            result = name + '_' + txt
1176
1177        return result
1178
1112      def getRequirements(self, nj=[]):
1113          """
1114          return job requirements to add to jdl files
# Line 1185 | Line 1118 | class Cmssw(JobType):
1118              req='Member("VO-cms-' + \
1119                   self.version + \
1120                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1121 <        ## SL add requirement for OS version only if SL4
1189 <        #reSL4 = re.compile( r'slc4' )
1190 <        if self.executable_arch: # and reSL4.search(self.executable_arch):
1121 >        if self.executable_arch:
1122              req+=' && Member("VO-cms-' + \
1123                   self.executable_arch + \
1124                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1125  
1126          req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1127 +        if common.scheduler.name() == "glitecoll":
1128 +            req += ' && other.GlueCEStateStatus == "Production" '
1129  
1130          return req
1131  
1132      def configFilename(self):
1133          """ return the config filename """
1134 <        return self.name()+'.cfg'
1134 >        # FUTURE: Can remove cfg mode for CMSSW >= 2_1_x
1135 >        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
1136 >          return self.name()+'.py'
1137 >        else:
1138 >          return self.name()+'.cfg'
1139  
1140      def wsSetupCMSOSGEnvironment_(self):
1141          """
1142          Returns part of a job script which prepares
1143          the execution environment and which is common for all CMS jobs.
1144          """
1145 <        txt = '    echo ">>> setup CMS OSG environment:"\n'
1145 >        txt = '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n'
1146 >        txt += '    echo ">>> setup CMS OSG environment:"\n'
1147          txt += '    echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
1148          txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1149          txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
# Line 1213 | Line 1151 | class Cmssw(JobType):
1151          txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1152          txt += '        source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1153          txt += '    else\n'
1154 <        txt += '        echo "SET_CMS_ENV 10020 ==> ERROR $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1155 <        txt += '        echo "JOB_EXIT_STATUS = 10020"\n'
1156 <        txt += '        echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1219 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1220 <        txt += '\n'
1221 <        txt += '        cd $RUNTIME_AREA\n'
1222 <        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1223 <        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
1224 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
1225 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
1226 <        txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1227 <        txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
1228 <        txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1229 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1230 <        txt += '        fi\n'
1231 <        txt += '\n'
1232 <        txt += '        exit 1\n'
1154 >        txt += '        echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1155 >        txt += '        job_exit_code=10020\n'
1156 >        txt += '        func_exit\n'
1157          txt += '    fi\n'
1158          txt += '\n'
1159 <        txt += '    echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1159 >        txt += '    echo "==> setup cms environment ok"\n'
1160          txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1161  
1162          return txt
1163  
1240    ### OLI_DANIELE
1164      def wsSetupCMSLCGEnvironment_(self):
1165          """
1166          Returns part of a job script which is prepares
1167          the execution environment and which is common for all CMS jobs.
1168          """
1169 <        txt = '    echo ">>> setup CMS LCG environment:"\n'
1169 >        txt = '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n'
1170 >        txt += '    echo ">>> setup CMS LCG environment:"\n'
1171          txt += '    echo "set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
1172          txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1173          txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
1174          txt += '    if [ ! $VO_CMS_SW_DIR ] ;then\n'
1175 <        txt += '        echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1176 <        txt += '        echo "JOB_EXIT_STATUS = 10031" \n'
1177 <        txt += '        echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1254 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1255 <        txt += '        exit 1\n'
1175 >        txt += '        echo "ERROR ==> CMS software dir not found on WN `hostname`"\n'
1176 >        txt += '        job_exit_code=10031\n'
1177 >        txt += '        func_exit\n'
1178          txt += '    else\n'
1179          txt += '        echo "Sourcing environment... "\n'
1180          txt += '        if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1181 <        txt += '            echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1182 <        txt += '            echo "JOB_EXIT_STATUS = 10020"\n'
1183 <        txt += '            echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1262 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1263 <        txt += '            exit 1\n'
1181 >        txt += '            echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1182 >        txt += '            job_exit_code=10020\n'
1183 >        txt += '            func_exit\n'
1184          txt += '        fi\n'
1185          txt += '        echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1186          txt += '        source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1187          txt += '        result=$?\n'
1188          txt += '        if [ $result -ne 0 ]; then\n'
1189 <        txt += '            echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1190 <        txt += '            echo "JOB_EXIT_STATUS = 10032"\n'
1191 <        txt += '            echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1272 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1273 <        txt += '            exit 1\n'
1189 >        txt += '            echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1190 >        txt += '            job_exit_code=10032\n'
1191 >        txt += '            func_exit\n'
1192          txt += '        fi\n'
1193          txt += '    fi\n'
1194          txt += '    \n'
1195 <        txt += '    echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1195 >        txt += '    echo "==> setup cms environment ok"\n'
1196          return txt
1197  
1280    ### FEDE FOR DBS OUTPUT PUBLICATION
1198      def modifyReport(self, nj):
1199          """
1200          insert the part of the script that modifies the FrameworkJob Report
1201          """
1202 <
1203 <        txt = ''
1287 <        try:
1288 <            publish_data = int(self.cfg_params['USER.publish_data'])
1289 <        except KeyError:
1290 <            publish_data = 0
1202 >        txt = '\n#Written by cms_cmssw::modifyReport\n'
1203 >        publish_data = int(self.cfg_params.get('USER.publish_data',0))
1204          if (publish_data == 1):
1205 <            txt += 'echo ">>> Modify Job Report:" \n'
1206 <            ################ FEDE FOR DBS2 #############################################
1207 <            #txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1208 <            txt += 'chmod a+x $SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
1209 <            #############################################################################
1205 >            processedDataset = self.cfg_params['USER.publish_data_name']
1206 >            ### FEDE  for publication with LSF and CAF schedulers ####
1207 >            print "common.scheduler.name().upper() = ", common.scheduler.name().upper()
1208 >            if (common.scheduler.name().upper() == "CAF" or common.scheduler.name().upper() == "LSF"):
1209 >                print "chiamo LFNBaseName con localUser = true"
1210 >                LFNBaseName = LFNBase(processedDataset, LocalUser=True)
1211 >            else :    
1212 >                LFNBaseName = LFNBase(processedDataset)
1213 >            ####    
1214  
1215 <            txt += 'if [ -z "$SE" ]; then\n'
1216 <            txt += '    SE="" \n'
1217 <            txt += 'fi \n'
1218 <            txt += 'if [ -z "$SE_PATH" ]; then\n'
1219 <            txt += '    SE_PATH="" \n'
1220 <            txt += 'fi \n'
1221 <            txt += 'echo "SE = $SE"\n'
1305 <            txt += 'echo "SE_PATH = $SE_PATH"\n'
1215 >            txt += 'if [ $copy_exit_status -eq 0 ]; then\n'
1216 >            txt += '    FOR_LFN=%s_${PSETHASH}/\n'%(LFNBaseName)
1217 >            txt += 'else\n'
1218 >            txt += '    FOR_LFN=/copy_problems/ \n'
1219 >            txt += '    SE=""\n'
1220 >            txt += '    SE_PATH=""\n'
1221 >            txt += 'fi\n'
1222  
1223 <            processedDataset = self.cfg_params['USER.publish_data_name']
1223 >            txt += 'echo ">>> Modify Job Report:" \n'
1224 >            txt += 'chmod a+x $RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
1225              txt += 'ProcessedDataset='+processedDataset+'\n'
1309            #### LFN=/store/user/<user>/processedDataset_PSETHASH
1310            txt += 'if [ "$SE_PATH" == "" ]; then\n'
1311            #### FEDE: added slash in LFN ##############
1312            txt += '    FOR_LFN=/copy_problems/ \n'
1313            txt += 'else \n'
1314            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1315            #####  FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1316            txt += '    FOR_LFN=/store$tmp \n'
1317            txt += 'fi \n'
1226              txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1227 +            txt += 'echo "SE = $SE"\n'
1228 +            txt += 'echo "SE_PATH = $SE_PATH"\n'
1229              txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1230              txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1231 <            txt += 'echo "$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1232 <            txt += '$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1323 <
1231 >            txt += 'echo "$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1232 >            txt += '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1233              txt += 'modifyReport_result=$?\n'
1325            txt += 'echo modifyReport_result = $modifyReport_result\n'
1234              txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1235 <            txt += '    exit_status=1\n'
1236 <            txt += '    echo "ERROR: Problem with ModifyJobReport"\n'
1235 >            txt += '    modifyReport_result=70500\n'
1236 >            txt += '    job_exit_code=$modifyReport_result\n'
1237 >            txt += '    echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n'
1238 >            txt += '    echo "WARNING: Problem with ModifyJobReport"\n'
1239              txt += 'else\n'
1240 <            txt += '    mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1240 >            txt += '    mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
1241              txt += 'fi\n'
1332        else:
1333            txt += 'echo "no data publication required"\n'
1242          return txt
1243  
1244 <    def cleanEnv(self):
1245 <        txt = ''
1246 <        txt += 'if [ $middleware == OSG ]; then\n'
1247 <        txt += '    cd $RUNTIME_AREA\n'
1248 <        txt += '    echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1249 <        txt += '    echo ">>> Remove working directory: $WORKING_DIR"\n'
1250 <        txt += '    /bin/rm -rf $WORKING_DIR\n'
1251 <        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1252 <        txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1253 <        txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
1254 <        txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1255 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1244 >    def wsParseFJR(self):
1245 >        """
1246 >        Parse the FrameworkJobReport to obtain useful infos
1247 >        """
1248 >        txt = '\n#Written by cms_cmssw::wsParseFJR\n'
1249 >        txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
1250 >        txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
1251 >        txt += '    if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
1252 >        txt += '        cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n'
1253 >        if self.debug_wrapper :
1254 >            txt += '        echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
1255 >        txt += '        executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n'
1256 >        txt += '        if [ $executable_exit_status -eq 50115 ];then\n'
1257 >        txt += '            echo ">>> crab_fjr.xml contents: "\n'
1258 >        txt += '            cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
1259 >        txt += '            echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
1260 >        txt += '        elif [ $executable_exit_status -eq -999 ];then\n'
1261 >        txt += '            echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n'
1262 >        txt += '        else\n'
1263 >        txt += '            echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
1264 >        txt += '        fi\n'
1265 >        txt += '    else\n'
1266 >        txt += '        echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1267          txt += '    fi\n'
1268 +          #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap
1269 +
1270 +        if (self.datasetPath and not self.dataset_pu ):
1271 +          # VERIFY PROCESSED DATA
1272 +            txt += '    if [ $executable_exit_status -eq 0 ];then\n'
1273 +            txt += '      echo ">>> Verify list of processed files:"\n'
1274 +            txt += '      echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
1275 +            txt += '      python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --lfn > processed-files.txt\n'
1276 +            txt += '      cat input-files.txt  | sort | uniq > tmp.txt\n'
1277 +            txt += '      mv tmp.txt input-files.txt\n'
1278 +            txt += '      echo "cat input-files.txt"\n'
1279 +            txt += '      echo "----------------------"\n'
1280 +            txt += '      cat input-files.txt\n'
1281 +            txt += '      cat processed-files.txt | sort | uniq > tmp.txt\n'
1282 +            txt += '      mv tmp.txt processed-files.txt\n'
1283 +            txt += '      echo "----------------------"\n'
1284 +            txt += '      echo "cat processed-files.txt"\n'
1285 +            txt += '      echo "----------------------"\n'
1286 +            txt += '      cat processed-files.txt\n'
1287 +            txt += '      echo "----------------------"\n'
1288 +            txt += '      diff -q input-files.txt processed-files.txt\n'
1289 +            txt += '      fileverify_status=$?\n'
1290 +            txt += '      if [ $fileverify_status -ne 0 ]; then\n'
1291 +            txt += '         executable_exit_status=30001\n'
1292 +            txt += '         echo "ERROR ==> not all input files processed"\n'
1293 +            txt += '         echo "      ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n'
1294 +            txt += '         echo "      ==> diff input-files.txt processed-files.txt"\n'
1295 +            txt += '      fi\n'
1296 +            txt += '    fi\n'
1297 +            txt += '\n'
1298 +        txt += 'else\n'
1299 +        txt += '    echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1300          txt += 'fi\n'
1301          txt += '\n'
1302 +        txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
1303 +        txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
1304 +        txt += 'job_exit_code=$executable_exit_status\n'
1305 +
1306          return txt
1307  
1308      def setParam_(self, param, value):
# Line 1365 | Line 1320 | class Cmssw(JobType):
1320              nd[e]=0
1321          return nd.keys()
1322  
1323 <
1369 <    def checkOut(self, limit):
1323 >    def outList(self):
1324          """
1325          check the dimension of the output files
1326          """
1327 <        txt = 'echo ">>> Starting output sandbox limit check :"\n'
1328 <        allOutFiles = ""
1327 >        txt = ''
1328 >        txt += 'echo ">>> list of expected files on output sandbox"\n'
1329          listOutFiles = []
1330 <        txt += 'stdoutFile=`ls *stdout` \n'
1331 <        txt += 'stderrFile=`ls *stderr` \n'
1330 >        stdout = 'CMSSW_$NJob.stdout'
1331 >        stderr = 'CMSSW_$NJob.stderr'
1332          if (self.return_data == 1):
1333 <            for fileOut in (self.output_file+self.output_file_sandbox):
1334 <                allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob') + " $stdoutFile $stderrFile"
1335 <        else:            
1336 <            for fileOut in (self.output_file_sandbox):
1337 <                txt += 'echo " '+fileOut+'";\n'
1338 <                allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob') + " $stdoutFile $stderrFile"
1339 <        txt += 'echo "OUTPUT files: '+str(allOutFiles)+'";\n'
1340 <        txt += 'ls -gGhrta;\n'
1341 <        txt += 'sum=0;\n'
1342 <        txt += 'for file in '+str(allOutFiles)+' ; do\n'
1343 <        txt += '    if [ -e $file ]; then\n'
1344 <        txt += '        tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
1391 <        txt += '        sum=`expr $sum + $tt`\n'
1392 <        txt += '    else\n'
1393 <        txt += '        echo "WARNING: output file $file not found!"\n'
1394 <        txt += '    fi\n'
1395 <        txt += 'done\n'
1396 <        txt += 'echo "Total Output dimension: $sum";\n'
1397 <        txt += 'limit='+str(limit)+';\n'
1398 <        txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
1399 <        txt += 'if [ $limit -lt $sum ]; then\n'
1400 <        txt += '    echo "WARNING: output files have to big size - something will be lost;"\n'
1401 <        txt += '    echo "         checking the output file sizes..."\n'
1402 <        txt += '    tot=0;\n'
1403 <        txt += '    for filefile in '+str(allOutFiles)+' ; do\n'
1404 <        txt += '        dimFile=`ls -gGrta $filefile | awk \'{ print $3 }\';`\n'
1405 <        txt += '        tot=`expr $tot + $tt`;\n'
1406 <        txt += '        if [ $limit -lt $dimFile ]; then\n'
1407 <        txt += '            echo "deleting file: $filefile";\n'
1408 <        txt += '            rm -f $filefile\n'
1409 <        txt += '        elif [ $limit -lt $tot ]; then\n'
1410 <        txt += '            echo "deleting file: $filefile";\n'
1411 <        txt += '            rm -f $filefile\n'
1412 <        txt += '        else\n'
1413 <        txt += '            echo "saving file: $filefile"\n'
1414 <        txt += '        fi\n'
1415 <        txt += '    done\n'
1416 <
1417 <        txt += '    ls -agGhrt;\n'
1418 <        txt += '    echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
1419 <        txt += '    echo "JOB_EXIT_STATUS = 70000";\n'
1420 <        txt += '    exit_status=70000;\n'
1421 <        txt += 'else'
1422 <        txt += '    echo "Total Output dimension $sum is fine.";\n'
1423 <        txt += 'fi\n'
1424 <        txt += 'echo "Ending output sandbox limit check"\n'
1333 >            for file in (self.output_file+self.output_file_sandbox):
1334 >                listOutFiles.append(numberFile(file, '$NJob'))
1335 >            listOutFiles.append(stdout)
1336 >            listOutFiles.append(stderr)
1337 >        else:
1338 >            for file in (self.output_file_sandbox):
1339 >                listOutFiles.append(numberFile(file, '$NJob'))
1340 >            listOutFiles.append(stdout)
1341 >            listOutFiles.append(stderr)
1342 >        txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n'
1343 >        txt += 'filesToCheck="'+string.join(listOutFiles,' ')+'"\n'
1344 >        txt += 'export filesToCheck\n'
1345          return txt

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines