ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.183 by spiga, Wed Apr 30 18:21:07 2008 UTC vs.
Revision 1.221 by fanzago, Wed Jun 18 14:02:42 2008 UTC

# Line 10 | Line 10 | from LFNBaseName import *
10   import os, string, glob
11  
12   class Cmssw(JobType):
13 <    def __init__(self, cfg_params, ncjobs):
13 >    def __init__(self, cfg_params, ncjobs,skip_blocks, isNew):
14          JobType.__init__(self, 'CMSSW')
15          common.logger.debug(3,'CMSSW::__init__')
16 <
16 >        self.skip_blocks = skip_blocks
17 >
18          self.argsList = []
19  
20          self._params = {}
# Line 35 | Line 36 | class Cmssw(JobType):
36          self.executable_arch = self.scram.getArch()
37          self.tgz_name = 'default.tgz'
38          self.scriptName = 'CMSSW.sh'
39 <        self.pset = ''      #scrip use case Da
40 <        self.datasetPath = '' #scrip use case Da
39 >        self.pset = ''
40 >        self.datasetPath = ''
41  
42          # set FJR file name
43          self.fjrFileName = 'crab_fjr.xml'
44  
45          self.version = self.scram.getSWVersion()
46          version_array = self.version.split('_')
47 <        self.major_version = 0
48 <        self.minor_version = 0
47 >        self.CMSSW_major = 0
48 >        self.CMSSW_minor = 0
49 >        self.CMSSW_patch = 0
50          try:
51 <            self.major_version = int(version_array[1])
52 <            self.minor_version = int(version_array[2])
51 >            self.CMSSW_major = int(version_array[1])
52 >            self.CMSSW_minor = int(version_array[2])
53 >            self.CMSSW_patch = int(version_array[3])
54          except:
55 <            msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
55 >            msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
56              raise CrabException(msg)
57  
55
56        #
57        # Try to block creation in case of arch/version mismatch
58        #
59
60 #        a = string.split(self.version, "_")
61 #
62 #        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
63 #            msg = "Warning: You are using %s version of CMSSW  with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
64 #            common.logger.message(msg)
65 #        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
66 #            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
67 #            raise CrabException(msg)
68 #
69
70
58          ### collect Data cards
59  
60          if not cfg_params.has_key('CMSSW.datasetpath'):
61              msg = "Error: datasetpath not defined "
62              raise CrabException(msg)
63 +        
64 +        ### Temporary: added to remove input file control in the case of PU
65 +        if not cfg_params.has_key('USER.dataset_pu'):
66 +            self.dataset_pu = 'NONE'
67 +        else:
68 +            self.dataset_pu = cfg_params['USER.dataset_pu']
69 +        ####    
70 +        
71          tmp =  cfg_params['CMSSW.datasetpath']
72          log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
73          if string.lower(tmp)=='none':
# Line 83 | Line 78 | class Cmssw(JobType):
78              self.selectNoInput = 0
79  
80          self.dataTiers = []
81 <
81 >        self.debugWrap = ''
82 >        self.debug_wrapper = cfg_params.get('USER.debug_wrapper',False)
83 >        if self.debug_wrapper: self.debugWrap='--debug'
84          ## now the application
85          self.executable = cfg_params.get('CMSSW.executable','cmsRun')
86          log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
# Line 106 | Line 103 | class Cmssw(JobType):
103          self.output_file_sandbox.append(self.fjrFileName)
104  
105          # other output files to be returned via sandbox or copied to SE
106 +        outfileflag = False
107          self.output_file = []
108          tmp = cfg_params.get('CMSSW.output_file',None)
109          if tmp :
110 <            tmpOutFiles = string.split(tmp,',')
111 <            log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
112 <            for tmp in tmpOutFiles:
113 <                tmp=string.strip(tmp)
116 <                self.output_file.append(tmp)
117 <                pass
118 <        else:
119 <            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
120 <        pass
110 >            self.output_file = [x.strip() for x in tmp.split(',')]
111 >            outfileflag = True #output found
112 >        #else:
113 >        #    log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
114  
115          # script_exe file as additional file in inputSandbox
116          self.scriptExe = cfg_params.get('USER.script_exe',None)
# Line 127 | Line 120 | class Cmssw(JobType):
120                  raise CrabException(msg)
121              self.additional_inbox_files.append(string.strip(self.scriptExe))
122  
130        #CarlosDaniele
123          if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
124              msg ="Error. script_exe  not defined"
125              raise CrabException(msg)
126  
127 +        # use parent files...
128 +        self.useParent = self.cfg_params.get('CMSSW.use_parent',False)
129 +
130          ## additional input files
131          if cfg_params.has_key('USER.additional_input_files'):
132              tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
# Line 150 | Line 145 | class Cmssw(JobType):
145                      if not os.path.exists(file):
146                          raise CrabException("Additional input file not found: "+file)
147                      pass
153                    # fname = string.split(file, '/')[-1]
154                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
155                    # shutil.copyfile(file, storedFile)
148                      self.additional_inbox_files.append(string.strip(file))
149                  pass
150              pass
# Line 178 | Line 170 | class Cmssw(JobType):
170          if cfg_params.has_key('CMSSW.total_number_of_events'):
171              self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
172              self.selectTotalNumberEvents = 1
173 +            if self.selectNumberOfJobs  == 1:
174 +                if (self.total_number_of_events != -1) and int(self.total_number_of_events) < int(self.theNumberOfJobs):
175 +                    msg = 'Must specify at least one event per job. total_number_of_events > number_of_jobs '
176 +                    raise CrabException(msg)
177          else:
178              self.total_number_of_events = 0
179              self.selectTotalNumberEvents = 0
180  
181 <        if self.pset != None: #CarlosDaniele
181 >        if self.pset != None:
182               if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
183                   msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
184                   raise CrabException(msg)
# Line 212 | Line 208 | class Cmssw(JobType):
208          if self.sourceSeed:
209              print "pythia_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
210              self.incrementSeeds.append('sourceSeed')
211 +            self.incrementSeeds.append('theSource')
212  
213          self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
214          if self.sourceSeedVtx:
# Line 230 | Line 227 | class Cmssw(JobType):
227  
228          self.firstRun = cfg_params.get('CMSSW.first_run',None)
229  
233        if self.pset != None: #CarlosDaniele
234            import PsetManipulator as pp
235            PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
230  
231          # Copy/return
238
232          self.copy_data = int(cfg_params.get('USER.copy_data',0))
233          self.return_data = int(cfg_params.get('USER.return_data',0))
234  
# Line 251 | Line 244 | class Cmssw(JobType):
244              blockSites = self.DataDiscoveryAndLocation(cfg_params)
245          #DBSDLS-end
246  
254
247          ## Select Splitting
248          if self.selectNoInput:
249 <            if self.pset == None: #CarlosDaniele
249 >            if self.pset == None:
250                  self.jobSplittingForScript()
251              else:
252                  self.jobSplittingNoInput()
253          else:
254              self.jobSplittingByBlocks(blockSites)
255  
256 <        # modify Pset
257 <        if self.pset != None: #CarlosDaniele
258 <            try:
259 <                # Add FrameworkJobReport to parameter-set, set max events.
260 <                # Reset later for data jobs by writeCFG which does all modifications
261 <                PsetEdit.addCrabFJR(self.fjrFileName) # FUTURE: Job report addition not needed by CMSSW>1.5
262 <                PsetEdit.maxEvent(self.eventsPerJob)
263 <                PsetEdit.psetWriter(self.configFilename())
264 <            except:
265 <                msg='Error while manipuliating ParameterSet: exiting...'
266 <                raise CrabException(msg)
267 <        self.tgzNameWithPath = self.getTarBall(self.executable)
256 >        # modify Pset only the first time
257 >        if isNew:
258 >            if self.pset != None:
259 >                import PsetManipulator as pp
260 >                PsetEdit = pp.PsetManipulator(self.pset)
261 >                try:
262 >                    # Add FrameworkJobReport to parameter-set, set max events.
263 >                    # Reset later for data jobs by writeCFG which does all modifications
264 >                    PsetEdit.addCrabFJR(self.fjrFileName) # FUTURE: Job report addition not needed by CMSSW>1.5
265 >                    PsetEdit.maxEvent(self.eventsPerJob)
266 >                    PsetEdit.psetWriter(self.configFilename())
267 >                    ## If present, add TFileService to output files
268 >                    if not int(cfg_params.get('CMSSW.skip_TFileService_output',0)):
269 >                        tfsOutput = PsetEdit.getTFileService()
270 >                        if tfsOutput:
271 >                            if tfsOutput in self.output_file:
272 >                                common.logger.debug(5,"Output from TFileService "+tfsOutput+" already in output files")
273 >                            else:
274 >                                outfileflag = True #output found
275 >                                self.output_file.append(tfsOutput)
276 >                                common.logger.message("Adding "+tfsOutput+" to output files (from TFileService)")
277 >                            pass
278 >                        pass
279 >                    ## If present and requested, add PoolOutputModule to output files
280 >                    if int(cfg_params.get('CMSSW.get_edm_output',0)):
281 >                        edmOutput = PsetEdit.getPoolOutputModule()
282 >                        if edmOutput:
283 >                            if edmOutput in self.output_file:
284 >                                common.logger.debug(5,"Output from PoolOutputModule "+edmOutput+" already in output files")
285 >                            else:
286 >                                self.output_file.append(edmOutput)
287 >                                common.logger.message("Adding "+edmOutput+" to output files (from PoolOutputModule)")
288 >                            pass
289 >                        pass
290 >                except CrabException:
291 >                    msg='Error while manipulating ParameterSet: exiting...'
292 >                    raise CrabException(msg)
293 >            ## Prepare inputSandbox TarBall (only the first time)  
294 >            self.tgzNameWithPath = self.getTarBall(self.executable)
295  
296      def DataDiscoveryAndLocation(self, cfg_params):
297  
# Line 285 | Line 304 | class Cmssw(JobType):
304          ## Contact the DBS
305          common.logger.message("Contacting Data Discovery Services ...")
306          try:
307 <            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
307 >            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
308              self.pubdata.fetchDBSInfo()
309  
310          except DataDiscovery.NotExistingDatasetError, ex :
# Line 301 | Line 320 | class Cmssw(JobType):
320          self.filesbyblock=self.pubdata.getFiles()
321          self.eventsbyblock=self.pubdata.getEventsPerBlock()
322          self.eventsbyfile=self.pubdata.getEventsPerFile()
323 +        self.parentFiles=self.pubdata.getParent()
324  
325          ## get max number of events
326 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
326 >        self.maxEvents=self.pubdata.getMaxEvents()
327  
328          ## Contact the DLS and build a list of sites hosting the fileblocks
329          try:
# Line 327 | Line 347 | class Cmssw(JobType):
347  
348          return sites
349  
330  # to Be Removed  DS -- BL
331  #  def setArgsList(self, argsList):
332  #      self.argsList = argsList
333
350      def jobSplittingByBlocks(self, blockSites):
351          """
352          Perform job splitting. Jobs run over an integer number of files
# Line 421 | Line 437 | class Cmssw(JobType):
437  
438                  # ---- Iterate over the files in the block until we've met the requested ---- #
439                  # ---- total # of events or we've gone over all the files in this block  ---- #
440 +                pString=''
441                  while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
442                      file = files[fileCount]
443 +                    if self.useParent:
444 +                        parent = self.parentFiles[file]
445 +                        for f in parent :
446 +                            pString += '\\\"' + f + '\\\"\,'
447 +                        common.logger.debug(6, "File "+str(file)+" has the following parents: "+str(parent))
448 +                        common.logger.write("File "+str(file)+" has the following parents: "+str(parent))
449                      if newFile :
450                          try:
451                              numEventsInFile = self.eventsbyfile[file]
# Line 443 | Line 466 | class Cmssw(JobType):
466                              # end job using last file, use remaining events in block
467                              # close job and touch new file
468                              fullString = parString[:-2]
469 <                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
469 >                            if self.useParent:
470 >                                fullParentString = pString[:-2]
471 >                                list_of_lists.append([fullString,fullParentString,str(-1),str(jobSkipEventCount)])
472 >                            else:
473 >                                list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
474                              common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
475                              self.jobDestination.append(blockSites[block])
476                              common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
# Line 455 | Line 482 | class Cmssw(JobType):
482                              eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
483                              jobSkipEventCount = 0
484                              # reset file
485 +                            pString = ""
486                              parString = ""
487                              filesEventCount = 0
488                              newFile = 1
# Line 467 | Line 495 | class Cmssw(JobType):
495                      elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
496                          # close job and touch new file
497                          fullString = parString[:-2]
498 <                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
498 >                        if self.useParent:
499 >                            fullParentString = pString[:-2]
500 >                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
501 >                        else:
502 >                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
503                          common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
504                          self.jobDestination.append(blockSites[block])
505                          common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
# Line 478 | Line 510 | class Cmssw(JobType):
510                          eventsRemaining = eventsRemaining - eventsPerJobRequested
511                          jobSkipEventCount = 0
512                          # reset file
513 +                        pString = ""
514                          parString = ""
515                          filesEventCount = 0
516                          newFile = 1
# Line 487 | Line 520 | class Cmssw(JobType):
520                      else :
521                          # close job but don't touch new file
522                          fullString = parString[:-2]
523 <                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
523 >                        if self.useParent:
524 >                            fullParentString = pString[:-2]
525 >                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
526 >                        else:
527 >                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
528                          common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
529                          self.jobDestination.append(blockSites[block])
530                          common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
# Line 501 | Line 538 | class Cmssw(JobType):
538                          jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
539                          # remove all but the last file
540                          filesEventCount = self.eventsbyfile[file]
541 +                        if self.useParent:
542 +                            for f in parent : pString += '\\\"' + f + '\\\"\,'
543                          parString = '\\\"' + file + '\\\"\,'
544                      pass # END if
545                  pass # END while (iterate over files in the block)
# Line 609 | Line 648 | class Cmssw(JobType):
648          return
649  
650  
651 <    def jobSplittingForScript(self):#CarlosDaniele
651 >    def jobSplittingForScript(self):
652          """
653          Perform job splitting based on number of job
654          """
# Line 625 | Line 664 | class Cmssw(JobType):
664          # argument is seed number.$i
665          self.list_of_args = []
666          for i in range(self.total_number_of_jobs):
628            ## Since there is no input, any site is good
629           # self.jobDestination.append(["Any"])
667              self.jobDestination.append([""])
631            ## no random seed
668              self.list_of_args.append([str(i)])
669          return
670  
671 <    def split(self, jobParams):
671 >    def split(self, jobParams,firstJobID):
672  
637        #### Fabio
673          njobs = self.total_number_of_jobs
674          arglist = self.list_of_args
675          # create the empty structure
# Line 643 | Line 678 | class Cmssw(JobType):
678  
679          listID=[]
680          listField=[]
681 <        for job in range(njobs):
682 <            jobParams[job] = arglist[job]
681 >        for id in range(njobs):
682 >            job = id + int(firstJobID)
683 >            jobParams[id] = arglist[id]
684              listID.append(job+1)
685              job_ToSave ={}
686              concString = ' '
687              argu=''
688 <            if len(jobParams[job]):
689 <                argu +=   concString.join(jobParams[job] )
690 <            job_ToSave['arguments']= str(job+1)+' '+argu## new BL--DS
691 <            job_ToSave['dlsDestination']= self.jobDestination[job]## new BL--DS
656 <            #common._db.updateJob_(job,job_ToSave)## new BL--DS
688 >            if len(jobParams[id]):
689 >                argu +=   concString.join(jobParams[id] )
690 >            job_ToSave['arguments']= str(job+1)+' '+argu
691 >            job_ToSave['dlsDestination']= self.jobDestination[id]
692              listField.append(job_ToSave)
693              msg="Job "+str(job)+" Arguments:   "+str(job+1)+" "+argu+"\n"  \
694 <            +"                     Destination: "+str(self.jobDestination[job])
694 >            +"                     Destination: "+str(self.jobDestination[id])
695              common.logger.debug(5,msg)
696 <            #common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
662 <        common._db.updateJob_(listID,listField)## new BL--DS
663 <        ## Pay Attention Here....DS--BL
696 >        common._db.updateJob_(listID,listField)
697          self.argsList = (len(jobParams[0])+1)
698  
699          return
700  
701      def numberOfJobs(self):
669        # Fabio
702          return self.total_number_of_jobs
703  
704      def getTarBall(self, exe):
705          """
706          Return the TarBall with lib and exe
707          """
676
677        # if it exist, just return it
678        #
679        # Marco. Let's start to use relative path for Boss XML files
680        #
708          self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
709          if os.path.exists(self.tgzNameWithPath):
710              return self.tgzNameWithPath
# Line 691 | Line 718 | class Cmssw(JobType):
718  
719          # First of all declare the user Scram area
720          swArea = self.scram.getSWArea_()
694        #print "swArea = ", swArea
695        # swVersion = self.scram.getSWVersion()
696        # print "swVersion = ", swVersion
721          swReleaseTop = self.scram.getReleaseTop_()
698        #print "swReleaseTop = ", swReleaseTop
722  
723          ## check if working area is release top
724          if swReleaseTop == '' or swArea == swReleaseTop:
# Line 741 | Line 764 | class Cmssw(JobType):
764                  tar.add(module,moduleDir)
765  
766              ## Now check if any data dir(s) is present
744            swAreaLen=len(swArea)
767              self.dataExist = False
768 <            for root, dirs, files in os.walk(swArea):
769 <                if "data" in dirs:
770 <                    self.dataExist=True
771 <                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
772 <                    tar.add(root+"/data",root[swAreaLen:]+"/data")
768 >            todo_list = [(i, i) for i in  os.listdir(swArea+"/src")]
769 >            while len(todo_list):
770 >                entry, name = todo_list.pop()
771 >                if name.startswith('crab_0_') or  name.startswith('.') or name == 'CVS':
772 >                    continue
773 >                if os.path.isdir(swArea+"/src/"+entry):
774 >                    entryPath = entry + '/'
775 >                    todo_list += [(entryPath + i, i) for i in  os.listdir(swArea+"/src/"+entry)]
776 >                    if name == 'data':
777 >                        self.dataExist=True
778 >                        common.logger.debug(5,"data "+entry+" to be tarred")
779 >                        tar.add(swArea+"/src/"+entry,"src/"+entry)
780 >                    pass
781 >                pass
782  
783              ### CMSSW ParameterSet
784              if not self.pset is None:
# Line 757 | Line 788 | class Cmssw(JobType):
788  
789  
790              ## Add ProdCommon dir to tar
791 <            prodcommonDir = 'ProdCommon'
792 <            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
793 <            if os.path.isdir(prodcommonPath):
794 <                tar.add(prodcommonPath,prodcommonDir)
791 >            prodcommonDir = './'
792 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
793 >            neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools','ProdCommon/Core','ProdCommon/MCPayloads', 'IMProv']
794 >            for file in neededStuff:
795 >                tar.add(prodcommonPath+file,prodcommonDir+file)
796              common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
797  
798              ##### ML stuff
# Line 771 | Line 803 | class Cmssw(JobType):
803              common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
804  
805              ##### Utils
806 <            Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'JobReportErrorCode.py']
806 >            Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py']
807              for file in Utils_file_list:
808                  tar.add(path+file,file)
809              common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
# Line 782 | Line 814 | class Cmssw(JobType):
814              common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
815  
816              tar.close()
817 <        except :
818 <            raise CrabException('Could not create tar-ball')
817 >        except IOError:
818 >            raise CrabException('Could not create tar-ball '+self.tgzNameWithPath)
819 >        except tarfile.TarError:
820 >            raise CrabException('Could not create tar-ball '+self.tgzNameWithPath)
821  
822          ## check for tarball size
823          tarballinfo = os.stat(self.tgzNameWithPath)
# Line 797 | Line 831 | class Cmssw(JobType):
831          Returns part of a job script which prepares
832          the execution environment for the job 'nj'.
833          """
834 +        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
835 +            psetName = 'pset.py'
836 +        else:
837 +            psetName = 'pset.cfg'
838          # Prepare JobType-independent part
839          txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
840          txt += 'echo ">>> setup environment"\n'
# Line 815 | Line 853 | class Cmssw(JobType):
853          txt += '    cd $WORKING_DIR\n'
854          txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
855          txt += self.wsSetupCMSOSGEnvironment_()
818        #txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
819        #txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
856          txt += 'fi\n'
857  
858          # Prepare JobType-specific part
# Line 855 | Line 891 | class Cmssw(JobType):
891  
892          # Prepare job-specific part
893          job = common.job_list[nj]
858        ### FEDE FOR DBS OUTPUT PUBLICATION
894          if (self.datasetPath):
895              txt += '\n'
896              txt += 'DatasetPath='+self.datasetPath+'\n'
# Line 877 | Line 912 | class Cmssw(JobType):
912              txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
913              if (self.datasetPath): # standard job
914                  txt += 'InputFiles=${args[1]}; export InputFiles\n'
915 <                txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
916 <                txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
915 >                if (self.useParent):  
916 >                    txt += 'ParentFiles=${args[2]}; export ParentFiles\n'
917 >                    txt += 'MaxEvents=${args[3]}; export MaxEvents\n'
918 >                    txt += 'SkipEvents=${args[4]}; export SkipEvents\n'
919 >                else:
920 >                    txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
921 >                    txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
922                  txt += 'echo "Inputfiles:<$InputFiles>"\n'
923 +                if (self.useParent): txt += 'echo "ParentFiles:<$ParentFiles>"\n'
924                  txt += 'echo "MaxEvents:<$MaxEvents>"\n'
925                  txt += 'echo "SkipEvents:<$SkipEvents>"\n'
926              else:  # pythia like job
# Line 891 | Line 932 | class Cmssw(JobType):
932                      txt += 'FirstRun=${args[1]}; export FirstRun\n'
933                      txt += 'echo "FirstRun: <$FirstRun>"\n'
934  
935 <            txt += 'mv -f '+pset+' pset.cfg\n'
935 >            txt += 'mv -f ' + pset + ' ' + psetName + '\n'
936  
937  
938          if self.pset != None:
939 >            # FUTURE: Can simplify for 2_1_x and higher
940              txt += '\n'
941 <            txt += 'echo "***** cat pset.cfg *********"\n'
942 <            txt += 'cat pset.cfg\n'
943 <            txt += 'echo "****** end pset.cfg ********"\n'
944 <            txt += '\n'
945 <            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
941 >            if self.debug_wrapper==True:
942 >                txt += 'echo "***** cat ' + psetName + ' *********"\n'
943 >                txt += 'cat ' + psetName + '\n'
944 >                txt += 'echo "****** end ' + psetName + ' ********"\n'
945 >                txt += '\n'
946 >            txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n'
947              txt += 'echo "PSETHASH = $PSETHASH" \n'
948              txt += '\n'
949          return txt
# Line 916 | Line 959 | class Cmssw(JobType):
959          if os.path.isfile(self.tgzNameWithPath):
960              txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
961              txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
962 <            txt += 'ls -Al \n'
962 >            if  self.debug_wrapper:
963 >                txt += 'ls -Al \n'
964              txt += 'untar_status=$? \n'
965              txt += 'if [ $untar_status -ne 0 ]; then \n'
966              txt += '   echo "ERROR ==> Untarring .tgz file failed"\n'
# Line 926 | Line 970 | class Cmssw(JobType):
970              txt += '   echo "Successful untar" \n'
971              txt += 'fi \n'
972              txt += '\n'
973 <            txt += 'echo ">>> Include ProdCommon in PYTHONPATH:"\n'
973 >            txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
974              txt += 'if [ -z "$PYTHONPATH" ]; then\n'
975 <            txt += '   export PYTHONPATH=$RUNTIME_AREA/ProdCommon\n'
975 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/\n'
976              txt += 'else\n'
977 <            txt += '   export PYTHONPATH=$RUNTIME_AREA/ProdCommon:${PYTHONPATH}\n'
977 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
978              txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
979              txt += 'fi\n'
980              txt += '\n'
# Line 951 | Line 995 | class Cmssw(JobType):
995          txt += 'rm -r lib/ module/ \n'
996          txt += 'mv $RUNTIME_AREA/lib/ . \n'
997          txt += 'mv $RUNTIME_AREA/module/ . \n'
998 <        if self.dataExist == True: txt += 'mv $RUNTIME_AREA/src/ . \n'
998 >        if self.dataExist == True:
999 >            txt += 'rm -r src/ \n'
1000 >            txt += 'mv $RUNTIME_AREA/src/ . \n'
1001          if len(self.additional_inbox_files)>0:
1002              for file in self.additional_inbox_files:
1003 <                txt += 'mv $RUNTIME_AREA/'+file+' . \n'
1004 <        txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n'
1003 >                txt += 'mv $RUNTIME_AREA/'+os.path.basename(file)+' . \n'
1004 >        # txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n'
1005 >        # txt += 'mv $RUNTIME_AREA/IMProv/ . \n'
1006  
1007 +        txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
1008          txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1009 <        txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdCommon\n'
1009 >        txt += '   export PYTHONPATH=$RUNTIME_AREA/\n'
1010          txt += 'else\n'
1011 <        txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1011 >        txt += '   export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
1012          txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1013          txt += 'fi\n'
1014          txt += '\n'
1015  
1016          return txt
1017  
970    def modifySteeringCards(self, nj):
971        """
972        modify the card provided by the user,
973        writing a new card into share dir
974        """
1018  
1019      def executableName(self):
1020 <        if self.scriptExe: #CarlosDaniele
1020 >        if self.scriptExe:
1021              return "sh "
1022          else:
1023              return self.executable
# Line 987 | Line 1030 | class Cmssw(JobType):
1030              ex_args = ""
1031              # FUTURE: This tests the CMSSW version. Can remove code as versions deprecated
1032              # Framework job report
1033 <            if (self.major_version >= 1 and self.minor_version >= 5) or (self.major_version >= 2):
1033 >            if (self.CMSSW_major >= 1 and self.CMSSW_minor >= 5) or (self.CMSSW_major >= 2):
1034                  ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
1035 <            # Type of cfg file
1036 <            if self.major_version >= 2 :
1035 >            # Type of config file
1036 >            if self.CMSSW_major >= 2 :
1037                  ex_args += " -p pset.py"
1038              else:
1039                  ex_args += " -p pset.cfg"
# Line 1001 | Line 1044 | class Cmssw(JobType):
1044          Returns a list of filenames to be put in JDL input sandbox.
1045          """
1046          inp_box = []
1004        # # dict added to delete duplicate from input sandbox file list
1005        # seen = {}
1006        ## code
1047          if os.path.isfile(self.tgzNameWithPath):
1048              inp_box.append(self.tgzNameWithPath)
1049          wrapper = os.path.basename(str(common._db.queryTask('scriptName')))
# Line 1019 | Line 1059 | class Cmssw(JobType):
1059          ## User Declared output files
1060          for out in (self.output_file+self.output_file_sandbox):
1061              n_out = nj + 1
1062 <            out_box.append(self.numberFile_(out,str(n_out)))
1062 >            out_box.append(numberFile(out,str(n_out)))
1063          return out_box
1064  
1025    def prepareSteeringCards(self):
1026        """
1027        Make initial modifications of the user's steering card file.
1028        """
1029        return
1065  
1066      def wsRenameOutput(self, nj):
1067          """
# Line 1036 | Line 1071 | class Cmssw(JobType):
1071          txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
1072          txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1073          txt += 'echo ">>> current directory content:"\n'
1074 <        txt += 'ls \n'
1074 >        if self.debug_wrapper:
1075 >            txt += 'ls -Al\n'
1076          txt += '\n'
1077  
1078          for fileWithSuffix in (self.output_file):
1079 <            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1079 >            output_file_num = numberFile(fileWithSuffix, '$NJob')
1080              txt += '\n'
1081              txt += '# check output file\n'
1082              txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
# Line 1061 | Line 1097 | class Cmssw(JobType):
1097              txt += 'fi\n'
1098          file_list = []
1099          for fileWithSuffix in (self.output_file):
1100 <             file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1100 >             file_list.append(numberFile(fileWithSuffix, '$NJob'))
1101  
1102          txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1103          txt += '\n'
1104          txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1105          txt += 'echo ">>> current directory content:"\n'
1106 <        txt += 'ls \n'
1106 >        if self.debug_wrapper:
1107 >            txt += 'ls -Al\n'
1108          txt += '\n'
1109          txt += 'cd $RUNTIME_AREA\n'
1110          txt += 'echo ">>> current directory (RUNTIME_AREA):  $RUNTIME_AREA"\n'
1111          return txt
1112  
1076    def numberFile_(self, file, txt):
1077        """
1078        append _'txt' before last extension of a file
1079        """
1080        p = string.split(file,".")
1081        # take away last extension
1082        name = p[0]
1083        for x in p[1:-1]:
1084            name=name+"."+x
1085        # add "_txt"
1086        if len(p)>1:
1087            ext = p[len(p)-1]
1088            result = name + '_' + txt + "." + ext
1089        else:
1090            result = name + '_' + txt
1091
1092        return result
1093
1113      def getRequirements(self, nj=[]):
1114          """
1115          return job requirements to add to jdl files
# Line 1100 | Line 1119 | class Cmssw(JobType):
1119              req='Member("VO-cms-' + \
1120                   self.version + \
1121                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1122 <        ## SL add requirement for OS version only if SL4
1104 <        #reSL4 = re.compile( r'slc4' )
1105 <        if self.executable_arch: # and reSL4.search(self.executable_arch):
1122 >        if self.executable_arch:
1123              req+=' && Member("VO-cms-' + \
1124                   self.executable_arch + \
1125                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
# Line 1116 | Line 1133 | class Cmssw(JobType):
1133      def configFilename(self):
1134          """ return the config filename """
1135          # FUTURE: Can remove cfg mode for CMSSW >= 2_1_x
1136 <        if (self.major_version >= 2 and self.minor_version >= 1) or (self.major_version >= 3):
1136 >        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
1137            return self.name()+'.py'
1138          else:
1139            return self.name()+'.cfg'
# Line 1145 | Line 1162 | class Cmssw(JobType):
1162  
1163          return txt
1164  
1148    ### OLI_DANIELE
1165      def wsSetupCMSLCGEnvironment_(self):
1166          """
1167          Returns part of a job script which is prepares
# Line 1180 | Line 1196 | class Cmssw(JobType):
1196          txt += '    echo "==> setup cms environment ok"\n'
1197          return txt
1198  
1183    ### FEDE FOR DBS OUTPUT PUBLICATION
1199      def modifyReport(self, nj):
1200          """
1201          insert the part of the script that modifies the FrameworkJob Report
1202          """
1188
1203          txt = '\n#Written by cms_cmssw::modifyReport\n'
1204          publish_data = int(self.cfg_params.get('USER.publish_data',0))
1205          if (publish_data == 1):
# Line 1201 | Line 1215 | class Cmssw(JobType):
1215              txt += 'fi\n'
1216  
1217              txt += 'echo ">>> Modify Job Report:" \n'
1218 <            txt += 'chmod a+x $SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
1218 >            txt += 'chmod a+x $RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
1219              txt += 'ProcessedDataset='+processedDataset+'\n'
1220              txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1221              txt += 'echo "SE = $SE"\n'
1222              txt += 'echo "SE_PATH = $SE_PATH"\n'
1223              txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1224              txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1225 <            txt += 'echo "$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1226 <            txt += '$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1225 >            txt += 'echo "$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1226 >            txt += '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1227              txt += 'modifyReport_result=$?\n'
1228              txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1229              txt += '    modifyReport_result=70500\n'
# Line 1221 | Line 1235 | class Cmssw(JobType):
1235              txt += 'fi\n'
1236          return txt
1237  
1238 +    def wsParseFJR(self):
1239 +        """
1240 +        Parse the FrameworkJobReport to obtain useful infos
1241 +        """
1242 +        txt = '\n#Written by cms_cmssw::wsParseFJR\n'
1243 +        txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
1244 +        txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
1245 +        txt += '    if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
1246 +        txt += '        cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n'
1247 +        if self.debug_wrapper :
1248 +            txt += '        echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
1249 +        txt += '        executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n'
1250 +        txt += '        if [ $executable_exit_status -eq 50115 ];then\n'
1251 +        txt += '            echo ">>> crab_fjr.xml contents: "\n'
1252 +        txt += '            cat $RUNTIME_AREA/crab_fjr_NJob.xml\n'
1253 +        txt += '            echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
1254 +        txt += '        elif [ $executable_exit_status -eq -999 ];then\n'
1255 +        txt += '            echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n'
1256 +        txt += '        else\n'
1257 +        txt += '            echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
1258 +        txt += '        fi\n'
1259 +        txt += '    else\n'
1260 +        txt += '        echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1261 +        txt += '    fi\n'
1262 +          #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap
1263 +
1264 +        if (self.datasetPath and self.dataset_pu == 'NONE'):
1265 +          # VERIFY PROCESSED DATA
1266 +            txt += '    if [ $executable_exit_status -eq 0 ];then\n'
1267 +            txt += '      echo ">>> Verify list of processed files:"\n'
1268 +            txt += '      echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
1269 +            txt += '      python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --lfn > processed-files.txt\n'
1270 +            txt += '      cat input-files.txt  | sort | uniq > tmp.txt\n'
1271 +            txt += '      mv tmp.txt input-files.txt\n'
1272 +            txt += '      echo "cat input-files.txt"\n'
1273 +            txt += '      echo "----------------------"\n'
1274 +            txt += '      cat input-files.txt\n'
1275 +            txt += '      cat processed-files.txt | sort | uniq > tmp.txt\n'
1276 +            txt += '      mv tmp.txt processed-files.txt\n'
1277 +            txt += '      echo "----------------------"\n'
1278 +            txt += '      echo "cat processed-files.txt"\n'
1279 +            txt += '      echo "----------------------"\n'
1280 +            txt += '      cat processed-files.txt\n'
1281 +            txt += '      echo "----------------------"\n'
1282 +            txt += '      diff -q input-files.txt processed-files.txt\n'
1283 +            txt += '      fileverify_status=$?\n'
1284 +            txt += '      if [ $fileverify_status -ne 0 ]; then\n'
1285 +            txt += '         executable_exit_status=30001\n'
1286 +            txt += '         echo "ERROR ==> not all input files processed"\n'
1287 +            txt += '         echo "      ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n'
1288 +            txt += '         echo "      ==> diff input-files.txt processed-files.txt"\n'
1289 +            txt += '      fi\n'
1290 +            txt += '    fi\n'
1291 +            txt += '\n'
1292 +        txt += 'else\n'
1293 +        txt += '    echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1294 +        txt += 'fi\n'
1295 +        txt += '\n'
1296 +        txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
1297 +        txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
1298 +        txt += 'job_exit_code=$executable_exit_status\n'
1299 +
1300 +        return txt
1301 +
1302      def setParam_(self, param, value):
1303          self._params[param] = value
1304  
# Line 1247 | Line 1325 | class Cmssw(JobType):
1325          stderr = 'CMSSW_$NJob.stderr'
1326          if (self.return_data == 1):
1327              for file in (self.output_file+self.output_file_sandbox):
1328 <                listOutFiles.append(self.numberFile_(file, '$NJob'))
1328 >                listOutFiles.append(numberFile(file, '$NJob'))
1329              listOutFiles.append(stdout)
1330              listOutFiles.append(stderr)
1331          else:
1332              for file in (self.output_file_sandbox):
1333 <                listOutFiles.append(self.numberFile_(file, '$NJob'))
1333 >                listOutFiles.append(numberFile(file, '$NJob'))
1334              listOutFiles.append(stdout)
1335              listOutFiles.append(stderr)
1336          txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n'

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines