ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.137 by slacapra, Fri Nov 16 11:09:31 2007 UTC vs.
Revision 1.196 by ewv, Wed May 28 16:46:17 2008 UTC

# Line 5 | Line 5 | from crab_util import *
5   from BlackWhiteListParser import BlackWhiteListParser
6   import common
7   import Scram
8 + from LFNBaseName import *
9  
10   import os, string, glob
11  
# Line 13 | Line 14 | class Cmssw(JobType):
14          JobType.__init__(self, 'CMSSW')
15          common.logger.debug(3,'CMSSW::__init__')
16  
17 +        self.argsList = []
18 +
19          self._params = {}
20          self.cfg_params = cfg_params
18
21          # init BlackWhiteListParser
22          self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
23  
24 <        try:
23 <            self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24 <        except KeyError:
25 <            self.MaxTarBallSize = 9.5
24 >        self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',9.5))
25  
26          # number of jobs requested to be created, limit obj splitting
27          self.ncjobs = ncjobs
# Line 35 | Line 34 | class Cmssw(JobType):
34          self.executable = ''
35          self.executable_arch = self.scram.getArch()
36          self.tgz_name = 'default.tgz'
38        self.additional_tgz_name = 'additional.tgz'
37          self.scriptName = 'CMSSW.sh'
38 <        self.pset = ''      #scrip use case Da
39 <        self.datasetPath = '' #scrip use case Da
38 >        self.pset = ''
39 >        self.datasetPath = ''
40  
41          # set FJR file name
42          self.fjrFileName = 'crab_fjr.xml'
43  
44          self.version = self.scram.getSWVersion()
45 <
46 <        #
47 <        # Try to block creation in case of arch/version mismatch
48 <        #
49 <
50 <        a = string.split(self.version, "_")
51 <
52 <        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
53 <            msg = "Warning: You are using %s version of CMSSW  with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
54 <            common.logger.message(msg)
57 <        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58 <            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
45 >        version_array = self.version.split('_')
46 >        self.CMSSW_major = 0
47 >        self.CMSSW_minor = 0
48 >        self.CMSSW_patch = 0
49 >        try:
50 >            self.CMSSW_major = int(version_array[1])
51 >            self.CMSSW_minor = int(version_array[2])
52 >            self.CMSSW_patch = int(version_array[3])
53 >        except:
54 >            msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
55              raise CrabException(msg)
56  
61        common.taskDB.setDict('codeVersion',self.version)
62        self.setParam_('application', self.version)
63
57          ### collect Data cards
58  
59 <        try:
67 <            tmp =  cfg_params['CMSSW.datasetpath']
68 <            log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
69 <            if string.lower(tmp)=='none':
70 <                self.datasetPath = None
71 <                self.selectNoInput = 1
72 <            else:
73 <                self.datasetPath = tmp
74 <                self.selectNoInput = 0
75 <        except KeyError:
59 >        if not cfg_params.has_key('CMSSW.datasetpath'):
60              msg = "Error: datasetpath not defined "
61              raise CrabException(msg)
62 <
63 <        # ML monitoring
64 <        # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
65 <        if not self.datasetPath:
66 <            self.setParam_('dataset', 'None')
83 <            self.setParam_('owner', 'None')
62 >        tmp =  cfg_params['CMSSW.datasetpath']
63 >        log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
64 >        if string.lower(tmp)=='none':
65 >            self.datasetPath = None
66 >            self.selectNoInput = 1
67          else:
68 <            try:
69 <                datasetpath_split = self.datasetPath.split("/")
87 <                # standard style
88 <                self.setParam_('datasetFull', self.datasetPath)
89 <                self.setParam_('dataset', datasetpath_split[1])
90 <                self.setParam_('owner', datasetpath_split[2])
91 <            except:
92 <                self.setParam_('dataset', self.datasetPath)
93 <                self.setParam_('owner', self.datasetPath)
94 <
95 <        self.setTaskid_()
96 <        self.setParam_('taskId', self.cfg_params['taskId'])
68 >            self.datasetPath = tmp
69 >            self.selectNoInput = 0
70  
71          self.dataTiers = []
72  
73 +        self.debug_pset = cfg_params.get('USER.debug_pset',False)
74 +
75          ## now the application
76 <        try:
77 <            self.executable = cfg_params['CMSSW.executable']
103 <            self.setParam_('exe', self.executable)
104 <            log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
105 <            msg = "Default executable cmsRun overridden. Switch to " + self.executable
106 <            log.debug(3,msg)
107 <        except KeyError:
108 <            self.executable = 'cmsRun'
109 <            self.setParam_('exe', self.executable)
110 <            msg = "User executable not defined. Use cmsRun"
111 <            log.debug(3,msg)
112 <            pass
76 >        self.executable = cfg_params.get('CMSSW.executable','cmsRun')
77 >        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
78  
79 <        try:
115 <            self.pset = cfg_params['CMSSW.pset']
116 <            log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
117 <            if self.pset.lower() != 'none' :
118 <                if (not os.path.exists(self.pset)):
119 <                    raise CrabException("User defined PSet file "+self.pset+" does not exist")
120 <            else:
121 <                self.pset = None
122 <        except KeyError:
79 >        if not cfg_params.has_key('CMSSW.pset'):
80              raise CrabException("PSet file missing. Cannot run cmsRun ")
81 +        self.pset = cfg_params['CMSSW.pset']
82 +        log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
83 +        if self.pset.lower() != 'none' :
84 +            if (not os.path.exists(self.pset)):
85 +                raise CrabException("User defined PSet file "+self.pset+" does not exist")
86 +        else:
87 +            self.pset = None
88  
89          # output files
90          ## stuff which must be returned always via sandbox
# Line 130 | Line 94 | class Cmssw(JobType):
94          self.output_file_sandbox.append(self.fjrFileName)
95  
96          # other output files to be returned via sandbox or copied to SE
97 <        try:
98 <            self.output_file = []
99 <            tmp = cfg_params['CMSSW.output_file']
100 <            if tmp != '':
101 <                tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
102 <                log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
103 <                for tmp in tmpOutFiles:
104 <                    tmp=string.strip(tmp)
141 <                    self.output_file.append(tmp)
142 <                    pass
143 <            else:
144 <                log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
97 >        self.output_file = []
98 >        tmp = cfg_params.get('CMSSW.output_file',None)
99 >        if tmp :
100 >            tmpOutFiles = string.split(tmp,',')
101 >            log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
102 >            for tmp in tmpOutFiles:
103 >                tmp=string.strip(tmp)
104 >                self.output_file.append(tmp)
105                  pass
106 <            pass
147 <        except KeyError:
106 >        else:
107              log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
108 <            pass
108 >        pass
109  
110          # script_exe file as additional file in inputSandbox
111 <        try:
112 <            self.scriptExe = cfg_params['USER.script_exe']
113 <            if self.scriptExe != '':
114 <               if not os.path.isfile(self.scriptExe):
115 <                  msg ="ERROR. file "+self.scriptExe+" not found"
116 <                  raise CrabException(msg)
158 <               self.additional_inbox_files.append(string.strip(self.scriptExe))
159 <        except KeyError:
160 <            self.scriptExe = ''
111 >        self.scriptExe = cfg_params.get('USER.script_exe',None)
112 >        if self.scriptExe :
113 >            if not os.path.isfile(self.scriptExe):
114 >                msg ="ERROR. file "+self.scriptExe+" not found"
115 >                raise CrabException(msg)
116 >            self.additional_inbox_files.append(string.strip(self.scriptExe))
117  
162        #CarlosDaniele
118          if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
119 <           msg ="Error. script_exe  not defined"
120 <           raise CrabException(msg)
119 >            msg ="Error. script_exe  not defined"
120 >            raise CrabException(msg)
121  
122          ## additional input files
123 <        try:
123 >        if cfg_params.has_key('USER.additional_input_files'):
124              tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
125              for tmp in tmpAddFiles:
126                  tmp = string.strip(tmp)
# Line 182 | Line 137 | class Cmssw(JobType):
137                      if not os.path.exists(file):
138                          raise CrabException("Additional input file not found: "+file)
139                      pass
185                    # fname = string.split(file, '/')[-1]
186                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
187                    # shutil.copyfile(file, storedFile)
140                      self.additional_inbox_files.append(string.strip(file))
141                  pass
142              pass
143              common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
144 <        except KeyError:
193 <            pass
194 <
195 <        # files per job
196 <        try:
197 <            if (cfg_params['CMSSW.files_per_jobs']):
198 <                raise CrabException("files_per_jobs no longer supported.  Quitting.")
199 <        except KeyError:
200 <            pass
144 >        pass
145  
146          ## Events per job
147 <        try:
147 >        if cfg_params.has_key('CMSSW.events_per_job'):
148              self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
149              self.selectEventsPerJob = 1
150 <        except KeyError:
150 >        else:
151              self.eventsPerJob = -1
152              self.selectEventsPerJob = 0
153  
154          ## number of jobs
155 <        try:
155 >        if cfg_params.has_key('CMSSW.number_of_jobs'):
156              self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
157              self.selectNumberOfJobs = 1
158 <        except KeyError:
158 >        else:
159              self.theNumberOfJobs = 0
160              self.selectNumberOfJobs = 0
161  
162 <        try:
162 >        if cfg_params.has_key('CMSSW.total_number_of_events'):
163              self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
164              self.selectTotalNumberEvents = 1
165 <        except KeyError:
165 >            if self.selectNumberOfJobs  == 1:
166 >                if int(self.total_number_of_events) < int(self.theNumberOfJobs):
167 >                    msg = 'Must specify at least one event per job. total_number_of_events > number_of_jobs '
168 >                    raise CrabException(msg)
169 >        else:
170              self.total_number_of_events = 0
171              self.selectTotalNumberEvents = 0
172  
173 <        if self.pset != None: #CarlosDaniele
173 >        if self.pset != None:
174               if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
175                   msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
176                   raise CrabException(msg)
# Line 231 | Line 179 | class Cmssw(JobType):
179                   msg = 'Must specify  number_of_jobs.'
180                   raise CrabException(msg)
181  
182 <        ## source seed for pythia
183 <        try:
184 <            self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
185 <        except KeyError:
186 <            self.sourceSeed = None
187 <            common.logger.debug(5,"No seed given")
188 <
189 <        try:
190 <            self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
191 <        except KeyError:
192 <            self.sourceSeedVtx = None
193 <            common.logger.debug(5,"No vertex seed given")
194 <
195 <        try:
196 <            self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
197 <        except KeyError:
198 <            self.sourceSeedG4 = None
199 <            common.logger.debug(5,"No g4 sim hits seed given")
182 >        ## New method of dealing with seeds
183 >        self.incrementSeeds = []
184 >        self.preserveSeeds = []
185 >        if cfg_params.has_key('CMSSW.preserve_seeds'):
186 >            tmpList = cfg_params['CMSSW.preserve_seeds'].split(',')
187 >            for tmp in tmpList:
188 >                tmp.strip()
189 >                self.preserveSeeds.append(tmp)
190 >        if cfg_params.has_key('CMSSW.increment_seeds'):
191 >            tmpList = cfg_params['CMSSW.increment_seeds'].split(',')
192 >            for tmp in tmpList:
193 >                tmp.strip()
194 >                self.incrementSeeds.append(tmp)
195 >
196 >        ## Old method of dealing with seeds
197 >        ## FUTURE: This is for old CMSSW and old CRAB. Can throw exceptions after a couple of CRAB releases and then
198 >        ## remove
199 >        self.sourceSeed = cfg_params.get('CMSSW.pythia_seed',None)
200 >        if self.sourceSeed:
201 >            print "pythia_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
202 >            self.incrementSeeds.append('sourceSeed')
203 >            self.incrementSeeds.append('theSource')
204 >
205 >        self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
206 >        if self.sourceSeedVtx:
207 >            print "vtx_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
208 >            self.incrementSeeds.append('VtxSmeared')
209 >
210 >        self.sourceSeedG4 = cfg_params.get('CMSSW.g4_seed',None)
211 >        if self.sourceSeedG4:
212 >            print "g4_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
213 >            self.incrementSeeds.append('g4SimHits')
214 >
215 >        self.sourceSeedMix = cfg_params.get('CMSSW.mix_seed',None)
216 >        if self.sourceSeedMix:
217 >            print "mix_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
218 >            self.incrementSeeds.append('mix')
219  
220 <        try:
254 <            self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
255 <        except KeyError:
256 <            self.sourceSeedMix = None
257 <            common.logger.debug(5,"No mix seed given")
220 >        self.firstRun = cfg_params.get('CMSSW.first_run',None)
221  
259        try:
260            self.firstRun = int(cfg_params['CMSSW.first_run'])
261        except KeyError:
262            self.firstRun = None
263            common.logger.debug(5,"No first run given")
222          if self.pset != None: #CarlosDaniele
223              import PsetManipulator as pp
224              PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
225  
226 +        # Copy/return
227 +
228 +        self.copy_data = int(cfg_params.get('USER.copy_data',0))
229 +        self.return_data = int(cfg_params.get('USER.return_data',0))
230 +
231          #DBSDLS-start
232          ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
233          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
# Line 277 | Line 240 | class Cmssw(JobType):
240              blockSites = self.DataDiscoveryAndLocation(cfg_params)
241          #DBSDLS-end
242  
280        self.tgzNameWithPath = self.getTarBall(self.executable)
243  
244          ## Select Splitting
245          if self.selectNoInput:
246 <            if self.pset == None: #CarlosDaniele
246 >            if self.pset == None:
247                  self.jobSplittingForScript()
248              else:
249                  self.jobSplittingNoInput()
# Line 289 | Line 251 | class Cmssw(JobType):
251              self.jobSplittingByBlocks(blockSites)
252  
253          # modify Pset
254 <        if self.pset != None: #CarlosDaniele
254 >        if self.pset != None:
255              try:
256 <                if (self.datasetPath): # standard job
257 <                    # allow to processa a fraction of events in a file
258 <                    PsetEdit.inputModule("INPUTFILE")
259 <                    PsetEdit.maxEvent(0)
298 <                    PsetEdit.skipEvent(0)
299 <                else:  # pythia like job
300 <                    PsetEdit.maxEvent(self.eventsPerJob)
301 <                    if (self.firstRun):
302 <                        PsetEdit.pythiaFirstRun(0)  #First Run
303 <                    if (self.sourceSeed) :
304 <                        PsetEdit.pythiaSeed(0)
305 <                        if (self.sourceSeedVtx) :
306 <                            PsetEdit.vtxSeed(0)
307 <                        if (self.sourceSeedG4) :
308 <                            PsetEdit.g4Seed(0)
309 <                        if (self.sourceSeedMix) :
310 <                            PsetEdit.mixSeed(0)
311 <                # add FrameworkJobReport to parameter-set
312 <                PsetEdit.addCrabFJR(self.fjrFileName)
256 >                # Add FrameworkJobReport to parameter-set, set max events.
257 >                # Reset later for data jobs by writeCFG which does all modifications
258 >                PsetEdit.addCrabFJR(self.fjrFileName) # FUTURE: Job report addition not needed by CMSSW>1.5
259 >                PsetEdit.maxEvent(self.eventsPerJob)
260                  PsetEdit.psetWriter(self.configFilename())
261              except:
262 <                msg='Error while manipuliating ParameterSet: exiting...'
262 >                msg='Error while manipulating ParameterSet: exiting...'
263                  raise CrabException(msg)
264 +        self.tgzNameWithPath = self.getTarBall(self.executable)
265  
266      def DataDiscoveryAndLocation(self, cfg_params):
267  
# Line 326 | Line 274 | class Cmssw(JobType):
274          ## Contact the DBS
275          common.logger.message("Contacting Data Discovery Services ...")
276          try:
329
277              self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
278              self.pubdata.fetchDBSInfo()
279  
# Line 345 | Line 292 | class Cmssw(JobType):
292          self.eventsbyfile=self.pubdata.getEventsPerFile()
293  
294          ## get max number of events
295 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
295 >        self.maxEvents=self.pubdata.getMaxEvents()
296  
297          ## Contact the DLS and build a list of sites hosting the fileblocks
298          try:
# Line 419 | Line 366 | class Cmssw(JobType):
366          else :
367              totalNumberOfJobs = self.ncjobs
368  
422
369          blocks = blockSites.keys()
370          blockCount = 0
371          # Backup variable in case self.maxEvents counted events in a non-included block
# Line 474 | Line 420 | class Cmssw(JobType):
420                          except KeyError:
421                              common.logger.message("File "+str(file)+" has unknown number of events: skipping")
422  
423 <
423 >                    eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining)
424                      # if less events in file remain than eventsPerJobRequested
425 <                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
425 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested):
426                          # if last file in block
427                          if ( fileCount == numFilesInBlock-1 ) :
428                              # end job using last file, use remaining events in block
# Line 540 | Line 486 | class Cmssw(JobType):
486                          jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
487                          # remove all but the last file
488                          filesEventCount = self.eventsbyfile[file]
489 <                        parString = ""
544 <                        parString += '\\\"' + file + '\\\"\,'
489 >                        parString = '\\\"' + file + '\\\"\,'
490                      pass # END if
491                  pass # END while (iterate over files in the block)
492          pass # END while (iterate over blocks in the dataset)
# Line 561 | Line 506 | class Cmssw(JobType):
506          for block in blocks:
507              if block in jobsOfBlock.keys() :
508                  blockCounter += 1
509 <                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
509 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),
510 >                    ','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
511                  if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
512                      noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
513                      bloskNoSite.append( blockCounter )
# Line 581 | Line 527 | class Cmssw(JobType):
527              for range_jobs in noSiteBlock:
528                  msg += str(range_jobs) + virgola
529              msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
530 +            if self.cfg_params.has_key('EDG.se_white_list'):
531 +                msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
532 +                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
533 +                msg += 'Please check if the dataset is available at this site!)\n'
534 +            if self.cfg_params.has_key('EDG.ce_white_list'):
535 +                msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
536 +                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
537 +                msg += 'Please check if the dataset is available at this site!)\n'
538 +
539              common.logger.message(msg)
540  
541          self.list_of_args = list_of_lists
# Line 629 | Line 584 | class Cmssw(JobType):
584          self.list_of_args = []
585          for i in range(self.total_number_of_jobs):
586              ## Since there is no input, any site is good
632           # self.jobDestination.append(["Any"])
587              self.jobDestination.append([""]) #must be empty to write correctly the xml
588              args=[]
589              if (self.firstRun):
590 <                    ## pythia first run
637 <                #self.list_of_args.append([(str(self.firstRun)+str(i))])
590 >                ## pythia first run
591                  args.append(str(self.firstRun)+str(i))
639            else:
640                ## no first run
641                #self.list_of_args.append([str(i)])
642                args.append(str(i))
643            if (self.sourceSeed):
644                args.append(str(self.sourceSeed)+str(i))
645                if (self.sourceSeedVtx):
646                    ## + vtx random seed
647                    args.append(str(self.sourceSeedVtx)+str(i))
648                if (self.sourceSeedG4):
649                    ## + G4 random seed
650                    args.append(str(self.sourceSeedG4)+str(i))
651                if (self.sourceSeedMix):
652                    ## + Mix random seed
653                    args.append(str(self.sourceSeedMix)+str(i))
654                pass
655            pass
592              self.list_of_args.append(args)
657        pass
658
659        # print self.list_of_args
593  
594          return
595  
596  
597 <    def jobSplittingForScript(self):#CarlosDaniele
597 >    def jobSplittingForScript(self):
598          """
599          Perform job splitting based on number of job
600          """
# Line 677 | Line 610 | class Cmssw(JobType):
610          # argument is seed number.$i
611          self.list_of_args = []
612          for i in range(self.total_number_of_jobs):
680            ## Since there is no input, any site is good
681           # self.jobDestination.append(["Any"])
613              self.jobDestination.append([""])
683            ## no random seed
614              self.list_of_args.append([str(i)])
615          return
616  
617      def split(self, jobParams):
618  
689        common.jobDB.load()
690        #### Fabio
619          njobs = self.total_number_of_jobs
620          arglist = self.list_of_args
621          # create the empty structure
622          for i in range(njobs):
623              jobParams.append("")
624  
625 +        listID=[]
626 +        listField=[]
627          for job in range(njobs):
628              jobParams[job] = arglist[job]
629 <            # print str(arglist[job])
630 <            # print jobParams[job]
631 <            common.jobDB.setArguments(job, jobParams[job])
632 <            common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
633 <            common.jobDB.setDestination(job, self.jobDestination[job])
629 >            listID.append(job+1)
630 >            job_ToSave ={}
631 >            concString = ' '
632 >            argu=''
633 >            if len(jobParams[job]):
634 >                argu +=   concString.join(jobParams[job] )
635 >            job_ToSave['arguments']= str(job+1)+' '+argu
636 >            job_ToSave['dlsDestination']= self.jobDestination[job]
637 >            listField.append(job_ToSave)
638 >            msg="Job "+str(job)+" Arguments:   "+str(job+1)+" "+argu+"\n"  \
639 >            +"                     Destination: "+str(self.jobDestination[job])
640 >            common.logger.debug(5,msg)
641 >        common._db.updateJob_(listID,listField)
642 >        self.argsList = (len(jobParams[0])+1)
643  
705        common.jobDB.save()
644          return
645  
708    def getJobTypeArguments(self, nj, sched):
709        result = ''
710        for i in common.jobDB.arguments(nj):
711            result=result+str(i)+" "
712        return result
713
646      def numberOfJobs(self):
715        # Fabio
647          return self.total_number_of_jobs
648  
649      def getTarBall(self, exe):
650          """
651          Return the TarBall with lib and exe
652          """
722
723        # if it exist, just return it
724        #
725        # Marco. Let's start to use relative path for Boss XML files
726        #
653          self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
654          if os.path.exists(self.tgzNameWithPath):
655              return self.tgzNameWithPath
# Line 737 | Line 663 | class Cmssw(JobType):
663  
664          # First of all declare the user Scram area
665          swArea = self.scram.getSWArea_()
740        #print "swArea = ", swArea
741        # swVersion = self.scram.getSWVersion()
742        # print "swVersion = ", swVersion
666          swReleaseTop = self.scram.getReleaseTop_()
744        #print "swReleaseTop = ", swReleaseTop
667  
668          ## check if working area is release top
669          if swReleaseTop == '' or swArea == swReleaseTop:
670 +            common.logger.debug(3,"swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
671              return
672  
673          import tarfile
# Line 787 | Line 710 | class Cmssw(JobType):
710  
711              ## Now check if any data dir(s) is present
712              swAreaLen=len(swArea)
713 +            self.dataExist = False
714              for root, dirs, files in os.walk(swArea):
715                  if "data" in dirs:
716 +                    self.dataExist=True
717                      common.logger.debug(5,"data "+root+"/data"+" to be tarred")
718                      tar.add(root+"/data",root[swAreaLen:]+"/data")
719  
720 <            ## Add ProdAgent dir to tar
721 <            paDir = 'ProdAgentApi'
722 <            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
723 <            if os.path.isdir(pa):
724 <                tar.add(pa,paDir)
720 >            ### CMSSW ParameterSet
721 >            if not self.pset is None:
722 >                cfg_file = common.work_space.jobDir()+self.configFilename()
723 >                tar.add(cfg_file,self.configFilename())
724 >                common.logger.debug(5,"File added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
725 >
726  
727 <            ### FEDE FOR DBS PUBLICATION
802 <            ## Add PRODCOMMON dir to tar
727 >            ## Add ProdCommon dir to tar
728              prodcommonDir = 'ProdCommon'
729              prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
730              if os.path.isdir(prodcommonPath):
731                  tar.add(prodcommonPath,prodcommonDir)
732 <            #############################
732 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
733 >
734 >            ##### ML stuff
735 >            ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
736 >            path=os.environ['CRABDIR'] + '/python/'
737 >            for file in ML_file_list:
738 >                tar.add(path+file,file)
739 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
740 >
741 >            ##### Utils
742 >            Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'JobReportErrorCode.py']
743 >            for file in Utils_file_list:
744 >                tar.add(path+file,file)
745 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
746  
747 +            ##### AdditionalFiles
748 +            for file in self.additional_inbox_files:
749 +                tar.add(file,string.split(file,'/')[-1])
750              common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
751 +
752              tar.close()
753          except :
754              raise CrabException('Could not create tar-ball')
# Line 817 | Line 759 | class Cmssw(JobType):
759              raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
760  
761          ## create tar-ball with ML stuff
820        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
821        try:
822            tar = tarfile.open(self.MLtgzfile, "w:gz")
823            path=os.environ['CRABDIR'] + '/python/'
824            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
825                tar.add(path+file,file)
826            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
827            tar.close()
828        except :
829            raise CrabException('Could not create ML files tar-ball')
830
831        return
762  
763 <    def additionalInputFileTgz(self):
834 <        """
835 <        Put all additional files into a tar ball and return its name
836 <        """
837 <        import tarfile
838 <        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
839 <        tar = tarfile.open(tarName, "w:gz")
840 <        for file in self.additional_inbox_files:
841 <            tar.add(file,string.split(file,'/')[-1])
842 <        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
843 <        tar.close()
844 <        return tarName
845 <
846 <    def wsSetupEnvironment(self, nj):
763 >    def wsSetupEnvironment(self, nj=0):
764          """
765          Returns part of a job script which prepares
766          the execution environment for the job 'nj'.
767          """
768 +        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
769 +            psetName = 'pset.py'
770 +        else:
771 +            psetName = 'pset.cfg'
772          # Prepare JobType-independent part
773 <        txt = ''
773 >        txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
774          txt += 'echo ">>> setup environment"\n'
775          txt += 'if [ $middleware == LCG ]; then \n'
776          txt += self.wsSetupCMSLCGEnvironment_()
777          txt += 'elif [ $middleware == OSG ]; then\n'
778          txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
779          txt += '    if [ ! $? == 0 ] ;then\n'
780 <        txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
781 <        txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
782 <        txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
862 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
863 <        txt += '        exit 1\n'
780 >        txt += '        echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
781 >        txt += '        job_exit_code=10016\n'
782 >        txt += '        func_exit\n'
783          txt += '    fi\n'
784          txt += '    echo ">>> Created working directory: $WORKING_DIR"\n'
785          txt += '\n'
# Line 868 | Line 787 | class Cmssw(JobType):
787          txt += '    cd $WORKING_DIR\n'
788          txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
789          txt += self.wsSetupCMSOSGEnvironment_()
871        #txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
872        #txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
790          txt += 'fi\n'
791  
792          # Prepare JobType-specific part
# Line 880 | Line 797 | class Cmssw(JobType):
797          txt += scram+' project CMSSW '+self.version+'\n'
798          txt += 'status=$?\n'
799          txt += 'if [ $status != 0 ] ; then\n'
800 <        txt += '    echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
801 <        txt += '    echo "JOB_EXIT_STATUS = 10034"\n'
802 <        txt += '    echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
886 <        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
887 <        txt += '    if [ $middleware == OSG ]; then \n'
888 <        txt += '        cd $RUNTIME_AREA\n'
889 <        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
890 <        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
891 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
892 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
893 <        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
894 <        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
895 <        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
896 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
897 <        txt += '        fi\n'
898 <        txt += '    fi \n'
899 <        txt += '    exit 1 \n'
800 >        txt += '    echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
801 >        txt += '    job_exit_code=10034\n'
802 >        txt += '    func_exit\n'
803          txt += 'fi \n'
804          txt += 'cd '+self.version+'\n'
902        ########## FEDE FOR DBS2 ######################
805          txt += 'SOFTWARE_DIR=`pwd`\n'
806          txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
905        ###############################################
906        ### needed grep for bug in scramv1 ###
907        txt += scram+' runtime -sh\n'
807          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
808 <        txt += 'echo $PATH\n'
809 <
808 >        txt += 'if [ $? != 0 ] ; then\n'
809 >        txt += '    echo "ERROR ==> Problem with the command: "\n'
810 >        txt += '    echo "eval \`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \` at `hostname`"\n'
811 >        txt += '    job_exit_code=10034\n'
812 >        txt += '    func_exit\n'
813 >        txt += 'fi \n'
814          # Handle the arguments:
815          txt += "\n"
816          txt += "## number of arguments (first argument always jobnumber)\n"
817          txt += "\n"
818 <        txt += "if [ $nargs -lt 2 ]\n"
818 >        txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n"
819          txt += "then\n"
820 <        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
821 <        txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
822 <        txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
920 <        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
921 <        txt += '    if [ $middleware == OSG ]; then \n'
922 <        txt += '        cd $RUNTIME_AREA\n'
923 <        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
924 <        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
925 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
926 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
927 <        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
928 <        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
929 <        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
930 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
931 <        txt += '        fi\n'
932 <        txt += '    fi \n'
933 <        txt += "    exit 1\n"
820 >        txt += "    echo 'ERROR ==> Too few arguments' +$nargs+ \n"
821 >        txt += '    job_exit_code=50113\n'
822 >        txt += "    func_exit\n"
823          txt += "fi\n"
824          txt += "\n"
825  
826          # Prepare job-specific part
827          job = common.job_list[nj]
939        ### FEDE FOR DBS OUTPUT PUBLICATION
828          if (self.datasetPath):
829              txt += '\n'
830              txt += 'DatasetPath='+self.datasetPath+'\n'
# Line 952 | Line 840 | class Cmssw(JobType):
840              txt += 'PrimaryDataset=null\n'
841              txt += 'DataTier=null\n'
842              txt += 'ApplicationFamily=MCDataTier\n'
843 <        if self.pset != None: #CarlosDaniele
843 >        if self.pset != None:
844              pset = os.path.basename(job.configFilename())
845              txt += '\n'
846              txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
847              if (self.datasetPath): # standard job
848 <                txt += 'InputFiles=${args[1]}\n'
849 <                txt += 'MaxEvents=${args[2]}\n'
850 <                txt += 'SkipEvents=${args[3]}\n'
848 >                txt += 'InputFiles=${args[1]}; export InputFiles\n'
849 >                txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
850 >                txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
851                  txt += 'echo "Inputfiles:<$InputFiles>"\n'
964                txt += 'sed "s#\'INPUTFILE\'#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
852                  txt += 'echo "MaxEvents:<$MaxEvents>"\n'
966                txt += 'sed "s#int32 input = 0#int32 input = $MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
853                  txt += 'echo "SkipEvents:<$SkipEvents>"\n'
968                txt += 'sed "s#uint32 skipEvents = 0#uint32 skipEvents = $SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
854              else:  # pythia like job
855 <                seedIndex=1
855 >                txt += 'PreserveSeeds='  + ','.join(self.preserveSeeds)  + '; export PreserveSeeds\n'
856 >                txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
857 >                txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
858 >                txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
859                  if (self.firstRun):
860 <                    txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
860 >                    txt += 'FirstRun=${args[1]}; export FirstRun\n'
861                      txt += 'echo "FirstRun: <$FirstRun>"\n'
974                    txt += 'sed "s#uint32 firstRun = 0#uint32 firstRun = $FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
975                    seedIndex=seedIndex+1
862  
863 <                if (self.sourceSeed):
978 <                    txt += 'Seed=${args['+str(seedIndex)+']}\n'
979 <                    txt += 'sed "s#uint32 sourceSeed = 0#uint32 sourceSeed = $Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
980 <                    seedIndex=seedIndex+1
981 <                    ## the following seeds are not always present
982 <                    if (self.sourceSeedVtx):
983 <                        txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
984 <                        txt += 'echo "VtxSeed: <$VtxSeed>"\n'
985 <                        txt += 'sed "s#uint32 VtxSmeared = 0#uint32 VtxSmeared = $VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
986 <                        seedIndex += 1
987 <                    if (self.sourceSeedG4):
988 <                        txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
989 <                        txt += 'echo "G4Seed: <$G4Seed>"\n'
990 <                        txt += 'sed "s#uint32 g4SimHits = 0#uint32 g4SimHits = $G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
991 <                        seedIndex += 1
992 <                    if (self.sourceSeedMix):
993 <                        txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
994 <                        txt += 'echo "MixSeed: <$mixSeed>"\n'
995 <                        txt += 'sed "s#uint32 mix = 0#uint32 mix = $mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
996 <                        seedIndex += 1
997 <                    pass
998 <                pass
999 <            txt += 'mv -f '+pset+' pset.cfg\n'
863 >            txt += 'mv -f ' + pset + ' ' + psetName + '\n'
864  
1001        if len(self.additional_inbox_files) > 0:
1002            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
1003            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
1004            txt += 'fi\n'
1005            pass
865  
866 <        if self.pset != None: #CarlosDaniele
866 >        if self.pset != None:
867 >            # FUTURE: Can simplify for 2_1_x and higher
868              txt += '\n'
869 <            txt += 'echo "***** cat pset.cfg *********"\n'
870 <            txt += 'cat pset.cfg\n'
871 <            txt += 'echo "****** end pset.cfg ********"\n'
872 <            txt += '\n'
873 <            ### FEDE FOR DBS OUTPUT PUBLICATION
874 <            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
869 >            if self.debug_pset==True:
870 >                txt += 'echo "***** cat ' + psetName + ' *********"\n'
871 >                txt += 'cat ' + psetName + '\n'
872 >                txt += 'echo "****** end ' + psetName + ' ********"\n'
873 >                txt += '\n'
874 >            txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n'
875              txt += 'echo "PSETHASH = $PSETHASH" \n'
1016            ##############
876              txt += '\n'
877          return txt
878  
879 <    def wsBuildExe(self, nj=0):
879 >    def wsUntarSoftware(self, nj=0):
880          """
881          Put in the script the commands to build an executable
882          or a library.
883          """
884  
885 <        txt = ""
885 >        txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'
886  
887          if os.path.isfile(self.tgzNameWithPath):
888              txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
889              txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
890 +            txt += 'ls -Al \n'
891              txt += 'untar_status=$? \n'
892              txt += 'if [ $untar_status -ne 0 ]; then \n'
893 <            txt += '   echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
894 <            txt += '   echo "JOB_EXIT_STATUS = $untar_status" \n'
895 <            txt += '   echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
1036 <            txt += '   if [ $middleware == OSG ]; then \n'
1037 <            txt += '       cd $RUNTIME_AREA\n'
1038 <            txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1039 <            txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
1040 <            txt += '       /bin/rm -rf $WORKING_DIR\n'
1041 <            txt += '       if [ -d $WORKING_DIR ] ;then\n'
1042 <            txt += '           echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
1043 <            txt += '           echo "JOB_EXIT_STATUS = 50999"\n'
1044 <            txt += '           echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
1045 <            txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1046 <            txt += '       fi\n'
1047 <            txt += '   fi \n'
1048 <            txt += '   \n'
1049 <            txt += '   exit 1 \n'
893 >            txt += '   echo "ERROR ==> Untarring .tgz file failed"\n'
894 >            txt += '   job_exit_code=$untar_status\n'
895 >            txt += '   func_exit\n'
896              txt += 'else \n'
897              txt += '   echo "Successful untar" \n'
898              txt += 'fi \n'
899              txt += '\n'
900 <            txt += 'echo ">>> Include ProdAgentApi and PRODCOMMON in PYTHONPATH:"\n'
900 >            txt += 'echo ">>> Include ProdCommon in PYTHONPATH:"\n'
901              txt += 'if [ -z "$PYTHONPATH" ]; then\n'
902 <            #### FEDE FOR DBS OUTPUT PUBLICATION
1057 <            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
902 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/ProdCommon\n'
903              txt += 'else\n'
904 <            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
904 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/ProdCommon:${PYTHONPATH}\n'
905              txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1061            ###################
906              txt += 'fi\n'
907              txt += '\n'
908  
# Line 1066 | Line 910 | class Cmssw(JobType):
910  
911          return txt
912  
913 +    def wsBuildExe(self, nj=0):
914 +        """
915 +        Put in the script the commands to build an executable
916 +        or a library.
917 +        """
918 +
919 +        txt = '\n#Written by cms_cmssw::wsBuildExe\n'
920 +        txt += 'echo ">>> moving CMSSW software directories in `pwd`" \n'
921 +
922 +        txt += 'rm -r lib/ module/ \n'
923 +        txt += 'mv $RUNTIME_AREA/lib/ . \n'
924 +        txt += 'mv $RUNTIME_AREA/module/ . \n'
925 +        if self.dataExist == True:
926 +            txt += 'rm -r src/ \n'
927 +            txt += 'mv $RUNTIME_AREA/src/ . \n'
928 +        if len(self.additional_inbox_files)>0:
929 +            for file in self.additional_inbox_files:
930 +                txt += 'mv $RUNTIME_AREA/'+os.path.basename(file)+' . \n'
931 +        txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n'
932 +
933 +        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
934 +        txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdCommon\n'
935 +        txt += 'else\n'
936 +        txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
937 +        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
938 +        txt += 'fi\n'
939 +        txt += '\n'
940 +
941 +        return txt
942 +
943      def modifySteeringCards(self, nj):
944          """
945          modify the card provided by the user,
# Line 1073 | Line 947 | class Cmssw(JobType):
947          """
948  
949      def executableName(self):
950 <        if self.scriptExe: #CarlosDaniele
950 >        if self.scriptExe:
951              return "sh "
952          else:
953              return self.executable
954  
955      def executableArgs(self):
956 +        # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
957          if self.scriptExe:#CarlosDaniele
958              return   self.scriptExe + " $NJob"
959          else:
960 <            # if >= CMSSW_1_5_X, add -e
961 <            version_array = self.scram.getSWVersion().split('_')
962 <            major = 0
963 <            minor = 0
964 <            try:
965 <                major = int(version_array[1])
966 <                minor = int(version_array[2])
967 <            except:
1093 <                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
1094 <                raise CrabException(msg)
1095 <            if major >= 1 and minor >= 5 :
1096 <                return " -e -p pset.cfg"
960 >            ex_args = ""
961 >            # FUTURE: This tests the CMSSW version. Can remove code as versions deprecated
962 >            # Framework job report
963 >            if (self.CMSSW_major >= 1 and self.CMSSW_minor >= 5) or (self.CMSSW_major >= 2):
964 >                ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
965 >            # Type of config file
966 >            if self.CMSSW_major >= 2 :
967 >                ex_args += " -p pset.py"
968              else:
969 <                return " -p pset.cfg"
969 >                ex_args += " -p pset.cfg"
970 >            return ex_args
971  
972      def inputSandbox(self, nj):
973          """
974          Returns a list of filenames to be put in JDL input sandbox.
975          """
976          inp_box = []
1105        # # dict added to delete duplicate from input sandbox file list
1106        # seen = {}
1107        ## code
977          if os.path.isfile(self.tgzNameWithPath):
978              inp_box.append(self.tgzNameWithPath)
979 <        if os.path.isfile(self.MLtgzfile):
980 <            inp_box.append(self.MLtgzfile)
1112 <        ## config
1113 <        if not self.pset is None:
1114 <            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1115 <        ## additional input files
1116 <        tgz = self.additionalInputFileTgz()
1117 <        inp_box.append(tgz)
979 >        wrapper = os.path.basename(str(common._db.queryTask('scriptName')))
980 >        inp_box.append(common.work_space.pathForTgz() +'job/'+ wrapper)
981          return inp_box
982  
983      def outputSandbox(self, nj):
# Line 1140 | Line 1003 | class Cmssw(JobType):
1003          Returns part of a job script which renames the produced files.
1004          """
1005  
1006 <        txt = '\n'
1007 <        txt += 'echo" >>> directory content:"\n'
1006 >        txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
1007 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1008 >        txt += 'echo ">>> current directory content:"\n'
1009          txt += 'ls \n'
1010 <        txt = '\n'
1147 <
1148 <        txt += 'output_exit_status=0\n'
1149 <
1150 <        for fileWithSuffix in (self.output_file_sandbox):
1151 <            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1152 <            txt += '\n'
1153 <            txt += '# check output file\n'
1154 <            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1155 <            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1156 <            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1157 <            txt += 'else\n'
1158 <            txt += '    exit_status=60302\n'
1159 <            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1160 <            if common.scheduler.boss_scheduler_name == 'condor_g':
1161 <                txt += '    if [ $middleware == OSG ]; then \n'
1162 <                txt += '        echo "prepare dummy output file"\n'
1163 <                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1164 <                txt += '    fi \n'
1165 <            txt += 'fi\n'
1010 >        txt += '\n'
1011  
1012          for fileWithSuffix in (self.output_file):
1013              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1014              txt += '\n'
1015              txt += '# check output file\n'
1016              txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1017 <            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1018 <            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1017 >            if (self.copy_data == 1):  # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
1018 >                txt += '    mv '+fileWithSuffix+' '+output_file_num+'\n'
1019 >                txt += '    ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1020 >            else:
1021 >                txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1022 >                txt += '    ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1023              txt += 'else\n'
1024 <            txt += '    exit_status=60302\n'
1025 <            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1026 <            txt += '    echo "JOB_EXIT_STATUS = $exit_status"\n'
1178 <            txt += '    output_exit_status=$exit_status\n'
1179 <            if common.scheduler.boss_scheduler_name == 'condor_g':
1024 >            txt += '    job_exit_code=60302\n'
1025 >            txt += '    echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
1026 >            if common.scheduler.name().upper() == 'CONDOR_G':
1027                  txt += '    if [ $middleware == OSG ]; then \n'
1028                  txt += '        echo "prepare dummy output file"\n'
1029                  txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
# Line 1187 | Line 1034 | class Cmssw(JobType):
1034               file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1035  
1036          txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1037 +        txt += '\n'
1038 +        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1039 +        txt += 'echo ">>> current directory content:"\n'
1040 +        txt += 'ls \n'
1041 +        txt += '\n'
1042          txt += 'cd $RUNTIME_AREA\n'
1043          txt += 'echo ">>> current directory (RUNTIME_AREA):  $RUNTIME_AREA"\n'
1044          return txt
# Line 1218 | Line 1070 | class Cmssw(JobType):
1070              req='Member("VO-cms-' + \
1071                   self.version + \
1072                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1073 <        ## SL add requirement for OS version only if SL4
1222 <        #reSL4 = re.compile( r'slc4' )
1223 <        if self.executable_arch: # and reSL4.search(self.executable_arch):
1073 >        if self.executable_arch:
1074              req+=' && Member("VO-cms-' + \
1075                   self.executable_arch + \
1076                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1077  
1078          req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1079 +        if common.scheduler.name() == "glitecoll":
1080 +            req += ' && other.GlueCEStateStatus == "Production" '
1081  
1082          return req
1083  
1084      def configFilename(self):
1085          """ return the config filename """
1086 <        return self.name()+'.cfg'
1086 >        # FUTURE: Can remove cfg mode for CMSSW >= 2_1_x
1087 >        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
1088 >          return self.name()+'.py'
1089 >        else:
1090 >          return self.name()+'.cfg'
1091  
1092      def wsSetupCMSOSGEnvironment_(self):
1093          """
1094          Returns part of a job script which prepares
1095          the execution environment and which is common for all CMS jobs.
1096          """
1097 <        txt = '    echo ">>> setup CMS OSG environment:"\n'
1097 >        txt = '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n'
1098 >        txt += '    echo ">>> setup CMS OSG environment:"\n'
1099          txt += '    echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
1100          txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1101          txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
# Line 1246 | Line 1103 | class Cmssw(JobType):
1103          txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1104          txt += '        source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1105          txt += '    else\n'
1106 <        txt += '        echo "SET_CMS_ENV 10020 ==> ERROR $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1107 <        txt += '        echo "JOB_EXIT_STATUS = 10020"\n'
1108 <        txt += '        echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1252 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1253 <        txt += '\n'
1254 <        txt += '        cd $RUNTIME_AREA\n'
1255 <        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1256 <        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
1257 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
1258 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
1259 <        txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1260 <        txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
1261 <        txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1262 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1263 <        txt += '        fi\n'
1264 <        txt += '\n'
1265 <        txt += '        exit 1\n'
1106 >        txt += '        echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1107 >        txt += '        job_exit_code=10020\n'
1108 >        txt += '        func_exit\n'
1109          txt += '    fi\n'
1110          txt += '\n'
1111 <        txt += '    echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1111 >        txt += '    echo "==> setup cms environment ok"\n'
1112          txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1113  
1114          return txt
1115  
1273    ### OLI_DANIELE
1116      def wsSetupCMSLCGEnvironment_(self):
1117          """
1118          Returns part of a job script which prepares
1119          the execution environment and which is common for all CMS jobs.
1120          """
1121 <        txt = '    echo ">>> setup CMS LCG environment:"\n'
1121 >        txt = '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n'
1122 >        txt += '    echo ">>> setup CMS LCG environment:"\n'
1123          txt += '    echo "set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
1124          txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1125          txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
1126          txt += '    if [ ! $VO_CMS_SW_DIR ] ;then\n'
1127 <        txt += '        echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1128 <        txt += '        echo "JOB_EXIT_STATUS = 10031" \n'
1129 <        txt += '        echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1287 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1288 <        txt += '        exit 1\n'
1127 >        txt += '        echo "ERROR ==> CMS software dir not found on WN `hostname`"\n'
1128 >        txt += '        job_exit_code=10031\n'
1129 >        txt += '        func_exit\n'
1130          txt += '    else\n'
1131          txt += '        echo "Sourcing environment... "\n'
1132          txt += '        if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1133 <        txt += '            echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1134 <        txt += '            echo "JOB_EXIT_STATUS = 10020"\n'
1135 <        txt += '            echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1295 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1296 <        txt += '            exit 1\n'
1133 >        txt += '            echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1134 >        txt += '            job_exit_code=10020\n'
1135 >        txt += '            func_exit\n'
1136          txt += '        fi\n'
1137          txt += '        echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1138          txt += '        source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1139          txt += '        result=$?\n'
1140          txt += '        if [ $result -ne 0 ]; then\n'
1141 <        txt += '            echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1142 <        txt += '            echo "JOB_EXIT_STATUS = 10032"\n'
1143 <        txt += '            echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1305 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1306 <        txt += '            exit 1\n'
1141 >        txt += '            echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1142 >        txt += '            job_exit_code=10032\n'
1143 >        txt += '            func_exit\n'
1144          txt += '        fi\n'
1145          txt += '    fi\n'
1146          txt += '    \n'
1147 <        txt += '    echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1147 >        txt += '    echo "==> setup cms environment ok"\n'
1148          return txt
1149  
1313    ### FEDE FOR DBS OUTPUT PUBLICATION
1150      def modifyReport(self, nj):
1151          """
1152          insert the part of the script that modifies the FrameworkJob Report
1153          """
1154 <
1155 <        txt = ''
1320 <        try:
1321 <            publish_data = int(self.cfg_params['USER.publish_data'])
1322 <        except KeyError:
1323 <            publish_data = 0
1154 >        txt = '\n#Written by cms_cmssw::modifyReport\n'
1155 >        publish_data = int(self.cfg_params.get('USER.publish_data',0))
1156          if (publish_data == 1):
1157 <            txt += 'echo ">>> Modify Job Report:" \n'
1158 <            ################ FEDE FOR DBS2 #############################################
1327 <            txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1328 <            #############################################################################
1157 >            processedDataset = self.cfg_params['USER.publish_data_name']
1158 >            LFNBaseName = LFNBase(processedDataset)
1159  
1160 <            txt += 'if [ -z "$SE" ]; then\n'
1161 <            txt += '    SE="" \n'
1162 <            txt += 'fi \n'
1163 <            txt += 'if [ -z "$SE_PATH" ]; then\n'
1164 <            txt += '    SE_PATH="" \n'
1165 <            txt += 'fi \n'
1166 <            txt += 'echo "SE = $SE"\n'
1337 <            txt += 'echo "SE_PATH = $SE_PATH"\n'
1160 >            txt += 'if [ $copy_exit_status -eq 0 ]; then\n'
1161 >            txt += '    FOR_LFN=%s_${PSETHASH}/\n'%(LFNBaseName)
1162 >            txt += 'else\n'
1163 >            txt += '    FOR_LFN=/copy_problems/ \n'
1164 >            txt += '    SE=""\n'
1165 >            txt += '    SE_PATH=""\n'
1166 >            txt += 'fi\n'
1167  
1168 <            processedDataset = self.cfg_params['USER.publish_data_name']
1168 >            txt += 'echo ">>> Modify Job Report:" \n'
1169 >            txt += 'chmod a+x $SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
1170              txt += 'ProcessedDataset='+processedDataset+'\n'
1341            #### LFN=/store/user/<user>/processedDataset_PSETHASH
1342            txt += 'if [ "$SE_PATH" == "" ]; then\n'
1343            #### FEDE: added slash in LFN ##############
1344            txt += '    FOR_LFN=/copy_problems/ \n'
1345            txt += 'else \n'
1346            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1347            #####  FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1348            txt += '    FOR_LFN=/store$tmp \n'
1349            txt += 'fi \n'
1171              txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1172 +            txt += 'echo "SE = $SE"\n'
1173 +            txt += 'echo "SE_PATH = $SE_PATH"\n'
1174              txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1175              txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1176 <            #txt += 'echo "$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1177 <            txt += 'echo "$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1355 <            txt += '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1356 <            #txt += '$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1357 <
1176 >            txt += 'echo "$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1177 >            txt += '$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1178              txt += 'modifyReport_result=$?\n'
1359            txt += 'echo modifyReport_result = $modifyReport_result\n'
1179              txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1180 <            txt += '    exit_status=1\n'
1181 <            txt += '    echo "ERROR: Problem with ModifyJobReport"\n'
1180 >            txt += '    modifyReport_result=70500\n'
1181 >            txt += '    job_exit_code=$modifyReport_result\n'
1182 >            txt += '    echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n'
1183 >            txt += '    echo "WARNING: Problem with ModifyJobReport"\n'
1184              txt += 'else\n'
1185 <            txt += '    mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1185 >            txt += '    mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
1186              txt += 'fi\n'
1366        else:
1367            txt += 'echo "no data publication required"\n'
1187          return txt
1188  
1189 <    def cleanEnv(self):
1190 <        txt = ''
1191 <        txt += 'if [ $middleware == OSG ]; then\n'
1192 <        txt += '    cd $RUNTIME_AREA\n'
1193 <        txt += '    echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1194 <        txt += '    echo ">>> Remove working directory: $WORKING_DIR"\n'
1195 <        txt += '    /bin/rm -rf $WORKING_DIR\n'
1196 <        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1197 <        txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1198 <        txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
1199 <        txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1200 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1189 >    def wsParseFJR(self):
1190 >        """
1191 >        Parse the FrameworkJobReport to obtain useful infos
1192 >        """
1193 >        txt = '\n#Written by cms_cmssw::wsParseFJR\n'
1194 >        txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
1195 >        txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
1196 >        txt += '    if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
1197 >        txt += '        cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --MonitorID $MonitorID --MonitorJobID $MonitorJobID`\n'
1198 >        txt += '        echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
1199 >        txt += '        tmp_executable_exit_status=`echo $cmd_out | awk -F\; \'{print $1}\' | awk -F \' \' \'{print $NF}\'`\n'
1200 >        txt += '        if [ -n $tmp_executable_exit_status ];then\n'
1201 >        txt += '            executable_exit_status=$tmp_executable_exit_status\n'
1202 >        txt += '        fi\n'
1203 >        txt += '        if [ $executable_exit_status -eq 50115 ];then\n'
1204 >        txt += '            echo ">>> crab_fjr.xml contents: "\n'
1205 >        txt += '            cat $RUNTIME_AREA/crab_fjr_NJob.xml\n'
1206 >        txt += '            echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
1207 >        txt += '        else\n'
1208 >        txt += '            echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
1209 >        txt += '        fi\n'
1210 >        txt += '    else\n'
1211 >        txt += '        echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1212          txt += '    fi\n'
1213 +          #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap
1214 +
1215 +        if self.datasetPath:
1216 +          # VERIFY PROCESSED DATA
1217 +            txt += '    if [ $executable_exit_status -eq 0 ];then\n'
1218 +            txt += '      echo ">>> Verify list of processed files:"\n'
1219 +            txt += '      echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
1220 +            txt += '      grep LFN $RUNTIME_AREA/crab_fjr_$NJob.xml |cut -d">" -f2|cut -d"<" -f1|grep "/" > processed-files.txt\n'
1221 +            txt += '      cat input-files.txt  | sort | uniq > tmp.txt\n'
1222 +            txt += '      mv tmp.txt input-files.txt\n'
1223 +            txt += '      echo "cat input-files.txt"\n'
1224 +            txt += '      echo "----------------------"\n'
1225 +            txt += '      cat input-files.txt\n'
1226 +            txt += '      cat processed-files.txt | sort | uniq > tmp.txt\n'
1227 +            txt += '      mv tmp.txt processed-files.txt\n'
1228 +            txt += '      echo "----------------------"\n'
1229 +            txt += '      echo "cat processed-files.txt"\n'
1230 +            txt += '      echo "----------------------"\n'
1231 +            txt += '      cat processed-files.txt\n'
1232 +            txt += '      echo "----------------------"\n'
1233 +            txt += '      diff -q input-files.txt processed-files.txt\n'
1234 +            txt += '      fileverify_status=$?\n'
1235 +            txt += '      if [ $fileverify_status -ne 0 ]; then\n'
1236 +            txt += '         executable_exit_status=30001\n'
1237 +            txt += '         echo "ERROR ==> not all input files processed"\n'
1238 +            txt += '         echo "      ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n'
1239 +            txt += '         echo "      ==> diff input-files.txt processed-files.txt"\n'
1240 +            txt += '      fi\n'
1241 +            txt += '    fi\n'
1242 +            txt += '\n'
1243 +        txt += 'else\n'
1244 +        txt += '    echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1245          txt += 'fi\n'
1246          txt += '\n'
1247 +        txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
1248 +        txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
1249 +        txt += 'job_exit_code=$executable_exit_status\n'
1250 +
1251          return txt
1252  
1253      def setParam_(self, param, value):
# Line 1390 | Line 1256 | class Cmssw(JobType):
1256      def getParams(self):
1257          return self._params
1258  
1393    def setTaskid_(self):
1394        self._taskId = self.cfg_params['taskId']
1395
1396    def getTaskid(self):
1397        return self._taskId
1398
1259      def uniquelist(self, old):
1260          """
1261          remove duplicates from a list
# Line 1405 | Line 1265 | class Cmssw(JobType):
1265              nd[e]=0
1266          return nd.keys()
1267  
1268 <
1409 <    def checkOut(self, limit):
1268 >    def outList(self):
1269          """
1270          check the dimension of the output files
1271          """
1272 <        txt += 'echo ">>> Starting output sandbox limit check :"\n'
1273 <        allOutFiles = ""
1272 >        txt = ''
1273 >        txt += 'echo ">>> list of expected files on output sandbox"\n'
1274          listOutFiles = []
1275 <        for fileOut in (self.output_file+self.output_file_sandbox):
1276 <             if fileOut.find('crab_fjr') == -1:
1277 <                 allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob')
1278 <                 listOutFiles.append(self.numberFile_(fileOut, '$NJob'))
1279 <        txt += 'echo "OUTPUT files: '+str(allOutFiles)+'";\n'
1280 <        txt += 'ls -gGhrta;\n'
1281 <        txt += 'sum=0;\n'
1282 <        txt += 'for file in '+str(allOutFiles)+' ; do\n'
1283 <        txt += '    if [ -e $file ]; then\n'
1284 <        txt += '        tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
1285 <        txt += '        sum=`expr $sum + $tt`\n'
1286 <        txt += '    else\n'
1287 <        txt += '        echo "WARNING: output file $file not found!"\n'
1288 <        txt += '    fi\n'
1289 <        txt += 'done\n'
1431 <        txt += 'echo "Total Output dimension: $sum";\n'
1432 <        txt += 'limit='+str(limit)+';\n'
1433 <        txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
1434 <        txt += 'if [ $limit -lt $sum ]; then\n'
1435 <        txt += '    echo "WARNING: output files have to big size - something will be lost;"\n'
1436 <        txt += '    echo "         checking the output file sizes..."\n'
1437 <        """
1438 <        txt += '    dim=0;\n'
1439 <        txt += '    exclude=0;\n'
1440 <        txt += '    for files in '+str(allOutFiles)+' ; do\n'
1441 <        txt += '        sumTemp=0;\n'
1442 <        txt += '        for file2 in '+str(allOutFiles)+' ; do\n'
1443 <        txt += '            if [ $file != $file2 ]; then\n'
1444 <        txt += '                tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1445 <        txt += '                sumTemp=`expr $sumTemp + $tt`;\n'
1446 <        txt += '            fi\n'
1447 <        txt += '        done\n'
1448 <        txt += '        if [ $sumTemp -lt $limit ]; then\n'
1449 <        txt += '            if [ $dim -lt $sumTemp ]; then\n'
1450 <        txt += '                dim=$sumTemp;\n'
1451 <        txt += '                exclude=$file;\n'
1452 <        txt += '            fi\n'
1453 <        txt += '        fi\n'
1454 <        txt += '    done\n'
1455 <        txt += '    echo "Dimension calculated: $dim"; echo "File to exclude: $exclude";\n'
1456 <        """
1457 <        txt += '    tot=0;\n'
1458 <        txt += '    for file2 in '+str(allOutFiles)+' ; do\n'
1459 <        txt += '        tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1460 <        txt += '        tot=`expr $tot + $tt`;\n'
1461 <        txt += '        if [ $limit -lt $tot ]; then\n'
1462 <        txt += '            tot=`expr $tot - $tt`;\n'
1463 <        txt += '            fileLast=$file;\n'
1464 <        txt += '            break;\n'
1465 <        txt += '        fi\n'
1466 <        txt += '    done\n'
1467 <        txt += '    echo "Dimension calculated: $tot"; echo "First file to exclude: $file";\n'
1468 <        txt += '    flag=0;\n'
1469 <        txt += '    for filess in '+str(allOutFiles)+' ; do\n'
1470 <        txt += '        if [ $fileLast = $filess ]; then\n'
1471 <        txt += '            flag=1;\n'
1472 <        txt += '        fi\n'
1473 <        txt += '        if [ $flag -eq 1 ]; then\n'
1474 <        txt += '            rm -f $filess;\n'
1475 <        txt += '        fi\n'
1476 <        txt += '    done\n'
1477 <        txt += '    ls -agGhrt;\n'
1478 <        txt += '    echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
1479 <        txt += '    echo "JOB_EXIT_STATUS = 70000";\n'
1480 <        txt += '    exit_status=70000;\n'
1481 <        txt += 'else'
1482 <        txt += '    echo "Total Output dimension $sum is fine.";\n'
1483 <        txt += 'fi\n'
1484 <        txt += 'echo "Ending output sandbox limit check"\n'
1275 >        stdout = 'CMSSW_$NJob.stdout'
1276 >        stderr = 'CMSSW_$NJob.stderr'
1277 >        if (self.return_data == 1):
1278 >            for file in (self.output_file+self.output_file_sandbox):
1279 >                listOutFiles.append(self.numberFile_(file, '$NJob'))
1280 >            listOutFiles.append(stdout)
1281 >            listOutFiles.append(stderr)
1282 >        else:
1283 >            for file in (self.output_file_sandbox):
1284 >                listOutFiles.append(self.numberFile_(file, '$NJob'))
1285 >            listOutFiles.append(stdout)
1286 >            listOutFiles.append(stderr)
1287 >        txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n'
1288 >        txt += 'filesToCheck="'+string.join(listOutFiles,' ')+'"\n'
1289 >        txt += 'export filesToCheck\n'
1290          return txt

Diff Legend

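(no marker, old line number) Removed lines: present only in revision 1.137
+ Added lines: present only in revision 1.196
< Changed lines: text as it was in revision 1.137
> Changed lines: text as it is in revision 1.196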