ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.130 by fanzago, Wed Oct 17 13:24:41 2007 UTC vs.
Revision 1.205 by slacapra, Thu Jun 5 16:34:04 2008 UTC

# Line 5 | Line 5 | from crab_util import *
5   from BlackWhiteListParser import BlackWhiteListParser
6   import common
7   import Scram
8 + from LFNBaseName import *
9  
10   import os, string, glob
11  
# Line 13 | Line 14 | class Cmssw(JobType):
14          JobType.__init__(self, 'CMSSW')
15          common.logger.debug(3,'CMSSW::__init__')
16  
17 +        self.argsList = []
18 +
19          self._params = {}
20          self.cfg_params = cfg_params
18
21          # init BlackWhiteListParser
22          self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
23  
24 <        try:
23 <            self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24 <        except KeyError:
25 <            self.MaxTarBallSize = 9.5
24 >        self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',9.5))
25  
26          # number of jobs requested to be created, limit obj splitting
27          self.ncjobs = ncjobs
28  
29          log = common.logger
30 <        
30 >
31          self.scram = Scram.Scram(cfg_params)
32          self.additional_inbox_files = []
33          self.scriptExe = ''
34          self.executable = ''
35          self.executable_arch = self.scram.getArch()
36          self.tgz_name = 'default.tgz'
38        self.additional_tgz_name = 'additional.tgz'
37          self.scriptName = 'CMSSW.sh'
38 <        self.pset = ''      #scrip use case Da  
39 <        self.datasetPath = '' #scrip use case Da
38 >        self.pset = ''
39 >        self.datasetPath = ''
40  
41          # set FJR file name
42          self.fjrFileName = 'crab_fjr.xml'
43  
44          self.version = self.scram.getSWVersion()
45 <        
46 <        #
47 <        # Try to block creation in case of arch/version mismatch
48 <        #
49 <
50 <        a = string.split(self.version, "_")
51 <
52 <        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
53 <            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
54 <            raise CrabException(msg)
57 <        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58 <            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
45 >        version_array = self.version.split('_')
46 >        self.CMSSW_major = 0
47 >        self.CMSSW_minor = 0
48 >        self.CMSSW_patch = 0
49 >        try:
50 >            self.CMSSW_major = int(version_array[1])
51 >            self.CMSSW_minor = int(version_array[2])
52 >            self.CMSSW_patch = int(version_array[3])
53 >        except:
54 >            msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
55              raise CrabException(msg)
60        
61        common.taskDB.setDict('codeVersion',self.version)
62        self.setParam_('application', self.version)
56  
57          ### collect Data cards
58  
59 <        ## get DBS mode
60 <        try:
68 <            self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
69 <        except KeyError:
70 <            self.use_dbs_1 = 0
71 <            
72 <        try:
73 <            tmp =  cfg_params['CMSSW.datasetpath']
74 <            log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
75 <            if string.lower(tmp)=='none':
76 <                self.datasetPath = None
77 <                self.selectNoInput = 1
78 <            else:
79 <                self.datasetPath = tmp
80 <                self.selectNoInput = 0
81 <        except KeyError:
82 <            msg = "Error: datasetpath not defined "  
59 >        if not cfg_params.has_key('CMSSW.datasetpath'):
60 >            msg = "Error: datasetpath not defined "
61              raise CrabException(msg)
62 <
63 <        # ML monitoring
64 <        # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
65 <        if not self.datasetPath:
66 <            self.setParam_('dataset', 'None')
89 <            self.setParam_('owner', 'None')
62 >        tmp =  cfg_params['CMSSW.datasetpath']
63 >        log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
64 >        if string.lower(tmp)=='none':
65 >            self.datasetPath = None
66 >            self.selectNoInput = 1
67          else:
68 <            try:
69 <                datasetpath_split = self.datasetPath.split("/")
93 <                # standard style
94 <                self.setParam_('datasetFull', self.datasetPath)
95 <                if self.use_dbs_1 == 1 :
96 <                    self.setParam_('dataset', datasetpath_split[1])
97 <                    self.setParam_('owner', datasetpath_split[-1])
98 <                else:
99 <                    self.setParam_('dataset', datasetpath_split[1])
100 <                    self.setParam_('owner', datasetpath_split[2])
101 <            except:
102 <                self.setParam_('dataset', self.datasetPath)
103 <                self.setParam_('owner', self.datasetPath)
104 <                
105 <        self.setTaskid_()
106 <        self.setParam_('taskId', self.cfg_params['taskId'])
68 >            self.datasetPath = tmp
69 >            self.selectNoInput = 0
70  
71          self.dataTiers = []
72 <
72 >        self.debugWrap = ''
73 >        self.debug_wrapper = cfg_params.get('USER.debug_wrapper',False)
74 >        if self.debug_wrapper: self.debugWrap='--debug'
75          ## now the application
76 <        try:
77 <            self.executable = cfg_params['CMSSW.executable']
113 <            self.setParam_('exe', self.executable)
114 <            log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
115 <            msg = "Default executable cmsRun overridden. Switch to " + self.executable
116 <            log.debug(3,msg)
117 <        except KeyError:
118 <            self.executable = 'cmsRun'
119 <            self.setParam_('exe', self.executable)
120 <            msg = "User executable not defined. Use cmsRun"
121 <            log.debug(3,msg)
122 <            pass
76 >        self.executable = cfg_params.get('CMSSW.executable','cmsRun')
77 >        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
78  
79 <        try:
125 <            self.pset = cfg_params['CMSSW.pset']
126 <            log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
127 <            if self.pset.lower() != 'none' :
128 <                if (not os.path.exists(self.pset)):
129 <                    raise CrabException("User defined PSet file "+self.pset+" does not exist")
130 <            else:
131 <                self.pset = None
132 <        except KeyError:
79 >        if not cfg_params.has_key('CMSSW.pset'):
80              raise CrabException("PSet file missing. Cannot run cmsRun ")
81 +        self.pset = cfg_params['CMSSW.pset']
82 +        log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
83 +        if self.pset.lower() != 'none' :
84 +            if (not os.path.exists(self.pset)):
85 +                raise CrabException("User defined PSet file "+self.pset+" does not exist")
86 +        else:
87 +            self.pset = None
88  
89          # output files
90          ## stuff which must be returned always via sandbox
# Line 140 | Line 94 | class Cmssw(JobType):
94          self.output_file_sandbox.append(self.fjrFileName)
95  
96          # other output files to be returned via sandbox or copied to SE
97 <        try:
98 <            self.output_file = []
99 <            tmp = cfg_params['CMSSW.output_file']
100 <            if tmp != '':
101 <                tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
102 <                log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
103 <                for tmp in tmpOutFiles:
104 <                    tmp=string.strip(tmp)
151 <                    self.output_file.append(tmp)
152 <                    pass
153 <            else:
154 <                log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
97 >        self.output_file = []
98 >        tmp = cfg_params.get('CMSSW.output_file',None)
99 >        if tmp :
100 >            tmpOutFiles = string.split(tmp,',')
101 >            log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
102 >            for tmp in tmpOutFiles:
103 >                tmp=string.strip(tmp)
104 >                self.output_file.append(tmp)
105                  pass
106 <            pass
157 <        except KeyError:
106 >        else:
107              log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
108 <            pass
108 >        pass
109  
110          # script_exe file as additional file in inputSandbox
111 <        try:
112 <            self.scriptExe = cfg_params['USER.script_exe']
113 <            if self.scriptExe != '':
114 <               if not os.path.isfile(self.scriptExe):
115 <                  msg ="ERROR. file "+self.scriptExe+" not found"
116 <                  raise CrabException(msg)
168 <               self.additional_inbox_files.append(string.strip(self.scriptExe))
169 <        except KeyError:
170 <            self.scriptExe = ''
111 >        self.scriptExe = cfg_params.get('USER.script_exe',None)
112 >        if self.scriptExe :
113 >            if not os.path.isfile(self.scriptExe):
114 >                msg ="ERROR. file "+self.scriptExe+" not found"
115 >                raise CrabException(msg)
116 >            self.additional_inbox_files.append(string.strip(self.scriptExe))
117  
172        #CarlosDaniele
118          if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
119 <           msg ="Error. script_exe  not defined"
120 <           raise CrabException(msg)
119 >            msg ="Error. script_exe  not defined"
120 >            raise CrabException(msg)
121 >
122 >        # use parent files...
123 >        self.useParent = self.cfg_params.get('CMSSW.use_parent',False)
124  
125          ## additional input files
126 <        try:
126 >        if cfg_params.has_key('USER.additional_input_files'):
127              tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
128              for tmp in tmpAddFiles:
129                  tmp = string.strip(tmp)
# Line 192 | Line 140 | class Cmssw(JobType):
140                      if not os.path.exists(file):
141                          raise CrabException("Additional input file not found: "+file)
142                      pass
195                    # fname = string.split(file, '/')[-1]
196                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
197                    # shutil.copyfile(file, storedFile)
143                      self.additional_inbox_files.append(string.strip(file))
144                  pass
145              pass
146              common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
147 <        except KeyError:
203 <            pass
204 <
205 <        # files per job
206 <        try:
207 <            if (cfg_params['CMSSW.files_per_jobs']):
208 <                raise CrabException("files_per_jobs no longer supported.  Quitting.")
209 <        except KeyError:
210 <            pass
147 >        pass
148  
149          ## Events per job
150 <        try:
150 >        if cfg_params.has_key('CMSSW.events_per_job'):
151              self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
152              self.selectEventsPerJob = 1
153 <        except KeyError:
153 >        else:
154              self.eventsPerJob = -1
155              self.selectEventsPerJob = 0
156 <    
156 >
157          ## number of jobs
158 <        try:
158 >        if cfg_params.has_key('CMSSW.number_of_jobs'):
159              self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
160              self.selectNumberOfJobs = 1
161 <        except KeyError:
161 >        else:
162              self.theNumberOfJobs = 0
163              self.selectNumberOfJobs = 0
164  
165 <        try:
165 >        if cfg_params.has_key('CMSSW.total_number_of_events'):
166              self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
167              self.selectTotalNumberEvents = 1
168 <        except KeyError:
168 >            if self.selectNumberOfJobs  == 1:
169 >                if (self.total_number_of_events != -1) and int(self.total_number_of_events) < int(self.theNumberOfJobs):
170 >                    msg = 'Must specify at least one event per job. total_number_of_events > number_of_jobs '
171 >                    raise CrabException(msg)
172 >        else:
173              self.total_number_of_events = 0
174              self.selectTotalNumberEvents = 0
175  
176 <        if self.pset != None: #CarlosDaniele
176 >        if self.pset != None:
177               if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
178                   msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
179                   raise CrabException(msg)
# Line 241 | Line 182 | class Cmssw(JobType):
182                   msg = 'Must specify  number_of_jobs.'
183                   raise CrabException(msg)
184  
185 <        ## source seed for pythia
186 <        try:
187 <            self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
188 <        except KeyError:
189 <            self.sourceSeed = None
190 <            common.logger.debug(5,"No seed given")
185 >        ## New method of dealing with seeds
186 >        self.incrementSeeds = []
187 >        self.preserveSeeds = []
188 >        if cfg_params.has_key('CMSSW.preserve_seeds'):
189 >            tmpList = cfg_params['CMSSW.preserve_seeds'].split(',')
190 >            for tmp in tmpList:
191 >                tmp.strip()
192 >                self.preserveSeeds.append(tmp)
193 >        if cfg_params.has_key('CMSSW.increment_seeds'):
194 >            tmpList = cfg_params['CMSSW.increment_seeds'].split(',')
195 >            for tmp in tmpList:
196 >                tmp.strip()
197 >                self.incrementSeeds.append(tmp)
198 >
199 >        ## Old method of dealing with seeds
200 >        ## FUTURE: This is for old CMSSW and old CRAB. Can throw exceptions after a couple of CRAB releases and then
201 >        ## remove
202 >        self.sourceSeed = cfg_params.get('CMSSW.pythia_seed',None)
203 >        if self.sourceSeed:
204 >            print "pythia_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
205 >            self.incrementSeeds.append('sourceSeed')
206 >            self.incrementSeeds.append('theSource')
207 >
208 >        self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
209 >        if self.sourceSeedVtx:
210 >            print "vtx_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
211 >            self.incrementSeeds.append('VtxSmeared')
212 >
213 >        self.sourceSeedG4 = cfg_params.get('CMSSW.g4_seed',None)
214 >        if self.sourceSeedG4:
215 >            print "g4_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
216 >            self.incrementSeeds.append('g4SimHits')
217 >
218 >        self.sourceSeedMix = cfg_params.get('CMSSW.mix_seed',None)
219 >        if self.sourceSeedMix:
220 >            print "mix_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
221 >            self.incrementSeeds.append('mix')
222  
223 <        try:
252 <            self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
253 <        except KeyError:
254 <            self.sourceSeedVtx = None
255 <            common.logger.debug(5,"No vertex seed given")
223 >        self.firstRun = cfg_params.get('CMSSW.first_run',None)
224  
257        try:
258            self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
259        except KeyError:
260            self.sourceSeedG4 = None
261            common.logger.debug(5,"No g4 sim hits seed given")
262
263        try:
264            self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
265        except KeyError:
266            self.sourceSeedMix = None
267            common.logger.debug(5,"No mix seed given")
268
269        try:
270            self.firstRun = int(cfg_params['CMSSW.first_run'])
271        except KeyError:
272            self.firstRun = None
273            common.logger.debug(5,"No first run given")
225          if self.pset != None: #CarlosDaniele
226 <            ver = string.split(self.version,"_")
276 <            if (int(ver[1])>=1 and int(ver[2])>=5):
277 <                import PsetManipulator150 as pp
278 <            else:
279 <                import PsetManipulator as pp
226 >            import PsetManipulator as pp
227              PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
228  
229 +        # Copy/return
230 +
231 +        self.copy_data = int(cfg_params.get('USER.copy_data',0))
232 +        self.return_data = int(cfg_params.get('USER.return_data',0))
233 +
234          #DBSDLS-start
235 <        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
235 >        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
236          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
237          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
238          self.jobDestination=[]  # Site destination(s) for each job (list of lists)
# Line 289 | Line 241 | class Cmssw(JobType):
241          blockSites = {}
242          if self.datasetPath:
243              blockSites = self.DataDiscoveryAndLocation(cfg_params)
244 <        #DBSDLS-end          
244 >        #DBSDLS-end
245 >
246  
294        self.tgzNameWithPath = self.getTarBall(self.executable)
295    
247          ## Select Splitting
248 <        if self.selectNoInput:
249 <            if self.pset == None: #CarlosDaniele
248 >        if self.selectNoInput:
249 >            if self.pset == None:
250                  self.jobSplittingForScript()
251              else:
252                  self.jobSplittingNoInput()
# Line 303 | Line 254 | class Cmssw(JobType):
254              self.jobSplittingByBlocks(blockSites)
255  
256          # modify Pset
257 <        if self.pset != None: #CarlosDaniele
257 >        if self.pset != None:
258              try:
259 <                if (self.datasetPath): # standard job
260 <                    # allow to processa a fraction of events in a file
261 <                    PsetEdit.inputModule("INPUT")
262 <                    PsetEdit.maxEvent("INPUTMAXEVENTS")
312 <                    PsetEdit.skipEvent("INPUTSKIPEVENTS")
313 <                else:  # pythia like job
314 <                    PsetEdit.maxEvent(self.eventsPerJob)
315 <                    if (self.firstRun):
316 <                        PsetEdit.pythiaFirstRun("INPUTFIRSTRUN")  #First Run
317 <                    if (self.sourceSeed) :
318 <                        PsetEdit.pythiaSeed("INPUT")
319 <                        if (self.sourceSeedVtx) :
320 <                            PsetEdit.vtxSeed("INPUTVTX")
321 <                        if (self.sourceSeedG4) :
322 <                            PsetEdit.g4Seed("INPUTG4")
323 <                        if (self.sourceSeedMix) :
324 <                            PsetEdit.mixSeed("INPUTMIX")
325 <                # add FrameworkJobReport to parameter-set
326 <                PsetEdit.addCrabFJR(self.fjrFileName)
259 >                # Add FrameworkJobReport to parameter-set, set max events.
260 >                # Reset later for data jobs by writeCFG which does all modifications
261 >                PsetEdit.addCrabFJR(self.fjrFileName) # FUTURE: Job report addition not needed by CMSSW>1.5
262 >                PsetEdit.maxEvent(self.eventsPerJob)
263                  PsetEdit.psetWriter(self.configFilename())
264              except:
265 <                msg='Error while manipuliating ParameterSet: exiting...'
265 >                msg='Error while manipulating ParameterSet: exiting...'
266                  raise CrabException(msg)
267 +        self.tgzNameWithPath = self.getTarBall(self.executable)
268  
269      def DataDiscoveryAndLocation(self, cfg_params):
270  
271          import DataDiscovery
335        import DataDiscovery_DBS2
272          import DataLocation
273          common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
274  
# Line 341 | Line 277 | class Cmssw(JobType):
277          ## Contact the DBS
278          common.logger.message("Contacting Data Discovery Services ...")
279          try:
280 <
345 <            if self.use_dbs_1 == 1 :
346 <                self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
347 <            else :
348 <                self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
280 >            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
281              self.pubdata.fetchDBSInfo()
282  
283          except DataDiscovery.NotExistingDatasetError, ex :
# Line 357 | Line 289 | class Cmssw(JobType):
289          except DataDiscovery.DataDiscoveryError, ex:
290              msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
291              raise CrabException(msg)
360        except DataDiscovery_DBS2.NotExistingDatasetError_DBS2, ex :
361            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
362            raise CrabException(msg)
363        except DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2, ex :
364            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
365            raise CrabException(msg)
366        except DataDiscovery_DBS2.DataDiscoveryError_DBS2, ex:
367            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
368            raise CrabException(msg)
292  
293          self.filesbyblock=self.pubdata.getFiles()
294          self.eventsbyblock=self.pubdata.getEventsPerBlock()
295          self.eventsbyfile=self.pubdata.getEventsPerFile()
296 +        self.parentFiles=self.pubdata.getParent()
297  
298          ## get max number of events
299 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
299 >        self.maxEvents=self.pubdata.getMaxEvents()
300  
301          ## Contact the DLS and build a list of sites hosting the fileblocks
302          try:
# Line 381 | Line 305 | class Cmssw(JobType):
305          except DataLocation.DataLocationError , ex:
306              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
307              raise CrabException(msg)
308 <        
308 >
309  
310          sites = dataloc.getSites()
311          allSites = []
# Line 395 | Line 319 | class Cmssw(JobType):
319          common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
320  
321          return sites
322 <    
322 >
323      def jobSplittingByBlocks(self, blockSites):
324          """
325          Perform job splitting. Jobs run over an integer number of files
# Line 445 | Line 369 | class Cmssw(JobType):
369              totalNumberOfJobs = 999999999
370          else :
371              totalNumberOfJobs = self.ncjobs
448            
372  
373          blocks = blockSites.keys()
374          blockCount = 0
# Line 465 | Line 388 | class Cmssw(JobType):
388              blockCount += 1
389              if block not in jobsOfBlock.keys() :
390                  jobsOfBlock[block] = []
391 <            
391 >
392              if self.eventsbyblock.has_key(block) :
393                  numEventsInBlock = self.eventsbyblock[block]
394                  common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
395 <            
395 >
396                  files = self.filesbyblock[block]
397                  numFilesInBlock = len(files)
398                  if (numFilesInBlock <= 0):
# Line 477 | Line 400 | class Cmssw(JobType):
400                  fileCount = 0
401  
402                  # ---- New block => New job ---- #
403 <                parString = "\\{"
403 >                parString = ""
404                  # counter for number of events in files currently worked on
405                  filesEventCount = 0
406                  # flag if next while loop should touch new file
407                  newFile = 1
408                  # job event counter
409                  jobSkipEventCount = 0
410 <            
410 >
411                  # ---- Iterate over the files in the block until we've met the requested ---- #
412                  # ---- total # of events or we've gone over all the files in this block  ---- #
413 +                pString=''
414                  while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
415                      file = files[fileCount]
416 +                    if self.useParent:
417 +                        parent = self.parentFiles[file]
418 +                        for f in parent :
419 +                            pString += '\\\"' + f + '\\\"\,'
420 +                        common.logger.debug(6, "File "+str(file)+" has the following parents: "+str(parent))
421 +                        common.logger.write("File "+str(file)+" has the following parents: "+str(parent))
422                      if newFile :
423                          try:
424                              numEventsInFile = self.eventsbyfile[file]
# Line 500 | Line 430 | class Cmssw(JobType):
430                              newFile = 0
431                          except KeyError:
432                              common.logger.message("File "+str(file)+" has unknown number of events: skipping")
503                        
433  
434 +                    eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining)
435                      # if less events in file remain than eventsPerJobRequested
436 <                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
436 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested):
437                          # if last file in block
438                          if ( fileCount == numFilesInBlock-1 ) :
439                              # end job using last file, use remaining events in block
440                              # close job and touch new file
441                              fullString = parString[:-2]
442 <                            fullString += '\\}'
443 <                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
442 >                            if self.useParent:
443 >                                fullParentString = pString[:-2]
444 >                                list_of_lists.append([fullString,fullParentString,str(-1),str(jobSkipEventCount)])
445 >                            else:
446 >                                list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
447                              common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
448                              self.jobDestination.append(blockSites[block])
449                              common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
# Line 522 | Line 455 | class Cmssw(JobType):
455                              eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
456                              jobSkipEventCount = 0
457                              # reset file
458 <                            parString = "\\{"
458 >                            pString = ""
459 >                            parString = ""
460                              filesEventCount = 0
461                              newFile = 1
462                              fileCount += 1
# Line 534 | Line 468 | class Cmssw(JobType):
468                      elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
469                          # close job and touch new file
470                          fullString = parString[:-2]
471 <                        fullString += '\\}'
472 <                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
471 >                        if self.useParent:
472 >                            fullParentString = pString[:-2]
473 >                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
474 >                        else:
475 >                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
476                          common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
477                          self.jobDestination.append(blockSites[block])
478                          common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
# Line 546 | Line 483 | class Cmssw(JobType):
483                          eventsRemaining = eventsRemaining - eventsPerJobRequested
484                          jobSkipEventCount = 0
485                          # reset file
486 <                        parString = "\\{"
486 >                        pString = ""
487 >                        parString = ""
488                          filesEventCount = 0
489                          newFile = 1
490                          fileCount += 1
491 <                        
491 >
492                      # if more events in file remain than eventsPerJobRequested
493                      else :
494                          # close job but don't touch new file
495                          fullString = parString[:-2]
496 <                        fullString += '\\}'
497 <                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
496 >                        if self.useParent:
497 >                            fullParentString = pString[:-2]
498 >                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
499 >                        else:
500 >                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
501                          common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
502                          self.jobDestination.append(blockSites[block])
503                          common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
# Line 570 | Line 511 | class Cmssw(JobType):
511                          jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
512                          # remove all but the last file
513                          filesEventCount = self.eventsbyfile[file]
514 <                        parString = "\\{"
515 <                        parString += '\\\"' + file + '\\\"\,'
514 >                        if self.useParent:
515 >                            for f in parent : pString += '\\\"' + f + '\\\"\,'
516 >                        parString = '\\\"' + file + '\\\"\,'
517                      pass # END if
518                  pass # END while (iterate over files in the block)
519          pass # END while (iterate over blocks in the dataset)
# Line 579 | Line 521 | class Cmssw(JobType):
521          if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
522              common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
523          common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
524 <        
524 >
525          # screen output
526          screenOutput = "List of jobs and available destination sites:\n\n"
527  
# Line 591 | Line 533 | class Cmssw(JobType):
533          for block in blocks:
534              if block in jobsOfBlock.keys() :
535                  blockCounter += 1
536 <                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
536 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),
537 >                    ','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
538                  if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
539 <                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
539 >                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
540                      bloskNoSite.append( blockCounter )
541 <        
541 >
542          common.logger.message(screenOutput)
543          if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
544              msg = 'WARNING: No sites are hosting any part of data for block:\n                '
# Line 611 | Line 554 | class Cmssw(JobType):
554              for range_jobs in noSiteBlock:
555                  msg += str(range_jobs) + virgola
556              msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
557 +            if self.cfg_params.has_key('EDG.se_white_list'):
558 +                msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
559 +                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
560 +                msg += 'Please check if the dataset is available at this site!)\n'
561 +            if self.cfg_params.has_key('EDG.ce_white_list'):
562 +                msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
563 +                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
564 +                msg += 'Please check if the dataset is available at this site!)\n'
565 +
566              common.logger.message(msg)
567  
568          self.list_of_args = list_of_lists
# Line 622 | Line 574 | class Cmssw(JobType):
574          """
575          common.logger.debug(5,'Splitting per events')
576  
577 <        if (self.selectEventsPerJob):
577 >        if (self.selectEventsPerJob):
578              common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
579          if (self.selectNumberOfJobs):
580              common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
# Line 636 | Line 588 | class Cmssw(JobType):
588          if (self.selectEventsPerJob):
589              if (self.selectTotalNumberEvents):
590                  self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
591 <            elif(self.selectNumberOfJobs) :  
591 >            elif(self.selectNumberOfJobs) :
592                  self.total_number_of_jobs =self.theNumberOfJobs
593 <                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
593 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
594  
595          elif (self.selectNumberOfJobs) :
596              self.total_number_of_jobs = self.theNumberOfJobs
597              self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
598 <
598 >
599          common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
600  
601          # is there any remainder?
# Line 659 | Line 611 | class Cmssw(JobType):
611          self.list_of_args = []
612          for i in range(self.total_number_of_jobs):
613              ## Since there is no input, any site is good
614 <           # self.jobDestination.append(["Any"])
663 <            self.jobDestination.append([""]) #must be empty to write correctly the xml
614 >            self.jobDestination.append([""]) #must be empty to write correctly the xml
615              args=[]
616              if (self.firstRun):
617 <                    ## pythia first run
667 <                #self.list_of_args.append([(str(self.firstRun)+str(i))])
617 >                ## pythia first run
618                  args.append(str(self.firstRun)+str(i))
669            else:
670                ## no first run
671                #self.list_of_args.append([str(i)])
672                args.append(str(i))
673            if (self.sourceSeed):
674                args.append(str(self.sourceSeed)+str(i))
675                if (self.sourceSeedVtx):
676                    ## + vtx random seed
677                    args.append(str(self.sourceSeedVtx)+str(i))
678                if (self.sourceSeedG4):
679                    ## + G4 random seed
680                    args.append(str(self.sourceSeedG4)+str(i))
681                if (self.sourceSeedMix):    
682                    ## + Mix random seed
683                    args.append(str(self.sourceSeedMix)+str(i))
684                pass
685            pass
619              self.list_of_args.append(args)
687        pass
688            
689        # print self.list_of_args
620  
621          return
622  
623  
624 <    def jobSplittingForScript(self):#CarlosDaniele
624 >    def jobSplittingForScript(self):
625          """
626          Perform job splitting based on number of job
627          """
# Line 707 | Line 637 | class Cmssw(JobType):
637          # argument is seed number.$i
638          self.list_of_args = []
639          for i in range(self.total_number_of_jobs):
710            ## Since there is no input, any site is good
711           # self.jobDestination.append(["Any"])
640              self.jobDestination.append([""])
713            ## no random seed
641              self.list_of_args.append([str(i)])
642          return
643  
644      def split(self, jobParams):
645 <
719 <        common.jobDB.load()
720 <        #### Fabio
645 >
646          njobs = self.total_number_of_jobs
647          arglist = self.list_of_args
648          # create the empty structure
649          for i in range(njobs):
650              jobParams.append("")
651 <        
651 >
652 >        listID=[]
653 >        listField=[]
654          for job in range(njobs):
655              jobParams[job] = arglist[job]
656 <            # print str(arglist[job])
657 <            # print jobParams[job]
658 <            common.jobDB.setArguments(job, jobParams[job])
659 <            common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
660 <            common.jobDB.setDestination(job, self.jobDestination[job])
656 >            listID.append(job+1)
657 >            job_ToSave ={}
658 >            concString = ' '
659 >            argu=''
660 >            if len(jobParams[job]):
661 >                argu +=   concString.join(jobParams[job] )
662 >            job_ToSave['arguments']= str(job+1)+' '+argu
663 >            job_ToSave['dlsDestination']= self.jobDestination[job]
664 >            listField.append(job_ToSave)
665 >            msg="Job "+str(job)+" Arguments:   "+str(job+1)+" "+argu+"\n"  \
666 >            +"                     Destination: "+str(self.jobDestination[job])
667 >            common.logger.debug(5,msg)
668 >        common._db.updateJob_(listID,listField)
669 >        self.argsList = (len(jobParams[0])+1)
670  
735        common.jobDB.save()
671          return
672 <    
738 <    def getJobTypeArguments(self, nj, sched):
739 <        result = ''
740 <        for i in common.jobDB.arguments(nj):
741 <            result=result+str(i)+" "
742 <        return result
743 <  
672 >
673      def numberOfJobs(self):
745        # Fabio
674          return self.total_number_of_jobs
675  
676      def getTarBall(self, exe):
677          """
678          Return the TarBall with lib and exe
679          """
752        
753        # if it exist, just return it
754        #
755        # Marco. Let's start to use relative path for Boss XML files
756        #
680          self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
681          if os.path.exists(self.tgzNameWithPath):
682              return self.tgzNameWithPath
# Line 767 | Line 690 | class Cmssw(JobType):
690  
691          # First of all declare the user Scram area
692          swArea = self.scram.getSWArea_()
770        #print "swArea = ", swArea
771        # swVersion = self.scram.getSWVersion()
772        # print "swVersion = ", swVersion
693          swReleaseTop = self.scram.getReleaseTop_()
694 <        #print "swReleaseTop = ", swReleaseTop
775 <        
694 >
695          ## check if working area is release top
696          if swReleaseTop == '' or swArea == swReleaseTop:
697 +            common.logger.debug(3,"swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
698              return
699  
700          import tarfile
# Line 785 | Line 705 | class Cmssw(JobType):
705                  exeWithPath = self.scram.findFile_(executable)
706                  if ( not exeWithPath ):
707                      raise CrabException('User executable '+executable+' not found')
708 <    
708 >
709                  ## then check if it's private or not
710                  if exeWithPath.find(swReleaseTop) == -1:
711                      # the exe is private, so we must ship
# Line 801 | Line 721 | class Cmssw(JobType):
721                  else:
722                      # the exe is from release, we'll find it on WN
723                      pass
724 <    
724 >
725              ## Now get the libraries: only those in local working area
726              libDir = 'lib'
727              lib = swArea+'/' +libDir
728              common.logger.debug(5,"lib "+lib+" to be tarred")
729              if os.path.exists(lib):
730                  tar.add(lib,libDir)
731 <    
731 >
732              ## Now check if module dir is present
733              moduleDir = 'module'
734              module = swArea + '/' + moduleDir
# Line 817 | Line 737 | class Cmssw(JobType):
737  
738              ## Now check if any data dir(s) is present
739              swAreaLen=len(swArea)
740 +            self.dataExist = False
741              for root, dirs, files in os.walk(swArea):
742                  if "data" in dirs:
743 +                    self.dataExist=True
744                      common.logger.debug(5,"data "+root+"/data"+" to be tarred")
745                      tar.add(root+"/data",root[swAreaLen:]+"/data")
746  
747 <            ## Add ProdAgent dir to tar
748 <            paDir = 'ProdAgentApi'
749 <            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
750 <            if os.path.isdir(pa):
751 <                tar.add(pa,paDir)
747 >            ### CMSSW ParameterSet
748 >            if not self.pset is None:
749 >                cfg_file = common.work_space.jobDir()+self.configFilename()
750 >                tar.add(cfg_file,self.configFilename())
751 >                common.logger.debug(5,"File added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
752 >
753  
754 <            ### FEDE FOR DBS PUBLICATION
832 <            ## Add PRODCOMMON dir to tar
754 >            ## Add ProdCommon dir to tar
755              prodcommonDir = 'ProdCommon'
756 <            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
756 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
757              if os.path.isdir(prodcommonPath):
758                  tar.add(prodcommonPath,prodcommonDir)
837            #############################    
838        
759              common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
760 +
761 +            ##### ML stuff
762 +            ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
763 +            path=os.environ['CRABDIR'] + '/python/'
764 +            for file in ML_file_list:
765 +                tar.add(path+file,file)
766 +            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
767 +
768 +            ##### Utils
769 +            Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py']
770 +            for file in Utils_file_list:
771 +                tar.add(path+file,file)
772 +            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
773 +
774 +            ##### AdditionalFiles
775 +            for file in self.additional_inbox_files:
776 +                tar.add(file,string.split(file,'/')[-1])
777 +            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
778 +
779              tar.close()
780          except :
781              raise CrabException('Could not create tar-ball')
# Line 847 | Line 786 | class Cmssw(JobType):
786              raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
787  
788          ## create tar-ball with ML stuff
850        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
851        try:
852            tar = tarfile.open(self.MLtgzfile, "w:gz")
853            path=os.environ['CRABDIR'] + '/python/'
854            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
855                tar.add(path+file,file)
856            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
857            tar.close()
858        except :
859            raise CrabException('Could not create ML files tar-ball')
860        
861        return
862        
863    def additionalInputFileTgz(self):
864        """
865        Put all additional files into a tar ball and return its name
866        """
867        import tarfile
868        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
869        tar = tarfile.open(tarName, "w:gz")
870        for file in self.additional_inbox_files:
871            tar.add(file,string.split(file,'/')[-1])
872        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
873        tar.close()
874        return tarName
789  
790 <    def wsSetupEnvironment(self, nj):
790 >    def wsSetupEnvironment(self, nj=0):
791          """
792          Returns part of a job script which prepares
793          the execution environment for the job 'nj'.
794          """
795 +        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
796 +            psetName = 'pset.py'
797 +        else:
798 +            psetName = 'pset.cfg'
799          # Prepare JobType-independent part
800 <        txt = ''
801 <  
802 <        ## OLI_Daniele at this level  middleware already known
885 <
886 <        txt += 'if [ $middleware == LCG ]; then \n'
887 <        txt += '    echo "### First set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
888 <        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
889 <        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
800 >        txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
801 >        txt += 'echo ">>> setup environment"\n'
802 >        txt += 'if [ $middleware == LCG ]; then \n'
803          txt += self.wsSetupCMSLCGEnvironment_()
804          txt += 'elif [ $middleware == OSG ]; then\n'
805          txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
806 <        txt += '    echo "Created working directory: $WORKING_DIR"\n'
807 <        txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
808 <        txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
809 <        txt += '    echo "JOB_EXIT_STATUS = 10016"\n'
897 <        txt += '    echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
898 <        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
899 <        txt += '        rm -f $RUNTIME_AREA/$repo \n'
900 <        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
901 <        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
902 <        txt += '        exit 1\n'
806 >        txt += '    if [ ! $? == 0 ] ;then\n'
807 >        txt += '        echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
808 >        txt += '        job_exit_code=10016\n'
809 >        txt += '        func_exit\n'
810          txt += '    fi\n'
811 +        txt += '    echo ">>> Created working directory: $WORKING_DIR"\n'
812          txt += '\n'
813          txt += '    echo "Change to working directory: $WORKING_DIR"\n'
814          txt += '    cd $WORKING_DIR\n'
815 <        txt += self.wsSetupCMSOSGEnvironment_()
816 <        txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
909 <        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
815 >        txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
816 >        txt += self.wsSetupCMSOSGEnvironment_()
817          txt += 'fi\n'
818  
819          # Prepare JobType-specific part
820          scram = self.scram.commandName()
821          txt += '\n\n'
822 <        txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
822 >        txt += 'echo ">>> specific cmssw setup environment:"\n'
823 >        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
824          txt += scram+' project CMSSW '+self.version+'\n'
825          txt += 'status=$?\n'
826          txt += 'if [ $status != 0 ] ; then\n'
827 <        txt += '   echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
828 <        txt += '   echo "JOB_EXIT_STATUS = 10034"\n'
829 <        txt += '   echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
922 <        txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
923 <        txt += '   rm -f $RUNTIME_AREA/$repo \n'
924 <        txt += '   echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
925 <        txt += '   echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
926 <        ## OLI_Daniele
927 <        txt += '    if [ $middleware == OSG ]; then \n'
928 <        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
929 <        txt += '        cd $RUNTIME_AREA\n'
930 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
931 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
932 <        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
933 <        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
934 <        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
935 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
936 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
937 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
938 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
939 <        txt += '        fi\n'
940 <        txt += '    fi \n'
941 <        txt += '   exit 1 \n'
827 >        txt += '    echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
828 >        txt += '    job_exit_code=10034\n'
829 >        txt += '    func_exit\n'
830          txt += 'fi \n'
943        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
831          txt += 'cd '+self.version+'\n'
945        ########## FEDE FOR DBS2 ######################
832          txt += 'SOFTWARE_DIR=`pwd`\n'
833 <        txt += 'echo SOFTWARE_DIR=$SOFTWARE_DIR \n'
948 <        ###############################################
949 <        ### needed grep for bug in scramv1 ###
950 <        txt += scram+' runtime -sh\n'
833 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
834          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
835 <        txt += 'echo $PATH\n'
836 <
835 >        txt += 'if [ $? != 0 ] ; then\n'
836 >        txt += '    echo "ERROR ==> Problem with the command: "\n'
837 >        txt += '    echo "eval \`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \` at `hostname`"\n'
838 >        txt += '    job_exit_code=10034\n'
839 >        txt += '    func_exit\n'
840 >        txt += 'fi \n'
841          # Handle the arguments:
842          txt += "\n"
843          txt += "## number of arguments (first argument always jobnumber)\n"
844          txt += "\n"
845 < #        txt += "narg=$#\n"
959 <        txt += "if [ $nargs -lt 2 ]\n"
845 >        txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n"
846          txt += "then\n"
847 <        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
848 <        txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
849 <        txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
964 <        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
965 <        txt += '    rm -f $RUNTIME_AREA/$repo \n'
966 <        txt += '    echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
967 <        txt += '    echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
968 <        ## OLI_Daniele
969 <        txt += '    if [ $middleware == OSG ]; then \n'
970 <        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
971 <        txt += '        cd $RUNTIME_AREA\n'
972 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
973 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
974 <        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
975 <        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
976 <        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
977 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
978 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
979 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
980 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
981 <        txt += '        fi\n'
982 <        txt += '    fi \n'
983 <        txt += "    exit 1\n"
847 >        txt += "    echo 'ERROR ==> Too few arguments' +$nargs+ \n"
848 >        txt += '    job_exit_code=50113\n'
849 >        txt += "    func_exit\n"
850          txt += "fi\n"
851          txt += "\n"
852  
853          # Prepare job-specific part
854          job = common.job_list[nj]
855 <        ### FEDE FOR DBS OUTPUT PUBLICATION
990 <        if (self.datasetPath):
855 >        if (self.datasetPath):
856              txt += '\n'
857              txt += 'DatasetPath='+self.datasetPath+'\n'
858  
859              datasetpath_split = self.datasetPath.split("/")
860 <            
860 >
861              txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
862              txt += 'DataTier='+datasetpath_split[2]+'\n'
998            #txt += 'ProcessedDataset='+datasetpath_split[3]+'\n'
863              txt += 'ApplicationFamily=cmsRun\n'
864  
865          else:
866              txt += 'DatasetPath=MCDataTier\n'
867              txt += 'PrimaryDataset=null\n'
868              txt += 'DataTier=null\n'
1005            #txt += 'ProcessedDataset=null\n'
869              txt += 'ApplicationFamily=MCDataTier\n'
870 <        if self.pset != None: #CarlosDaniele
870 >        if self.pset != None:
871              pset = os.path.basename(job.configFilename())
872              txt += '\n'
873              txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
874              if (self.datasetPath): # standard job
875 <                #txt += 'InputFiles=$2\n'
876 <                txt += 'InputFiles=${args[1]}\n'
877 <                txt += 'MaxEvents=${args[2]}\n'
878 <                txt += 'SkipEvents=${args[3]}\n'
875 >                txt += 'InputFiles=${args[1]}; export InputFiles\n'
876 >                if (self.useParent):  
877 >                    txt += 'ParentFiles=${args[2]}; export ParentFiles\n'
878 >                    txt += 'MaxEvents=${args[3]}; export MaxEvents\n'
879 >                    txt += 'SkipEvents=${args[4]}; export SkipEvents\n'
880 >                else:
881 >                    txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
882 >                    txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
883                  txt += 'echo "Inputfiles:<$InputFiles>"\n'
884 <                txt += 'sed "s#{\'INPUT\'}#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
884 >                if (self.useParent): txt += 'echo "ParentFiles:<$ParentFiles>"\n'
885                  txt += 'echo "MaxEvents:<$MaxEvents>"\n'
1019                txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
886                  txt += 'echo "SkipEvents:<$SkipEvents>"\n'
1021                txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
887              else:  # pythia like job
888 <                seedIndex=1
888 >                txt += 'PreserveSeeds='  + ','.join(self.preserveSeeds)  + '; export PreserveSeeds\n'
889 >                txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
890 >                txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
891 >                txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
892                  if (self.firstRun):
893 <                    txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
893 >                    txt += 'FirstRun=${args[1]}; export FirstRun\n'
894                      txt += 'echo "FirstRun: <$FirstRun>"\n'
1027                    txt += 'sed "s#\<INPUTFIRSTRUN\>#$FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1028                    seedIndex=seedIndex+1
895  
896 <                if (self.sourceSeed):
1031 <                    txt += 'Seed=${args['+str(seedIndex)+']}\n'
1032 <                    txt += 'sed "s#\<INPUT\>#$Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1033 <                    seedIndex=seedIndex+1
1034 <                    ## the following seeds are not always present
1035 <                    if (self.sourceSeedVtx):
1036 <                        txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
1037 <                        txt += 'echo "VtxSeed: <$VtxSeed>"\n'
1038 <                        txt += 'sed "s#\<INPUTVTX\>#$VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1039 <                        seedIndex += 1
1040 <                    if (self.sourceSeedG4):
1041 <                        txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
1042 <                        txt += 'echo "G4Seed: <$G4Seed>"\n'
1043 <                        txt += 'sed "s#\<INPUTG4\>#$G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1044 <                        seedIndex += 1
1045 <                    if (self.sourceSeedMix):
1046 <                        txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
1047 <                        txt += 'echo "MixSeed: <$mixSeed>"\n'
1048 <                        txt += 'sed "s#\<INPUTMIX\>#$mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1049 <                        seedIndex += 1
1050 <                    pass
1051 <                pass
1052 <            txt += 'mv -f '+pset+' pset.cfg\n'
896 >            txt += 'mv -f ' + pset + ' ' + psetName + '\n'
897  
1054        if len(self.additional_inbox_files) > 0:
1055            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
1056            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
1057            txt += 'fi\n'
1058            pass
898  
899 <        if self.pset != None: #CarlosDaniele
900 <            txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1062 <        
1063 <            txt += '\n'
1064 <            txt += 'echo "***** cat pset.cfg *********"\n'
1065 <            txt += 'cat pset.cfg\n'
1066 <            txt += 'echo "****** end pset.cfg ********"\n'
899 >        if self.pset != None:
900 >            # FUTURE: Can simply for 2_1_x and higher
901              txt += '\n'
902 <            ### FEDE FOR DBS OUTPUT PUBLICATION
903 <            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
902 >            if self.debug_wrapper==True:
903 >                txt += 'echo "***** cat ' + psetName + ' *********"\n'
904 >                txt += 'cat ' + psetName + '\n'
905 >                txt += 'echo "****** end ' + psetName + ' ********"\n'
906 >                txt += '\n'
907 >            txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n'
908              txt += 'echo "PSETHASH = $PSETHASH" \n'
1071            ##############
909              txt += '\n'
1073            # txt += 'echo "***** cat pset1.cfg *********"\n'
1074            # txt += 'cat pset1.cfg\n'
1075            # txt += 'echo "****** end pset1.cfg ********"\n'
910          return txt
911  
912 <    def wsBuildExe(self, nj=0):
912 >    def wsUntarSoftware(self, nj=0):
913          """
914          Put in the script the commands to build an executable
915          or a library.
916          """
917  
918 <        txt = ""
918 >        txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'
919  
920          if os.path.isfile(self.tgzNameWithPath):
921 <            txt += 'echo "tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'"\n'
921 >            txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
922              txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
923 +            if  self.debug_wrapper:
924 +                txt += 'ls -Al \n'
925              txt += 'untar_status=$? \n'
926              txt += 'if [ $untar_status -ne 0 ]; then \n'
927 <            txt += '   echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
928 <            txt += '   echo "JOB_EXIT_STATUS = $untar_status" \n'
929 <            txt += '   echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
1094 <            txt += '   if [ $middleware == OSG ]; then \n'
1095 <            txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1096 <            txt += '       cd $RUNTIME_AREA\n'
1097 <            txt += '       /bin/rm -rf $WORKING_DIR\n'
1098 <            txt += '       if [ -d $WORKING_DIR ] ;then\n'
1099 <            txt += '           echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
1100 <            txt += '           echo "JOB_EXIT_STATUS = 50999"\n'
1101 <            txt += '           echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
1102 <            txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1103 <            txt += '           rm -f $RUNTIME_AREA/$repo \n'
1104 <            txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1105 <            txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1106 <            txt += '       fi\n'
1107 <            txt += '   fi \n'
1108 <            txt += '   \n'
1109 <            txt += '   exit 1 \n'
927 >            txt += '   echo "ERROR ==> Untarring .tgz file failed"\n'
928 >            txt += '   job_exit_code=$untar_status\n'
929 >            txt += '   func_exit\n'
930              txt += 'else \n'
931              txt += '   echo "Successful untar" \n'
932              txt += 'fi \n'
933              txt += '\n'
934 <            txt += 'echo "Include ProdAgentApi and PRODCOMMON in PYTHONPATH"\n'
934 >            txt += 'echo ">>> Include ProdCommon in PYTHONPATH:"\n'
935              txt += 'if [ -z "$PYTHONPATH" ]; then\n'
936 <            #### FEDE FOR DBS OUTPUT PUBLICATION
1117 <            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
1118 <            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon\n'
1119 <            #txt += '   export PYTHONPATH=ProdAgentApi\n'
936 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/ProdCommon\n'
937              txt += 'else\n'
938 <            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1122 <            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon:${PYTHONPATH}\n'
1123 <            #txt += '   export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
938 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/ProdCommon:${PYTHONPATH}\n'
939              txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1125            ###################  
940              txt += 'fi\n'
941              txt += '\n'
942  
943              pass
944 <        
944 >
945 >        return txt
946 >
947 >    def wsBuildExe(self, nj=0):
948 >        """
949 >        Put in the script the commands to build an executable
950 >        or a library.
951 >        """
952 >
953 >        txt = '\n#Written by cms_cmssw::wsBuildExe\n'
954 >        txt += 'echo ">>> moving CMSSW software directories in `pwd`" \n'
955 >
956 >        txt += 'rm -r lib/ module/ \n'
957 >        txt += 'mv $RUNTIME_AREA/lib/ . \n'
958 >        txt += 'mv $RUNTIME_AREA/module/ . \n'
959 >        if self.dataExist == True:
960 >            txt += 'rm -r src/ \n'
961 >            txt += 'mv $RUNTIME_AREA/src/ . \n'
962 >        if len(self.additional_inbox_files)>0:
963 >            for file in self.additional_inbox_files:
964 >                txt += 'mv $RUNTIME_AREA/'+os.path.basename(file)+' . \n'
965 >        txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n'
966 >
967 >        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
968 >        txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdCommon\n'
969 >        txt += 'else\n'
970 >        txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
971 >        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
972 >        txt += 'fi\n'
973 >        txt += '\n'
974 >
975          return txt
976  
977      def modifySteeringCards(self, nj):
978          """
979 <        modify the card provided by the user,
979 >        modify the card provided by the user,
980          writing a new card into share dir
981          """
982 <        
982 >
983      def executableName(self):
984 <        if self.scriptExe: #CarlosDaniele
984 >        if self.scriptExe:
985              return "sh "
986          else:
987              return self.executable
988  
989      def executableArgs(self):
990 +        # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
991          if self.scriptExe:#CarlosDaniele
992              return   self.scriptExe + " $NJob"
993          else:
994 <            # if >= CMSSW_1_5_X, add -e
995 <            version_array = self.scram.getSWVersion().split('_')
996 <            major = 0
997 <            minor = 0
998 <            try:
999 <                major = int(version_array[1])
1000 <                minor = int(version_array[2])
1001 <            except:
1157 <                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"  
1158 <                raise CrabException(msg)
1159 <            if major >= 1 and minor >= 5 :
1160 <                return " -e -p pset.cfg"
994 >            ex_args = ""
995 >            # FUTURE: This tests the CMSSW version. Can remove code as versions deprecated
996 >            # Framework job report
997 >            if (self.CMSSW_major >= 1 and self.CMSSW_minor >= 5) or (self.CMSSW_major >= 2):
998 >                ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
999 >            # Type of config file
1000 >            if self.CMSSW_major >= 2 :
1001 >                ex_args += " -p pset.py"
1002              else:
1003 <                return " -p pset.cfg"
1003 >                ex_args += " -p pset.cfg"
1004 >            return ex_args
1005  
1006      def inputSandbox(self, nj):
1007          """
1008          Returns a list of filenames to be put in JDL input sandbox.
1009          """
1010          inp_box = []
1169        # # dict added to delete duplicate from input sandbox file list
1170        # seen = {}
1171        ## code
1011          if os.path.isfile(self.tgzNameWithPath):
1012              inp_box.append(self.tgzNameWithPath)
1013 <        if os.path.isfile(self.MLtgzfile):
1014 <            inp_box.append(self.MLtgzfile)
1176 <        ## config
1177 <        if not self.pset is None:
1178 <            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1179 <        ## additional input files
1180 <        tgz = self.additionalInputFileTgz()
1181 <        inp_box.append(tgz)
1013 >        wrapper = os.path.basename(str(common._db.queryTask('scriptName')))
1014 >        inp_box.append(common.work_space.pathForTgz() +'job/'+ wrapper)
1015          return inp_box
1016  
1017      def outputSandbox(self, nj):
# Line 1189 | Line 1022 | class Cmssw(JobType):
1022  
1023          ## User Declared output files
1024          for out in (self.output_file+self.output_file_sandbox):
1025 <            n_out = nj + 1
1025 >            n_out = nj + 1
1026              out_box.append(self.numberFile_(out,str(n_out)))
1027          return out_box
1028  
# Line 1204 | Line 1037 | class Cmssw(JobType):
1037          Returns part of a job script which renames the produced files.
1038          """
1039  
1040 <        txt = '\n'
1041 <        txt += '# directory content\n'
1042 <        txt += 'ls \n'
1043 <
1044 <        txt += 'output_exit_status=0\n'
1045 <        
1046 <        for fileWithSuffix in (self.output_file_sandbox):
1214 <            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1215 <            txt += '\n'
1216 <            txt += '# check output file\n'
1217 <            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1218 <            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1219 <            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1220 <            txt += 'else\n'
1221 <            txt += '    exit_status=60302\n'
1222 <            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1223 <            if common.scheduler.boss_scheduler_name == 'condor_g':
1224 <                txt += '    if [ $middleware == OSG ]; then \n'
1225 <                txt += '        echo "prepare dummy output file"\n'
1226 <                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1227 <                txt += '    fi \n'
1228 <            txt += 'fi\n'
1229 <        
1040 >        txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
1041 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1042 >        txt += 'echo ">>> current directory content:"\n'
1043 >        if self.debug_wrapper:
1044 >            txt += 'ls -Al\n'
1045 >        txt += '\n'
1046 >
1047          for fileWithSuffix in (self.output_file):
1048              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1049              txt += '\n'
1050              txt += '# check output file\n'
1051              txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1052 <            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1053 <            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1052 >            if (self.copy_data == 1):  # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
1053 >                txt += '    mv '+fileWithSuffix+' '+output_file_num+'\n'
1054 >                #txt += '    ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1055 >            else:
1056 >                txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1057 >                txt += '    ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1058              txt += 'else\n'
1059 <            txt += '    exit_status=60302\n'
1060 <            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1061 <            txt += '    echo "JOB_EXIT_STATUS = $exit_status"\n'
1241 <            txt += '    output_exit_status=$exit_status\n'
1242 <            if common.scheduler.boss_scheduler_name == 'condor_g':
1059 >            txt += '    job_exit_code=60302\n'
1060 >            txt += '    echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
1061 >            if common.scheduler.name().upper() == 'CONDOR_G':
1062                  txt += '    if [ $middleware == OSG ]; then \n'
1063                  txt += '        echo "prepare dummy output file"\n'
1064                  txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
# Line 1248 | Line 1067 | class Cmssw(JobType):
1067          file_list = []
1068          for fileWithSuffix in (self.output_file):
1069               file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1070 <            
1070 >
1071          txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1072 +        txt += '\n'
1073 +        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1074 +        txt += 'echo ">>> current directory content:"\n'
1075 +        if self.debug_wrapper:
1076 +            txt += 'ls -Al\n'
1077 +        txt += '\n'
1078          txt += 'cd $RUNTIME_AREA\n'
1079 +        txt += 'echo ">>> current directory (RUNTIME_AREA):  $RUNTIME_AREA"\n'
1080          return txt
1081  
1082      def numberFile_(self, file, txt):
# Line 1268 | Line 1094 | class Cmssw(JobType):
1094              result = name + '_' + txt + "." + ext
1095          else:
1096              result = name + '_' + txt
1097 <        
1097 >
1098          return result
1099  
1100      def getRequirements(self, nj=[]):
1101          """
1102 <        return job requirements to add to jdl files
1102 >        return job requirements to add to jdl files
1103          """
1104          req = ''
1105          if self.version:
1106              req='Member("VO-cms-' + \
1107                   self.version + \
1108                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1109 <        ## SL add requirement for OS version only if SL4
1284 <        #reSL4 = re.compile( r'slc4' )
1285 <        if self.executable_arch: # and reSL4.search(self.executable_arch):
1109 >        if self.executable_arch:
1110              req+=' && Member("VO-cms-' + \
1111                   self.executable_arch + \
1112                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1113  
1114          req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1115 +        if common.scheduler.name() == "glitecoll":
1116 +            req += ' && other.GlueCEStateStatus == "Production" '
1117  
1118          return req
1119  
1120      def configFilename(self):
1121          """ return the config filename """
1122 <        return self.name()+'.cfg'
1122 >        # FUTURE: Can remove cfg mode for CMSSW >= 2_1_x
1123 >        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
1124 >          return self.name()+'.py'
1125 >        else:
1126 >          return self.name()+'.cfg'
1127  
1298    ### OLI_DANIELE
1128      def wsSetupCMSOSGEnvironment_(self):
1129          """
1130          Returns part of a job script which prepares
1131          the execution environment and which is common for all CMS jobs.
1132          """
1133 <        txt = '\n'
1134 <        txt += '   echo "### SETUP CMS OSG  ENVIRONMENT ###"\n'
1135 <        txt += '   if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1136 <        txt += '      # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1137 <        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1138 <        txt += '       source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1310 <        txt += '   elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1133 >        txt = '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n'
1134 >        txt += '    echo ">>> setup CMS OSG environment:"\n'
1135 >        txt += '    echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
1136 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1137 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1138 >        txt += '    if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1139          txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1140 <        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1141 <        txt += '       source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1142 <        txt += '   else\n'
1143 <        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1144 <        txt += '       echo "JOB_EXIT_STATUS = 10020"\n'
1145 <        txt += '       echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1318 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1319 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1320 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1321 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1322 <        txt += '       exit 1\n'
1323 <        txt += '\n'
1324 <        txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1325 <        txt += '       cd $RUNTIME_AREA\n'
1326 <        txt += '       /bin/rm -rf $WORKING_DIR\n'
1327 <        txt += '       if [ -d $WORKING_DIR ] ;then\n'
1328 <        txt += '           echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1329 <        txt += '           echo "JOB_EXIT_STATUS = 10017"\n'
1330 <        txt += '           echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1331 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1332 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1333 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1334 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1335 <        txt += '       fi\n'
1336 <        txt += '\n'
1337 <        txt += '       exit 1\n'
1338 <        txt += '   fi\n'
1140 >        txt += '        source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1141 >        txt += '    else\n'
1142 >        txt += '        echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1143 >        txt += '        job_exit_code=10020\n'
1144 >        txt += '        func_exit\n'
1145 >        txt += '    fi\n'
1146          txt += '\n'
1147 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1148 <        txt += '   echo " END SETUP CMS OSG  ENVIRONMENT "\n'
1147 >        txt += '    echo "==> setup cms environment ok"\n'
1148 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1149  
1150          return txt
1151 <
1345 <    ### OLI_DANIELE
1151 >
1152      def wsSetupCMSLCGEnvironment_(self):
1153          """
1154          Returns part of a job script which prepares
1155          the execution environment and which is common for all CMS jobs.
1156          """
1157 <        txt  = '   \n'
1158 <        txt += '   echo " ### SETUP CMS LCG  ENVIRONMENT ### "\n'
1159 <        txt += '   if [ ! $VO_CMS_SW_DIR ] ;then\n'
1160 <        txt += '       echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1161 <        txt += '       echo "JOB_EXIT_STATUS = 10031" \n'
1162 <        txt += '       echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1163 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1164 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1165 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1166 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1167 <        txt += '       exit 1\n'
1168 <        txt += '   else\n'
1169 <        txt += '       echo "Sourcing environment... "\n'
1170 <        txt += '       if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1171 <        txt += '           echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1172 <        txt += '           echo "JOB_EXIT_STATUS = 10020"\n'
1173 <        txt += '           echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1174 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1175 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1176 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1177 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1178 <        txt += '           exit 1\n'
1179 <        txt += '       fi\n'
1180 <        txt += '       echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1181 <        txt += '       source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1182 <        txt += '       result=$?\n'
1183 <        txt += '       if [ $result -ne 0 ]; then\n'
1378 <        txt += '           echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1379 <        txt += '           echo "JOB_EXIT_STATUS = 10032"\n'
1380 <        txt += '           echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1381 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1382 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1383 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1384 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1385 <        txt += '           exit 1\n'
1386 <        txt += '       fi\n'
1387 <        txt += '   fi\n'
1388 <        txt += '   \n'
1389 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1390 <        txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1157 >        txt = '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n'
1158 >        txt += '    echo ">>> setup CMS LCG environment:"\n'
1159 >        txt += '    echo "set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
1160 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1161 >        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
1162 >        txt += '    if [ ! $VO_CMS_SW_DIR ] ;then\n'
1163 >        txt += '        echo "ERROR ==> CMS software dir not found on WN `hostname`"\n'
1164 >        txt += '        job_exit_code=10031\n'
1165 >        txt += '        func_exit\n'
1166 >        txt += '    else\n'
1167 >        txt += '        echo "Sourcing environment... "\n'
1168 >        txt += '        if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1169 >        txt += '            echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1170 >        txt += '            job_exit_code=10020\n'
1171 >        txt += '            func_exit\n'
1172 >        txt += '        fi\n'
1173 >        txt += '        echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1174 >        txt += '        source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1175 >        txt += '        result=$?\n'
1176 >        txt += '        if [ $result -ne 0 ]; then\n'
1177 >        txt += '            echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1178 >        txt += '            job_exit_code=10032\n'
1179 >        txt += '            func_exit\n'
1180 >        txt += '        fi\n'
1181 >        txt += '    fi\n'
1182 >        txt += '    \n'
1183 >        txt += '    echo "==> setup cms environment ok"\n'
1184          return txt
1185  
1393    ### FEDE FOR DBS OUTPUT PUBLICATION
1186      def modifyReport(self, nj):
1187          """
1188 <        insert the part of the script that modifies the FrameworkJob Report
1188 >        insert the part of the script that modifies the FrameworkJob Report
1189          """
1190 +        txt = '\n#Written by cms_cmssw::modifyReport\n'
1191 +        publish_data = int(self.cfg_params.get('USER.publish_data',0))
1192 +        if (publish_data == 1):
1193 +            processedDataset = self.cfg_params['USER.publish_data_name']
1194 +            LFNBaseName = LFNBase(processedDataset)
1195  
1196 <        txt = ''
1197 <        try:
1198 <            publish_data = int(self.cfg_params['USER.publish_data'])          
1199 <        except KeyError:
1200 <            publish_data = 0
1201 <        if (publish_data == 1):  
1202 <            txt += 'echo "Modify Job Report" \n'
1406 <            #txt += 'chmod a+x $RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1407 <            ################ FEDE FOR DBS2 #############################################
1408 <            txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1409 <            #############################################################################
1410 <            #try:
1411 <            #    publish_data = int(self.cfg_params['USER.publish_data'])          
1412 <            #except KeyError:
1413 <            #    publish_data = 0
1414 <
1415 <            txt += 'if [ -z "$SE" ]; then\n'
1416 <            txt += '    SE="" \n'
1417 <            txt += 'fi \n'
1418 <            txt += 'if [ -z "$SE_PATH" ]; then\n'
1419 <            txt += '    SE_PATH="" \n'
1420 <            txt += 'fi \n'
1421 <            txt += 'echo "SE = $SE"\n'
1422 <            txt += 'echo "SE_PATH = $SE_PATH"\n'
1196 >            txt += 'if [ $copy_exit_status -eq 0 ]; then\n'
1197 >            txt += '    FOR_LFN=%s_${PSETHASH}/\n'%(LFNBaseName)
1198 >            txt += 'else\n'
1199 >            txt += '    FOR_LFN=/copy_problems/ \n'
1200 >            txt += '    SE=""\n'
1201 >            txt += '    SE_PATH=""\n'
1202 >            txt += 'fi\n'
1203  
1204 <        #if (publish_data == 1):  
1205 <            #processedDataset = self.cfg_params['USER.processed_datasetname']
1426 <            processedDataset = self.cfg_params['USER.publish_data_name']
1204 >            txt += 'echo ">>> Modify Job Report:" \n'
1205 >            txt += 'chmod a+x $SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
1206              txt += 'ProcessedDataset='+processedDataset+'\n'
1428            #### LFN=/store/user/<user>/processedDataset_PSETHASH
1429            txt += 'if [ "$SE_PATH" == "" ]; then\n'
1430            #### FEDE: added slash in LFN ##############
1431            txt += '    FOR_LFN=/copy_problems/ \n'
1432            txt += 'else \n'
1433            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1434            #####  FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1435            txt += '    FOR_LFN=/store$tmp \n'
1436            txt += 'fi \n'
1207              txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1208 +            txt += 'echo "SE = $SE"\n'
1209 +            txt += 'echo "SE_PATH = $SE_PATH"\n'
1210              txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1211              txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1212 <            #txt += 'echo "$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1213 <            txt += 'echo "$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1442 <            txt += '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1443 <            #txt += '$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1444 <      
1212 >            txt += 'echo "$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1213 >            txt += '$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1214              txt += 'modifyReport_result=$?\n'
1446            txt += 'echo modifyReport_result = $modifyReport_result\n'
1215              txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1216 <            txt += '    exit_status=1\n'
1217 <            txt += '    echo "ERROR: Problem with ModifyJobReport"\n'
1216 >            txt += '    modifyReport_result=70500\n'
1217 >            txt += '    job_exit_code=$modifyReport_result\n'
1218 >            txt += '    echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n'
1219 >            txt += '    echo "WARNING: Problem with ModifyJobReport"\n'
1220              txt += 'else\n'
1221 <            txt += '    mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1221 >            txt += '    mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
1222              txt += 'fi\n'
1453        else:
1454            txt += 'echo "no data publication required"\n'
1455            #txt += 'ProcessedDataset=no_data_to_publish \n'
1456            #### FEDE: added slash in LFN ##############
1457            #txt += 'FOR_LFN=/local/ \n'
1458            #txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1459            #txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1223          return txt
1224  
1225 <    def cleanEnv(self):
1226 <        ### OLI_DANIELE
1227 <        txt = ''
1228 <        txt += 'if [ $middleware == OSG ]; then\n'  
1229 <        txt += '    cd $RUNTIME_AREA\n'
1230 <        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
1231 <        txt += '    /bin/rm -rf $WORKING_DIR\n'
1232 <        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1233 <        txt += '              echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1234 <        txt += '              echo "JOB_EXIT_STATUS = 60999"\n'
1235 <        txt += '              echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1236 <        txt += '              dumpStatus $RUNTIME_AREA/$repo\n'
1237 <        txt += '        rm -f $RUNTIME_AREA/$repo \n'
1238 <        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1239 <        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1225 >    def wsParseFJR(self):
1226 >        """
1227 >        Parse the FrameworkJobReport to obtain useful information
1228 >        """
1229 >        txt = '\n#Written by cms_cmssw::wsParseFJR\n'
1230 >        txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
1231 >        txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
1232 >        txt += '    if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
1233 >        txt += '        cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n'
1234 >        if self.debug_wrapper :
1235 >            txt += '        echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
1236 >        txt += '        executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n'
1237 >        txt += '        if [ $executable_exit_status -eq 50115 ];then\n'
1238 >        txt += '            echo ">>> crab_fjr.xml contents: "\n'
1239 >        txt += '            cat $RUNTIME_AREA/crab_fjr_NJob.xml\n'
1240 >        txt += '            echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
1241 >        txt += '        elif [ $executable_exit_status -eq -999 ];then\n'
1242 >        txt += '            echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n'
1243 >        txt += '        else\n'
1244 >        txt += '            echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
1245 >        txt += '        fi\n'
1246 >        txt += '    else\n'
1247 >        txt += '        echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1248          txt += '    fi\n'
1249 +          #### Patch to check input data reading for CMSSW16x; hopefully we'll remove it asap
1250 +
1251 +        if self.datasetPath:
1252 +          # VERIFY PROCESSED DATA
1253 +            txt += '    if [ $executable_exit_status -eq 0 ];then\n'
1254 +            txt += '      echo ">>> Verify list of processed files:"\n'
1255 +            txt += '      echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
1256 +            txt += '      python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --lfn > processed-files.txt\n'
1257 +            txt += '      cat input-files.txt  | sort | uniq > tmp.txt\n'
1258 +            txt += '      mv tmp.txt input-files.txt\n'
1259 +            txt += '      echo "cat input-files.txt"\n'
1260 +            txt += '      echo "----------------------"\n'
1261 +            txt += '      cat input-files.txt\n'
1262 +            txt += '      cat processed-files.txt | sort | uniq > tmp.txt\n'
1263 +            txt += '      mv tmp.txt processed-files.txt\n'
1264 +            txt += '      echo "----------------------"\n'
1265 +            txt += '      echo "cat processed-files.txt"\n'
1266 +            txt += '      echo "----------------------"\n'
1267 +            txt += '      cat processed-files.txt\n'
1268 +            txt += '      echo "----------------------"\n'
1269 +            txt += '      diff -q input-files.txt processed-files.txt\n'
1270 +            txt += '      fileverify_status=$?\n'
1271 +            txt += '      if [ $fileverify_status -ne 0 ]; then\n'
1272 +            txt += '         executable_exit_status=30001\n'
1273 +            txt += '         echo "ERROR ==> not all input files processed"\n'
1274 +            txt += '         echo "      ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n'
1275 +            txt += '         echo "      ==> diff input-files.txt processed-files.txt"\n'
1276 +            txt += '      fi\n'
1277 +            txt += '    fi\n'
1278 +            txt += '\n'
1279 +        txt += 'else\n'
1280 +        txt += '    echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1281          txt += 'fi\n'
1282          txt += '\n'
1283 +        txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
1284 +        txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
1285 +        txt += 'job_exit_code=$executable_exit_status\n'
1286 +
1287          return txt
1288  
1289      def setParam_(self, param, value):
# Line 1485 | Line 1292 | class Cmssw(JobType):
1292      def getParams(self):
1293          return self._params
1294  
1488    def setTaskid_(self):
1489        self._taskId = self.cfg_params['taskId']
1490        
1491    def getTaskid(self):
1492        return self._taskId
1493
1295      def uniquelist(self, old):
1296          """
1297          remove duplicates from a list
# Line 1500 | Line 1301 | class Cmssw(JobType):
1301              nd[e]=0
1302          return nd.keys()
1303  
1304 <
1504 <    def checkOut(self, limit):
1304 >    def outList(self):
1305          """
1306          check the dimension of the output files
1307          """
1308 <        txt = 'echo "*****************************************"\n'
1309 <        txt += 'echo "** Starting output sandbox limit check **"\n'
1510 <        txt += 'echo "*****************************************"\n'
1511 <        allOutFiles = ""
1308 >        txt = ''
1309 >        txt += 'echo ">>> list of expected files on output sandbox"\n'
1310          listOutFiles = []
1311 <        for fileOut in (self.output_file+self.output_file_sandbox):
1312 <             if fileOut.find('crab_fjr') == -1:
1313 <                 allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob')
1314 <                 listOutFiles.append(self.numberFile_(fileOut, '$NJob'))
1315 <        txt += 'echo "OUTPUT files: '+str(allOutFiles)+'";\n'
1316 <        txt += 'ls -gGhrta;\n'
1317 <        txt += 'sum=0;\n'
1318 <        txt += 'for file in '+str(allOutFiles)+' ; do\n'
1319 <        txt += '    if [ -e $file ]; then\n'
1320 <        txt += '        tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
1321 <        txt += '        sum=`expr $sum + $tt`\n'
1322 <        txt += '    else\n'
1323 <        txt += '        echo "WARNING: output file $file not found!"\n'
1324 <        txt += '    fi\n'
1325 <        txt += 'done\n'
1528 <        txt += 'echo "Total Output dimension: $sum";\n'
1529 <        txt += 'limit='+str(limit)+';\n'
1530 <        txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
1531 <        txt += 'if [ $limit -lt $sum ]; then\n'
1532 <        txt += '    echo "WARNING: output files have to big size - something will be lost;"\n'
1533 <        txt += '    echo "         checking the output file sizes..."\n'
1534 <        """
1535 <        txt += '    dim=0;\n'
1536 <        txt += '    exclude=0;\n'
1537 <        txt += '    for files in '+str(allOutFiles)+' ; do\n'
1538 <        txt += '        sumTemp=0;\n'
1539 <        txt += '        for file2 in '+str(allOutFiles)+' ; do\n'
1540 <        txt += '            if [ $file != $file2 ]; then\n'
1541 <        txt += '                tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1542 <        txt += '                sumTemp=`expr $sumTemp + $tt`;\n'
1543 <        txt += '            fi\n'
1544 <        txt += '        done\n'
1545 <        txt += '        if [ $sumTemp -lt $limit ]; then\n'
1546 <        txt += '            if [ $dim -lt $sumTemp ]; then\n'
1547 <        txt += '                dim=$sumTemp;\n'
1548 <        txt += '                exclude=$file;\n'
1549 <        txt += '            fi\n'
1550 <        txt += '        fi\n'
1551 <        txt += '    done\n'
1552 <        txt += '    echo "Dimension calculated: $dim"; echo "File to exclude: $exclude";\n'
1553 <        """
1554 <        txt += '    tot=0;\n'
1555 <        txt += '    for file2 in '+str(allOutFiles)+' ; do\n'
1556 <        txt += '        tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1557 <        txt += '        tot=`expr $tot + $tt`;\n'
1558 <        txt += '        if [ $limit -lt $tot ]; then\n'
1559 <        txt += '            tot=`expr $tot - $tt`;\n'
1560 <        txt += '            fileLast=$file;\n'
1561 <        txt += '            break;\n'
1562 <        txt += '        fi\n'
1563 <        txt += '    done\n'
1564 <        txt += '    echo "Dimension calculated: $tot"; echo "First file to exclude: $file";\n'
1565 <        txt += '    flag=0;\n'    
1566 <        txt += '    for filess in '+str(allOutFiles)+' ; do\n'
1567 <        txt += '        if [ $fileLast = $filess ]; then\n'
1568 <        txt += '            flag=1;\n'
1569 <        txt += '        fi\n'
1570 <        txt += '        if [ $flag -eq 1 ]; then\n'
1571 <        txt += '            rm -f $filess;\n'
1572 <        txt += '        fi\n'
1573 <        txt += '    done\n'
1574 <        txt += '    ls -agGhrt;\n'
1575 <        txt += '    echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
1576 <        txt += '    echo "JOB_EXIT_STATUS = 70000";\n'
1577 <        txt += '    exit_status=70000;\n'
1578 <        txt += 'else'
1579 <        txt += '    echo "Total Output dimension $sum is fine.";\n'
1580 <        txt += 'fi\n'
1581 <        txt += 'echo "*****************************************"\n'
1582 <        txt += 'echo "*** Ending output sandbox limit check ***"\n'
1583 <        txt += 'echo "*****************************************"\n'
1311 >        stdout = 'CMSSW_$NJob.stdout'
1312 >        stderr = 'CMSSW_$NJob.stderr'
1313 >        if (self.return_data == 1):
1314 >            for file in (self.output_file+self.output_file_sandbox):
1315 >                listOutFiles.append(self.numberFile_(file, '$NJob'))
1316 >            listOutFiles.append(stdout)
1317 >            listOutFiles.append(stderr)
1318 >        else:
1319 >            for file in (self.output_file_sandbox):
1320 >                listOutFiles.append(self.numberFile_(file, '$NJob'))
1321 >            listOutFiles.append(stdout)
1322 >            listOutFiles.append(stderr)
1323 >        txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n'
1324 >        txt += 'filesToCheck="'+string.join(listOutFiles,' ')+'"\n'
1325 >        txt += 'export filesToCheck\n'
1326          return txt

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines