ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.221
Committed: Wed Jun 18 14:02:42 2008 UTC (16 years, 10 months ago) by fanzago
Content type: text/x-python
Branch: MAIN
Changes since 1.220: +9 -1 lines
Log Message:
removed the control of input file in the case of publication with PU

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5 fanzago 1.115 from BlackWhiteListParser import BlackWhiteListParser
6 slacapra 1.1 import common
7     import Scram
8 fanzago 1.173 from LFNBaseName import *
9 slacapra 1.1
10 slacapra 1.105 import os, string, glob
11 slacapra 1.1
12     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs,skip_blocks, isNew):
    """
    Configure a CMSSW job type from the user configuration.

    ARGUMENTS:
      cfg_params  - mapping of 'SECTION.key' configuration entries
                    (CMSSW.*, USER.*, EDG.* keys are read here)
      ncjobs      - number of jobs requested to be created; used as an
                    upper limit by the job splitting
      skip_blocks - forwarded unchanged to DataDiscovery
      isNew       - when true, the parameter set is edited/written and
                    the input sandbox tarball is prepared
    RAISES: CrabException on missing/inconsistent configuration or when
            the parameter-set manipulation fails.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug(3,'CMSSW::__init__')
    self.skip_blocks = skip_blocks

    self.argsList = []

    self._params = {}
    self.cfg_params = cfg_params
    # init BlackWhiteListParser
    self.blackWhiteListParser = BlackWhiteListParser(cfg_params)

    self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',9.5))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    log = common.logger

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''
    self.datasetPath = ''

    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()
    version_array = self.version.split('_')
    self.CMSSW_major = 0
    self.CMSSW_minor = 0
    self.CMSSW_patch = 0
    try:
        self.CMSSW_major = int(version_array[1])
        self.CMSSW_minor = int(version_array[2])
        self.CMSSW_patch = int(version_array[3])
    except (IndexError, ValueError):
        # Only a too-short or non-numeric version string is expected
        # here; anything else should propagate untouched.
        msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
        raise CrabException(msg)

    ### collect Data cards

    if not cfg_params.has_key('CMSSW.datasetpath'):
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)

    ### Temporary: added to remove input file control in the case of PU
    self.dataset_pu = cfg_params.get('USER.dataset_pu', 'NONE')
    ####

    tmp = cfg_params['CMSSW.datasetpath']
    log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
    if tmp.lower() == 'none':
        self.datasetPath = None
        self.selectNoInput = 1
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0

    self.dataTiers = []
    self.debugWrap = ''
    self.debug_wrapper = cfg_params.get('USER.debug_wrapper',False)
    if self.debug_wrapper:
        self.debugWrap = '--debug'

    ## now the application
    self.executable = cfg_params.get('CMSSW.executable','cmsRun')
    log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)

    if not cfg_params.has_key('CMSSW.pset'):
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
    if self.pset.lower() != 'none':
        if not os.path.exists(self.pset):
            raise CrabException("User defined PSet file "+self.pset+" does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    tmp = cfg_params.get('CMSSW.output_file',None)
    if tmp:
        self.output_file = [x.strip() for x in tmp.split(',')]

    # script_exe file as additional file in inputSandbox
    self.scriptExe = cfg_params.get('USER.script_exe',None)
    if self.scriptExe:
        if not os.path.isfile(self.scriptExe):
            msg ="ERROR. file "+self.scriptExe+" not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    # With neither a dataset nor a pset there must be a user script.
    # scriptExe is None (not '') when the key is absent, so test for
    # "empty" rather than comparing against the empty string.
    if self.datasetPath == None and self.pset == None and not self.scriptExe:
        msg ="Error. script_exe not defined"
        raise CrabException(msg)

    # use parent files...
    self.useParent = self.cfg_params.get('CMSSW.use_parent',False)

    ## additional input files
    if cfg_params.has_key('USER.additional_input_files'):
        for pattern in cfg_params['USER.additional_input_files'].split(','):
            pattern = pattern.strip()
            if not pattern:
                # tolerate empty entries such as a trailing comma
                continue
            dirname = ''
            if not pattern.startswith('/'):
                dirname = "."
            matches = []
            if pattern.find("*") > -1:
                matches = glob.glob(os.path.join(dirname, pattern))
                if len(matches) == 0:
                    raise CrabException("No additional input file found with this pattern: "+pattern)
            else:
                matches.append(pattern)
            for match in matches:
                if not os.path.exists(match):
                    raise CrabException("Additional input file not found: "+match)
                self.additional_inbox_files.append(match.strip())
        common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))

    ## Events per job
    if cfg_params.has_key('CMSSW.events_per_job'):
        self.eventsPerJob = int(cfg_params['CMSSW.events_per_job'])
        self.selectEventsPerJob = 1
    else:
        self.eventsPerJob = -1
        self.selectEventsPerJob = 0

    ## number of jobs
    if cfg_params.has_key('CMSSW.number_of_jobs'):
        self.theNumberOfJobs = int(cfg_params['CMSSW.number_of_jobs'])
        self.selectNumberOfJobs = 1
    else:
        self.theNumberOfJobs = 0
        self.selectNumberOfJobs = 0

    if cfg_params.has_key('CMSSW.total_number_of_events'):
        self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
        self.selectTotalNumberEvents = 1
        if self.selectNumberOfJobs == 1:
            if (self.total_number_of_events != -1) and int(self.total_number_of_events) < int(self.theNumberOfJobs):
                msg = 'Must specify at least one event per job. total_number_of_events > number_of_jobs '
                raise CrabException(msg)
    else:
        self.total_number_of_events = 0
        self.selectTotalNumberEvents = 0

    if self.pset != None:
        if (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2:
            msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
            raise CrabException(msg)
    else:
        if self.selectNumberOfJobs == 0:
            msg = 'Must specify number_of_jobs.'
            raise CrabException(msg)

    ## New method of dealing with seeds
    # str.strip() returns a new string, so the stripped value itself
    # has to be stored (calling strip() and dropping the result is a no-op).
    self.incrementSeeds = []
    self.preserveSeeds = []
    if cfg_params.has_key('CMSSW.preserve_seeds'):
        for seedName in cfg_params['CMSSW.preserve_seeds'].split(','):
            self.preserveSeeds.append(seedName.strip())
    if cfg_params.has_key('CMSSW.increment_seeds'):
        for seedName in cfg_params['CMSSW.increment_seeds'].split(','):
            self.incrementSeeds.append(seedName.strip())

    ## Old method of dealing with seeds
    ## FUTURE: This is for old CMSSW and old CRAB. Can throw exceptions after a couple of CRAB releases and then
    ## remove
    deprecationNote = " is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\nAdded to increment_seeds."

    self.sourceSeed = cfg_params.get('CMSSW.pythia_seed',None)
    if self.sourceSeed:
        common.logger.message("pythia_seed" + deprecationNote)
        self.incrementSeeds.append('sourceSeed')
        self.incrementSeeds.append('theSource')

    self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
    if self.sourceSeedVtx:
        common.logger.message("vtx_seed" + deprecationNote)
        self.incrementSeeds.append('VtxSmeared')

    self.sourceSeedG4 = cfg_params.get('CMSSW.g4_seed',None)
    if self.sourceSeedG4:
        common.logger.message("g4_seed" + deprecationNote)
        self.incrementSeeds.append('g4SimHits')

    self.sourceSeedMix = cfg_params.get('CMSSW.mix_seed',None)
    if self.sourceSeedMix:
        common.logger.message("mix_seed" + deprecationNote)
        self.incrementSeeds.append('mix')

    self.firstRun = cfg_params.get('CMSSW.first_run',None)

    # Copy/return
    self.copy_data = int(cfg_params.get('USER.copy_data',0))
    self.return_data = int(cfg_params.get('USER.return_data',0))

    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination=[]  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end

    ## Select Splitting
    if self.selectNoInput:
        if self.pset == None:
            self.jobSplittingForScript()
        else:
            self.jobSplittingNoInput()
    else:
        self.jobSplittingByBlocks(blockSites)

    # modify Pset only the first time
    if isNew:
        if self.pset != None:
            import PsetManipulator as pp
            PsetEdit = pp.PsetManipulator(self.pset)
            try:
                # Add FrameworkJobReport to parameter-set, set max events.
                # Reset later for data jobs by writeCFG which does all modifications
                PsetEdit.addCrabFJR(self.fjrFileName) # FUTURE: Job report addition not needed by CMSSW>1.5
                PsetEdit.maxEvent(self.eventsPerJob)
                PsetEdit.psetWriter(self.configFilename())
                ## If present, add TFileService to output files
                if not int(cfg_params.get('CMSSW.skip_TFileService_output',0)):
                    tfsOutput = PsetEdit.getTFileService()
                    if tfsOutput:
                        if tfsOutput in self.output_file:
                            common.logger.debug(5,"Output from TFileService "+tfsOutput+" already in output files")
                        else:
                            self.output_file.append(tfsOutput)
                            common.logger.message("Adding "+tfsOutput+" to output files (from TFileService)")
                ## If present and requested, add PoolOutputModule to output files
                if int(cfg_params.get('CMSSW.get_edm_output',0)):
                    edmOutput = PsetEdit.getPoolOutputModule()
                    if edmOutput:
                        if edmOutput in self.output_file:
                            common.logger.debug(5,"Output from PoolOutputModule "+edmOutput+" already in output files")
                        else:
                            self.output_file.append(edmOutput)
                            common.logger.message("Adding "+edmOutput+" to output files (from PoolOutputModule)")
            except CrabException:
                msg='Error while manipulating ParameterSet: exiting...'
                raise CrabException(msg)
        ## Prepare inputSandbox TarBall (only the first time)
        self.tgzNameWithPath = self.getTarBall(self.executable)
295 gutsche 1.3
296 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
297    
298 slacapra 1.86 import DataDiscovery
299     import DataLocation
300 gutsche 1.3 common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
301    
302     datasetPath=self.datasetPath
303    
304 slacapra 1.1 ## Contact the DBS
305 gutsche 1.92 common.logger.message("Contacting Data Discovery Services ...")
306 slacapra 1.1 try:
307 spiga 1.208 self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
308 slacapra 1.1 self.pubdata.fetchDBSInfo()
309    
310 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
311 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
312     raise CrabException(msg)
313 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
314 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
315     raise CrabException(msg)
316 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
317 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
318 slacapra 1.1 raise CrabException(msg)
319    
320 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
321 mkirn 1.37 self.eventsbyblock=self.pubdata.getEventsPerBlock()
322     self.eventsbyfile=self.pubdata.getEventsPerFile()
323 spiga 1.204 self.parentFiles=self.pubdata.getParent()
324 gutsche 1.3
325 slacapra 1.1 ## get max number of events
326 ewv 1.192 self.maxEvents=self.pubdata.getMaxEvents()
327 slacapra 1.1
328     ## Contact the DLS and build a list of sites hosting the fileblocks
329     try:
330 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
331 gutsche 1.6 dataloc.fetchDLSInfo()
332 slacapra 1.41 except DataLocation.DataLocationError , ex:
333 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
334     raise CrabException(msg)
335 ewv 1.131
336 slacapra 1.1
337 gutsche 1.35 sites = dataloc.getSites()
338     allSites = []
339     listSites = sites.values()
340 slacapra 1.63 for listSite in listSites:
341     for oneSite in listSite:
342 gutsche 1.35 allSites.append(oneSite)
343     allSites = self.uniquelist(allSites)
344 gutsche 1.3
345 gutsche 1.92 # screen output
346     common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
347    
348 gutsche 1.35 return sites
349 ewv 1.131
def jobSplittingByBlocks(self, blockSites):
    """
    Perform job splitting. Jobs run over an integer number of files
    and no more than one block.
    ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
    REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
              self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
              self.maxEvents, self.filesbyblock, self.eventsbyblock, self.eventsbyfile,
              self.ncjobs (and self.parentFiles when self.useParent is set)
    SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
          self.total_number_of_jobs, self.ncjobs - Total # of jobs
          self.list_of_args - per job: [files, (parent files,) max events, skip events]
    """

    # ---- Handle the possible job splitting configurations ---- #
    if (self.selectTotalNumberEvents):
        totalEventsRequested = self.total_number_of_events
    if (self.selectEventsPerJob):
        eventsPerJobRequested = self.eventsPerJob
        if (self.selectNumberOfJobs):
            totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob

    # If user requested all the events in the dataset
    if (totalEventsRequested == -1):
        eventsRemaining = self.maxEvents
    # If user requested more events than are in the dataset
    elif (totalEventsRequested > self.maxEvents):
        eventsRemaining = self.maxEvents
        # report the effective request: total_number_of_events is 0 when
        # the request was expressed as events_per_job * number_of_jobs
        common.logger.message("Requested "+str(totalEventsRequested)+ " events, but only "+str(self.maxEvents)+" events are available.")
    # If user requested less events than are in the dataset
    else:
        eventsRemaining = totalEventsRequested

    # If user requested more events per job than are in the dataset
    if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
        eventsPerJobRequested = self.maxEvents

    # For user info at end
    totalEventCount = 0

    if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
        eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)

    if (self.selectNumberOfJobs):
        common.logger.message("May not create the exact number_of_jobs requested.")

    if ( self.ncjobs == 'all' ) :
        totalNumberOfJobs = 999999999
    else :
        totalNumberOfJobs = self.ncjobs

    blocks = blockSites.keys()
    blockCount = 0
    # Backup variable in case self.maxEvents counted events in a non-included block
    numBlocksInDataset = len(blocks)

    jobCount = 0
    list_of_lists = []

    # dictionary tracking which jobs belong to which block
    jobsOfBlock = {}

    # ---- Iterate over the blocks in the dataset until ---- #
    # ---- we've met the requested total # of events    ---- #
    while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
        block = blocks[blockCount]
        blockCount += 1
        if block not in jobsOfBlock:
            jobsOfBlock[block] = []

        if not self.eventsbyblock.has_key(block):
            continue

        numEventsInBlock = self.eventsbyblock[block]
        common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))

        files = self.filesbyblock[block]
        numFilesInBlock = len(files)
        if (numFilesInBlock <= 0):
            continue
        fileCount = 0

        # ---- New block => New job ---- #
        parString = ""
        # parent files of the files collected in parString
        pString = ""
        # counter for number of events in files currently worked on
        filesEventCount = 0
        # flag if next while loop should touch new file
        newFile = 1
        # job event counter
        jobSkipEventCount = 0

        # ---- Iterate over the files in the block until we've met the requested ---- #
        # ---- total # of events or we've gone over all the files in this block  ---- #
        while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
            file = files[fileCount]
            if self.useParent:
                parent = self.parentFiles[file]
                common.logger.debug(6, "File "+str(file)+" has the following parents: "+str(parent))
                common.logger.write("File "+str(file)+" has the following parents: "+str(parent))
            if newFile :
                try:
                    numEventsInFile = self.eventsbyfile[file]
                    common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
                    # increase filesEventCount
                    filesEventCount += numEventsInFile
                    # Add file to current job
                    parString += self._quoteFileName(file)
                    # Parents are added together with their file, i.e. only
                    # once per job; a file split over several jobs is
                    # revisited with newFile == 0 and must not add them again.
                    if self.useParent:
                        for f in parent:
                            pString += self._quoteFileName(f)
                    newFile = 0
                except KeyError:
                    common.logger.message("File "+str(file)+" has unknown number of events: skipping")

            eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining)
            eventsInHand = filesEventCount - jobSkipEventCount
            # if less events in file remain than eventsPerJobRequested
            if ( eventsInHand < eventsPerJobRequested):
                # if last file in block
                if ( fileCount == numFilesInBlock-1 ) :
                    # end job using last file, use remaining events in block
                    # close job and touch new file
                    list_of_lists.append(self._jobArguments(parString, pString, -1, jobSkipEventCount))
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsInHand)+" events (last file in block).")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    # fill jobs of block dictionary
                    jobsOfBlock[block].append(jobCount+1)
                    # reset counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsInHand
                    eventsRemaining = eventsRemaining - eventsInHand
                    jobSkipEventCount = 0
                    # reset file
                    pString = ""
                    parString = ""
                    filesEventCount = 0
                    newFile = 1
                    fileCount += 1
                else :
                    # go to next file
                    newFile = 1
                    fileCount += 1
            # if events in file equal to eventsPerJobRequested
            elif ( eventsInHand == eventsPerJobRequested ) :
                # close job and touch new file
                list_of_lists.append(self._jobArguments(parString, pString, eventsPerJobRequested, jobSkipEventCount))
                common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                self.jobDestination.append(blockSites[block])
                common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                jobsOfBlock[block].append(jobCount+1)
                # reset counter
                jobCount = jobCount + 1
                totalEventCount = totalEventCount + eventsPerJobRequested
                eventsRemaining = eventsRemaining - eventsPerJobRequested
                jobSkipEventCount = 0
                # reset file
                pString = ""
                parString = ""
                filesEventCount = 0
                newFile = 1
                fileCount += 1
            # if more events in file remain than eventsPerJobRequested
            else :
                # close job but don't touch new file
                list_of_lists.append(self._jobArguments(parString, pString, eventsPerJobRequested, jobSkipEventCount))
                common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                self.jobDestination.append(blockSites[block])
                common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                jobsOfBlock[block].append(jobCount+1)
                # increase counter
                jobCount = jobCount + 1
                totalEventCount = totalEventCount + eventsPerJobRequested
                eventsRemaining = eventsRemaining - eventsPerJobRequested
                # calculate skip events for last file
                # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
                jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
                # remove all but the last file: the next job starts with
                # the current file and (only) that file's parents
                filesEventCount = self.eventsbyfile[file]
                pString = ""
                if self.useParent:
                    for f in parent:
                        pString += self._quoteFileName(f)
                parString = self._quoteFileName(file)

    self.ncjobs = self.total_number_of_jobs = jobCount
    if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
        common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
    common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")

    self._reportJobsAndSites(blocks, blockSites, jobsOfBlock)

    self.list_of_args = list_of_lists
    return

def _quoteFileName(self, name):
    """Return name wrapped in escaped double quotes, followed by an escaped comma."""
    return '\\"' + name + '\\"\\,'

def _jobArguments(self, parString, pString, maxEvents, skipEvents):
    """
    Build the argument list of one job from the accumulated file string
    (and parent file string when self.useParent is set); the trailing
    two-character separator of each string is dropped.
    """
    fullString = parString[:-2]
    if self.useParent:
        fullParentString = pString[:-2]
        return [fullString,fullParentString,str(maxEvents),str(skipEvents)]
    return [fullString,str(maxEvents),str(skipEvents)]

def _reportJobsAndSites(self, blocks, blockSites, jobsOfBlock):
    """
    Print, per block, the jobs created for it and the sites left after
    black/white list filtering; warn about blocks left without any site.
    """
    # screen output
    screenOutput = "List of jobs and available destination sites:\n\n"

    # keep trace of block with no sites to print a warning at the end
    jobsWithoutSite = []
    blocksWithoutSite = []

    blockCounter = 0
    for block in blocks:
        if block not in jobsOfBlock:
            continue
        blockCounter += 1
        jobRanges = spanRanges(jobsOfBlock[block])
        # filter once and reuse the result for both the listing and the check
        allowedSites = self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)
        screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,jobRanges,','.join(allowedSites))
        if len(allowedSites) == 0:
            jobsWithoutSite.append( jobRanges )
            blocksWithoutSite.append( blockCounter )

    common.logger.message(screenOutput)
    if len(jobsWithoutSite) > 0 and len(blocksWithoutSite) > 0:
        msg = 'WARNING: No sites are hosting any part of data for block:\n '
        separator = ""
        if len(blocksWithoutSite) > 1:
            separator = ","
        for blockNumber in blocksWithoutSite:
            msg += ' ' + str(blockNumber) + separator
        msg += '\n Related jobs:\n '
        separator = ""
        if len(jobsWithoutSite) > 1:
            separator = ","
        for range_jobs in jobsWithoutSite:
            msg += str(range_jobs) + separator
        msg += '\n will not be submitted and this block of data can not be analyzed!\n'
        if self.cfg_params.has_key('EDG.se_white_list'):
            msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
            msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
            msg += 'Please check if the dataset is available at this site!)\n'
        if self.cfg_params.has_key('EDG.ce_white_list'):
            msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
            msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
            msg += 'Please check if the dataset is available at this site!)\n'

        common.logger.message(msg)
597    
def jobSplittingNoInput(self):
    """
    Split a job without input dataset by number of events.

    Derives self.total_number_of_jobs (and, depending on which two
    parameters were given, self.eventsPerJob or
    self.total_number_of_events), then fills self.list_of_args and
    appends one empty destination per job to self.jobDestination.
    RAISES: CrabException when the total number of events is negative.
    """
    common.logger.debug(5,'Splitting per events')

    # echo whatever the user asked for
    requested = (
        (self.selectEventsPerJob, self.eventsPerJob, ' events per job '),
        (self.selectNumberOfJobs, self.theNumberOfJobs, ' jobs in total '),
        (self.selectTotalNumberEvents, self.total_number_of_events, ' events in total '),
    )
    for selected, value, label in requested:
        if selected:
            common.logger.message('Required '+str(value)+label)

    if self.total_number_of_events < 0:
        raise CrabException('Cannot split jobs per Events with "-1" as total number of events')

    if self.selectEventsPerJob:
        if self.selectTotalNumberEvents:
            self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
        elif self.selectNumberOfJobs:
            self.total_number_of_jobs = self.theNumberOfJobs
            self.total_number_of_events = int(self.theNumberOfJobs*self.eventsPerJob)
    elif self.selectNumberOfJobs:
        self.total_number_of_jobs = self.theNumberOfJobs
        self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)

    common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs))

    # is there any remainder?
    leftover = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)

    common.logger.debug(5,'Check '+str(leftover))

    common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
    if leftover > 0:
        common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))

    # the only per-job argument is the first run (when configured),
    # built as the text of first_run immediately followed by the job index
    self.list_of_args = []
    for jobIndex in range(self.total_number_of_jobs):
        ## Since there is no input, any site is good
        self.jobDestination.append([""]) #must be empty to write correctly the xml
        if self.firstRun:
            jobArgs = [str(self.firstRun)+str(jobIndex)]
        else:
            jobArgs = []
        self.list_of_args.append(jobArgs)

    return
649    
650 spiga 1.42
def jobSplittingForScript(self):
    """
    Split purely by the requested number of jobs.

    Sets self.total_number_of_jobs to self.theNumberOfJobs, gives every
    job its own index (as a string) as single argument and appends one
    empty destination per job to self.jobDestination.
    """
    common.logger.debug(5,'Splitting per job')
    common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')

    self.total_number_of_jobs = self.theNumberOfJobs
    jobTotal = self.total_number_of_jobs

    common.logger.debug(5,'N jobs '+str(jobTotal))

    common.logger.message(str(jobTotal)+' jobs can be created')

    # argument is the job index; a fresh [""] destination list per job
    self.list_of_args = [[str(jobIndex)] for jobIndex in range(jobTotal)]
    self.jobDestination.extend([[""] for jobIndex in range(jobTotal)])
    return
670    
def split(self, jobParams,firstJobID):
    """
    Fill jobParams with the argument list of every job and store the
    per-job arguments and destinations through common._db.updateJob_.

    jobParams  -- list extended in place with one entry per job
    firstJobID -- offset added to the local job index; the stored job id
                  and the first job argument are offset + index + 1

    Also sets self.argsList to the number of arguments of the first job
    plus one (the job number).
    """
    total = self.total_number_of_jobs
    perJobArgs = self.list_of_args
    # create the empty structure
    jobParams.extend([""] * total)

    jobIds = []
    jobFields = []
    for index in range(total):
        job = index + int(firstJobID)
        jobParams[index] = perJobArgs[index]
        jobIds.append(job + 1)

        argu = ''
        if len(jobParams[index]):
            argu = ' '.join(jobParams[index])
        destination = self.jobDestination[index]

        jobFields.append({
            'arguments': str(job + 1) + ' ' + argu,
            'dlsDestination': destination,
        })
        common.logger.debug(5, "Job " + str(job) + " Arguments: " + str(job + 1) + " " + argu + "\n"
                            + " Destination: " + str(destination))

    common._db.updateJob_(jobIds, jobFields)
    self.argsList = len(jobParams[0]) + 1
    return
700 ewv 1.131
def numberOfJobs(self):
    """
    Return the value of self.total_number_of_jobs.
    """
    return self.total_number_of_jobs
703    
def getTarBall(self, exe):
    """
    Return the path of the tarball with lib and exe.

    The path is computed from the work space tgz directory and
    self.tgz_name and stored in self.tgzNameWithPath.  An already
    existing tarball is reused as is; otherwise buildTar_() is asked to
    create it from 'exe'.
    """
    self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
    if os.path.exists(self.tgzNameWithPath):
        return self.tgzNameWithPath

    # Prepare a tar gzipped file with user binaries.
    self.buildTar_(exe)

    # str.strip() replaces the deprecated string.strip() module function.
    return self.tgzNameWithPath.strip()
716    
def buildTar_(self, executable):
    """
    Create the gzipped tarball self.tgzNameWithPath.

    Nothing is built when the working area is the release area itself.
    Otherwise the tarball receives, in this order: the user executable
    (when private), the local 'lib' and 'module' directories, every
    'data' directory found under src/, the parameter set, the needed
    ProdCommon/IMProv files, the monitoring and utility scripts and the
    additional input files.

    Raises CrabException when the executable is not found, when the
    tarball cannot be written, or when it is larger than
    self.MaxTarBallSize MB.
    """
    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        common.logger.debug(3,"swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
        return

    import tarfile
    try: # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        # Nested try/finally (instead of a bare close() at the end) so the
        # archive handle is released even when one of the steps below raises.
        try:
            ## First find the executable
            if (self.executable != ''):
                exeWithPath = self.scram.findFile_(executable)
                if ( not exeWithPath ):
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship
                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
                    path = swArea+'/'
                    # distinguish case when script is in user project area or given by full path somewhere else
                    if exeWithPath.find(path) >= 0 :
                        exe = exeWithPath.replace(path, '')
                        tar.add(path+exe,exe)
                    else :
                        tar.add(exeWithPath,os.path.basename(executable))
                # else: the exe is from release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            libDir = 'lib'
            lib = swArea+'/' +libDir
            common.logger.debug(5,"lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib,libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module,moduleDir)

            ## Now check if any data dir(s) is present
            self.dataExist = False
            # Work list of (path relative to src/, last path component).
            todo_list = [(i, i) for i in os.listdir(swArea+"/src")]
            while len(todo_list):
                entry, name = todo_list.pop()
                # skip CRAB working directories, hidden entries and CVS metadata
                if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS':
                    continue
                if os.path.isdir(swArea+"/src/"+entry):
                    entryPath = entry + '/'
                    todo_list += [(entryPath + i, i) for i in os.listdir(swArea+"/src/"+entry)]
                    if name == 'data':
                        self.dataExist=True
                        common.logger.debug(5,"data "+entry+" to be tarred")
                        tar.add(swArea+"/src/"+entry,"src/"+entry)

            ### CMSSW ParameterSet
            if self.pset is not None:
                cfg_file = common.work_space.jobDir()+self.configFilename()
                tar.add(cfg_file,self.configFilename())
                common.logger.debug(5,"File added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))

            ## Add ProdCommon dir to tar
            prodcommonDir = './'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
            neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools','ProdCommon/Core','ProdCommon/MCPayloads', 'IMProv']
            for needed in neededStuff:
                tar.add(prodcommonPath+needed,prodcommonDir+needed)
            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))

            ##### ML stuff
            ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
            path=os.environ['CRABDIR'] + '/python/'
            for script in ML_file_list:
                tar.add(path+script,script)
            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))

            ##### Utils
            Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py']
            for script in Utils_file_list:
                tar.add(path+script,script)
            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))

            ##### AdditionalFiles
            for extra in self.additional_inbox_files:
                # stored under the last '/'-separated component of its path
                tar.add(extra,extra.split('/')[-1])
            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            tar.close()
    except (IOError, tarfile.TarError):
        raise CrabException('Could not create tar-ball '+self.tgzNameWithPath)

    ## check for tarball size
    tarballinfo = os.stat(self.tgzNameWithPath)
    if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
        raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
826    
827 slacapra 1.61 ## create tar-ball with ML stuff
828 slacapra 1.97
def wsSetupEnvironment(self, nj=0):
    """
    Return the part of the job wrapper script which prepares
    the execution environment for the job 'nj'.

    The text is made of a middleware dependent part (LCG or OSG), the
    scram project/runtime setup for self.version, the check on the number
    of arguments and the job specific variables (dataset information,
    input files or seeds, parameter set name and hash).
    """
    if self.CMSSW_major >= 3 or (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1):
        psetName = 'pset.py'
    else:
        psetName = 'pset.cfg'

    chunks = []
    emit = chunks.append

    # Prepare JobType-independent part
    emit('\n#Written by cms_cmssw::wsSetupEnvironment\n')
    emit('echo ">>> setup environment"\n')
    emit('if [ $middleware == LCG ]; then \n')
    emit(self.wsSetupCMSLCGEnvironment_())
    chunks.extend([
        'elif [ $middleware == OSG ]; then\n',
        ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n',
        ' if [ ! $? == 0 ] ;then\n',
        ' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n',
        ' job_exit_code=10016\n',
        ' func_exit\n',
        ' fi\n',
        ' echo ">>> Created working directory: $WORKING_DIR"\n',
        '\n',
        ' echo "Change to working directory: $WORKING_DIR"\n',
        ' cd $WORKING_DIR\n',
        ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n',
    ])
    emit(self.wsSetupCMSOSGEnvironment_())
    emit('fi\n')

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    chunks.extend([
        '\n\n',
        'echo ">>> specific cmssw setup environment:"\n',
        'echo "CMSSW_VERSION = '+self.version+'"\n',
        scram+' project CMSSW '+self.version+'\n',
        'status=$?\n',
        'if [ $status != 0 ] ; then\n',
        ' echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n',
        ' job_exit_code=10034\n',
        ' func_exit\n',
        'fi \n',
        'cd '+self.version+'\n',
        'SOFTWARE_DIR=`pwd`\n',
        'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n',
        'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n',
        'if [ $? != 0 ] ; then\n',
        ' echo "ERROR ==> Problem with the command: "\n',
        ' echo "eval \\`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \\` at `hostname`"\n',
        ' job_exit_code=10034\n',
        ' func_exit\n',
        'fi \n',
    ])

    # Handle the arguments:
    chunks.extend([
        "\n",
        "## number of arguments (first argument always jobnumber)\n",
        "\n",
        "if [ $nargs -lt "+str(self.argsList)+" ]\n",
        "then\n",
        " echo 'ERROR ==> Too few arguments' +$nargs+ \n",
        ' job_exit_code=50113\n',
        " func_exit\n",
        "fi\n",
        "\n",
    ])

    # Prepare job-specific part
    job = common.job_list[nj]
    if self.datasetPath:
        pathFields = self.datasetPath.split("/")
        emit('\n')
        emit('DatasetPath='+self.datasetPath+'\n')
        emit('PrimaryDataset='+pathFields[1]+'\n')
        emit('DataTier='+pathFields[2]+'\n')
        emit('ApplicationFamily=cmsRun\n')
    else:
        emit('DatasetPath=MCDataTier\n')
        emit('PrimaryDataset=null\n')
        emit('DataTier=null\n')
        emit('ApplicationFamily=MCDataTier\n')

    if self.pset is not None:
        pset = os.path.basename(job.configFilename())
        emit('\n')
        emit('cp $RUNTIME_AREA/'+pset+' .\n')
        if self.datasetPath: # standard job
            # Variables read from the positional arguments that follow the
            # input file list; ParentFiles comes first when parents are used.
            if self.useParent:
                exported = ['ParentFiles', 'MaxEvents', 'SkipEvents']
            else:
                exported = ['MaxEvents', 'SkipEvents']
            emit('InputFiles=${args[1]}; export InputFiles\n')
            position = 2
            for variable in exported:
                emit(variable+'=${args['+str(position)+']}; export '+variable+'\n')
                position += 1
            emit('echo "Inputfiles:<$InputFiles>"\n')
            for variable in exported:
                emit('echo "'+variable+':<$'+variable+'>"\n')
        else: # pythia like job
            emit('PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n')
            emit('IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n')
            emit('echo "PreserveSeeds: <$PreserveSeeds>"\n')
            emit('echo "IncrementSeeds:<$IncrementSeeds>"\n')
            if self.firstRun:
                emit('FirstRun=${args[1]}; export FirstRun\n')
                emit('echo "FirstRun: <$FirstRun>"\n')

        emit('mv -f ' + pset + ' ' + psetName + '\n')

        # FUTURE: Can simply for 2_1_x and higher
        emit('\n')
        if self.debug_wrapper==True:
            emit('echo "***** cat ' + psetName + ' *********"\n')
            emit('cat ' + psetName + '\n')
            emit('echo "****** end ' + psetName + ' ********"\n')
            emit('\n')
        emit('PSETHASH=`edmConfigHash < ' + psetName + '` \n')
        emit('echo "PSETHASH = $PSETHASH" \n')
        emit('\n')

    return ''.join(chunks)
950 slacapra 1.176
def wsUntarSoftware(self, nj=0):
    """
    Return the part of the job script which unpacks the tarball
    self.tgzNameWithPath in the job area and prepends $RUNTIME_AREA to
    PYTHONPATH.

    When the tarball does not exist only the header comment is returned.
    """

    txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'

    if os.path.isfile(self.tgzNameWithPath):
        tarball = os.path.basename(self.tgzNameWithPath)
        txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+tarball+' :" \n'
        txt += 'tar xzvf $RUNTIME_AREA/'+tarball+'\n'
        # The exit status must be saved right after tar: any command emitted
        # in between (the debug listing below) would overwrite $?.
        txt += 'untar_status=$? \n'
        if self.debug_wrapper:
            txt += 'ls -Al \n'
        txt += 'if [ $untar_status -ne 0 ]; then \n'
        txt += ' echo "ERROR ==> Untarring .tgz file failed"\n'
        txt += ' job_exit_code=$untar_status\n'
        txt += ' func_exit\n'
        txt += 'else \n'
        txt += ' echo "Successful untar" \n'
        txt += 'fi \n'
        txt += '\n'
        txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
        txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
        txt += 'else\n'
        txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
        txt += 'fi\n'
        txt += '\n'

    return txt
985 ewv 1.170
def wsBuildExe(self, nj=0):
    """
    Return the part of the job script which moves the unpacked software
    directories (lib, module, optionally src) and the additional input
    files from $RUNTIME_AREA into the current directory, then prepends
    $RUNTIME_AREA to PYTHONPATH.
    """
    pieces = [
        '\n#Written by cms_cmssw::wsBuildExe\n',
        'echo ">>> moving CMSSW software directories in `pwd`" \n',
        'rm -r lib/ module/ \n',
        'mv $RUNTIME_AREA/lib/ . \n',
        'mv $RUNTIME_AREA/module/ . \n',
    ]

    # src/ is replaced only when buildTar_ shipped some data directory
    if self.dataExist == True:
        pieces.append('rm -r src/ \n')
        pieces.append('mv $RUNTIME_AREA/src/ . \n')

    if len(self.additional_inbox_files)>0:
        pieces.extend(['mv $RUNTIME_AREA/'+os.path.basename(extra)+' . \n'
                       for extra in self.additional_inbox_files])
    # txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n'
    # txt += 'mv $RUNTIME_AREA/IMProv/ . \n'

    pieces.extend([
        'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n',
        'if [ -z "$PYTHONPATH" ]; then\n',
        ' export PYTHONPATH=$RUNTIME_AREA/\n',
        'else\n',
        ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n',
        'echo "PYTHONPATH=$PYTHONPATH"\n',
        'fi\n',
        '\n',
    ])

    return ''.join(pieces)
1017 slacapra 1.1
1018 ewv 1.131
def executableName(self):
    """
    Return the command that starts the job: "sh " when a user script
    (self.scriptExe) is set, self.executable otherwise.
    """
    if not self.scriptExe:
        return self.executable
    return "sh "
1024 slacapra 1.1
def executableArgs(self):
    """
    Return the command line arguments of the job command.

    With a user script this is the script name followed by $NJob.
    Otherwise it is the framework job report option (CMSSW 1_5 and
    later) followed by the parameter set option (pset.py from CMSSW 2,
    pset.cfg before).
    """
    # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
    if self.scriptExe:#CarlosDaniele
        return self.scriptExe + " $NJob"

    major = self.CMSSW_major
    options = []
    # FUTURE: This tests the CMSSW version. Can remove code as versions deprecated
    # Framework job report
    if major >= 2 or (major >= 1 and self.CMSSW_minor >= 5):
        options.append(" -j $RUNTIME_AREA/crab_fjr_$NJob.xml")
    # Type of config file
    if major >= 2:
        options.append(" -p pset.py")
    else:
        options.append(" -p pset.cfg")
    return "".join(options)
1041 slacapra 1.1
def inputSandbox(self, nj):
    """
    Return the list of file names to be put in the JDL input sandbox:
    the tarball (only when it exists on disk) followed by the job
    wrapper script registered in the task as 'scriptName'.
    """
    sandbox = []
    tarball = self.tgzNameWithPath
    if os.path.isfile(tarball):
        sandbox.append(tarball)

    scriptName = str(common._db.queryTask('scriptName'))
    wrapperPath = common.work_space.pathForTgz() + 'job/' + os.path.basename(scriptName)
    sandbox.append(wrapperPath)
    return sandbox
1052    
def outputSandbox(self, nj):
    """
    Return the list of file names to be put in the JDL output sandbox.

    Every user declared output file (self.output_file followed by
    self.output_file_sandbox) is passed to numberFile() together with
    the one-based job number nj+1.
    """
    ## User Declared output files
    declared = self.output_file + self.output_file_sandbox
    return [numberFile(out, str(nj + 1)) for out in declared]
1064    
1065    
def wsRenameOutput(self, nj):
    """
    Return the part of the job script which renames the produced files.

    For every file in self.output_file the script checks that the file
    exists, renames it to its numbered name (numberFile(file, '$NJob'))
    and links it under the original name in $RUNTIME_AREA; a missing
    file sets job_exit_code=60302.  The numbered names are exported to
    the script as the 'file_list' variable.
    """

    txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    if self.debug_wrapper:
        txt += 'ls -Al\n'
    txt += '\n'

    # numbered names, collected while the per-file checks are written
    file_list = []
    for fileWithSuffix in self.output_file:
        output_file_num = numberFile(fileWithSuffix, '$NJob')
        file_list.append(output_file_num)
        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
        if (self.copy_data == 1): # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
            txt += ' mv '+fileWithSuffix+' '+output_file_num+'\n'
            txt += ' ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
        else:
            txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
        txt += 'else\n'
        txt += ' job_exit_code=60302\n'
        txt += ' echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
        if common.scheduler.name().upper() == 'CONDOR_G':
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' fi \n'
        txt += 'fi\n'

    # str.join replaces the deprecated string.join() module function
    txt += 'file_list="'+' '.join(file_list)+'"\n'
    txt += '\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    if self.debug_wrapper:
        txt += 'ls -Al\n'
    txt += '\n'
    txt += 'cd $RUNTIME_AREA\n'
    txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    return txt
1112    
def getRequirements(self, nj=[]):
    """
    Return the job requirements to add to the jdl files.

    The expression is the '&&' conjunction of: the software tag of
    self.version (when set), the tag of self.executable_arch (when set),
    the outbound connectivity requirement and, for the 'glitecoll'
    scheduler, the CE production state.  'nj' is not used.
    """
    runtimeEnv = 'other.GlueHostApplicationSoftwareRunTimeEnvironment'
    # Clauses are collected and joined so that the expression never starts
    # with a dangling ' && ' when self.version is empty.
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + '", ' + runtimeEnv + ')')
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + '", ' + runtimeEnv + ')')

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')
    if common.scheduler.name() == "glitecoll":
        # trailing blank kept: it is part of the text produced so far
        clauses.append('other.GlueCEStateStatus == "Production" ')

    return ' && '.join(clauses)
1132 gutsche 1.3
def configFilename(self):
    """
    Return the config filename: self.name() with the '.py' extension
    for CMSSW 2_1 and later, with the '.cfg' extension otherwise.
    """
    # FUTURE: Can remove cfg mode for CMSSW >= 2_1_x
    pythonConfig = self.CMSSW_major >= 3 or (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1)
    if not pythonConfig:
        return self.name()+'.cfg'
    return self.name()+'.py'
1140 gutsche 1.3
def wsSetupCMSOSGEnvironment_(self):
    """
    Return the part of the job script which prepares the CMS execution
    environment on OSG: it exports SCRAM_ARCH (self.executable_arch) and
    sources $OSG_APP/cmssoft/cms/cmsset_default.sh for self.version,
    setting job_exit_code=10020 when that file is missing.
    """
    arch = self.executable_arch
    script = [
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh ' + self.version + '\n',
        ' else\n',
        ' echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        '\n',
        ' echo "==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(script)
1164 ewv 1.131
def wsSetupCMSLCGEnvironment_(self):
    """
    Return the part of the job script which prepares the CMS execution
    environment on LCG: it exports SCRAM_ARCH and BUILD_ARCH
    (self.executable_arch) and sources $VO_CMS_SW_DIR/cmsset_default.sh,
    with exit codes 10031 (no software dir), 10020 (no setup file) and
    10032 (sourcing failed).
    """
    arch = self.executable_arch
    script = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' export BUILD_ARCH=' + arch + '\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n',
        ' job_exit_code=10031\n',
        ' func_exit\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "==> setup cms environment ok"\n',
    ]
    return ''.join(script)
1198 gutsche 1.5
def modifyReport(self, nj):
    """
    Return the part of the job script that modifies the FrameworkJob Report.

    Only the header comment is returned unless USER.publish_data is 1.
    In that case the script sets FOR_LFN (from LFNBase of
    USER.publish_data_name, or /copy_problems/ when the copy failed),
    runs ModifyJobReport.py on crab_fjr_$NJob.xml and replaces the
    report with the new one, or sets job_exit_code=70500 on failure.
    """
    txt = '\n#Written by cms_cmssw::modifyReport\n'
    if int(self.cfg_params.get('USER.publish_data',0)) != 1:
        return txt

    processedDataset = self.cfg_params['USER.publish_data_name']
    lfnBase = LFNBase(processedDataset)

    # the same command line is first echoed, then executed
    command = ('$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py'
               ' $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset'
               ' $DataTier $ProcessedDataset $ApplicationFamily $executable'
               ' $CMSSW_VERSION $PSETHASH $SE $SE_PATH')

    script = [
        'if [ $copy_exit_status -eq 0 ]; then\n',
        ' FOR_LFN=%s_${PSETHASH}/\n'%(lfnBase),
        'else\n',
        ' FOR_LFN=/copy_problems/ \n',
        ' SE=""\n',
        ' SE_PATH=""\n',
        'fi\n',
        'echo ">>> Modify Job Report:" \n',
        'chmod a+x $RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py\n',
        'ProcessedDataset='+processedDataset+'\n',
        'echo "ProcessedDataset = $ProcessedDataset"\n',
        'echo "SE = $SE"\n',
        'echo "SE_PATH = $SE_PATH"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
        'echo "' + command + '"\n',
        command + '\n',
        'modifyReport_result=$?\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' modifyReport_result=70500\n',
        ' job_exit_code=$modifyReport_result\n',
        ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "WARNING: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        'fi\n',
    ]
    return txt + ''.join(script)
1237 fanzago 1.99
def wsParseFJR(self):
    """
    Return the part of the job script which parses the FrameworkJobReport
    to obtain useful infos.

    The script runs parseCrabFjr.py on crab_fjr_$NJob.xml to feed the
    dashboard and to extract the executable exit status.  For jobs with
    an input dataset and no pile-up dataset (self.dataset_pu == 'NONE')
    it also compares the list of input files with the list of processed
    files and sets the exit status to 30001 when they differ.  Finally
    the exit status is reported and copied to job_exit_code.
    """
    txt = '\n#Written by cms_cmssw::wsParseFJR\n'
    txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
    txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
    txt += ' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
    txt += ' cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n'
    if self.debug_wrapper :
        txt += ' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
    txt += ' executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n'
    txt += ' if [ $executable_exit_status -eq 50115 ];then\n'
    txt += ' echo ">>> crab_fjr.xml contents: "\n'
    # The report is named after the job number variable: the '$' was missing
    # here, so the script tried to print a file that is never written.
    txt += ' cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
    txt += ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
    txt += ' elif [ $executable_exit_status -eq -999 ];then\n'
    txt += ' echo "ExitStatus from FrameworkJobReport not available. Using exit code of executable from command line."\n'
    txt += ' else\n'
    txt += ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
    txt += ' fi\n'
    txt += ' else\n'
    txt += ' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
    txt += ' fi\n'
    #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap

    if (self.datasetPath and self.dataset_pu == 'NONE'):
        # VERIFY PROCESSED DATA
        txt += ' if [ $executable_exit_status -eq 0 ];then\n'
        txt += ' echo ">>> Verify list of processed files:"\n'
        txt += ' echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
        txt += ' python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --lfn > processed-files.txt\n'
        txt += ' cat input-files.txt | sort | uniq > tmp.txt\n'
        txt += ' mv tmp.txt input-files.txt\n'
        txt += ' echo "cat input-files.txt"\n'
        txt += ' echo "----------------------"\n'
        txt += ' cat input-files.txt\n'
        txt += ' cat processed-files.txt | sort | uniq > tmp.txt\n'
        txt += ' mv tmp.txt processed-files.txt\n'
        txt += ' echo "----------------------"\n'
        txt += ' echo "cat processed-files.txt"\n'
        txt += ' echo "----------------------"\n'
        txt += ' cat processed-files.txt\n'
        txt += ' echo "----------------------"\n'
        txt += ' diff -q input-files.txt processed-files.txt\n'
        txt += ' fileverify_status=$?\n'
        txt += ' if [ $fileverify_status -ne 0 ]; then\n'
        txt += ' executable_exit_status=30001\n'
        txt += ' echo "ERROR ==> not all input files processed"\n'
        txt += ' echo " ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n'
        txt += ' echo " ==> diff input-files.txt processed-files.txt"\n'
        txt += ' fi\n'
        txt += ' fi\n'
        txt += '\n'
    txt += 'else\n'
    txt += ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
    txt += 'fi\n'
    txt += '\n'
    txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
    txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
    txt += 'job_exit_code=$executable_exit_status\n'

    return txt
1301    
def setParam_(self, param, value):
    """
    Store 'value' under the key 'param' in self._params.
    """
    self._params[param] = value
1304    
def getParams(self):
    """
    Return self._params, the dictionary filled through setParam_().
    """
    return self._params
1307 gutsche 1.8
def uniquelist(self, old):
    """
    Remove duplicates from a list.

    Return a new list holding every distinct (hashable) element of 'old'
    once, in order of first appearance.  'old' itself is not modified.
    """
    seen = {}
    unique = []
    for element in old:
        if element not in seen:
            seen[element] = 0
            unique.append(element)
    return unique
1316 mcinquil 1.121
def outList(self):
    """
    Return the part of the job script which declares the files expected
    in the output sandbox.

    The list holds the numbered names (numberFile(file, '$NJob')) of
    self.output_file_sandbox, preceded by those of self.output_file when
    self.return_data is 1, followed by the job stdout and stderr.  It is
    echoed and exported to the script as 'filesToCheck'.
    """
    if (self.return_data == 1):
        expected = self.output_file+self.output_file_sandbox
    else:
        expected = self.output_file_sandbox

    listOutFiles = [numberFile(name, '$NJob') for name in expected]
    listOutFiles.append('CMSSW_$NJob.stdout')
    listOutFiles.append('CMSSW_$NJob.stderr')
    # str.join replaces the deprecated string.join() module function
    names = ' '.join(listOutFiles)

    txt = ''
    txt += 'echo ">>> list of expected files on output sandbox"\n'
    txt += 'echo "output files: '+names+'"\n'
    txt += 'filesToCheck="'+names+'"\n'
    txt += 'export filesToCheck\n'
    return txt