ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.225
Committed: Fri Jun 27 14:21:26 2008 UTC (16 years, 10 months ago) by fanzago
Content type: text/x-python
Branch: MAIN
Changes since 1.224: +8 -1 lines
Log Message:
move rfcp function from SchedulerLocal to cmscp in crab_template and added some changes for publication using CAF and LSF

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5 fanzago 1.115 from BlackWhiteListParser import BlackWhiteListParser
6 slacapra 1.1 import common
7     import Scram
8 fanzago 1.173 from LFNBaseName import *
9 slacapra 1.1
10 slacapra 1.105 import os, string, glob
11 slacapra 1.1
12     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs, skip_blocks, isNew):
    """
    Configure a CMSSW job type from the user configuration.

    Reads and validates the CMSSW.*, USER.* and EDG.* keys of cfg_params,
    runs data discovery when a dataset is given, splits the task into jobs
    and, only when isNew is true, edits the parameter set and prepares the
    input sandbox tarball.

    cfg_params  -- mapping with the configuration parameters
    ncjobs      -- number of jobs to create; the splitting code also accepts
                   the string 'all'
    skip_blocks -- handed over unchanged to DataDiscovery
    isNew       -- true when the parameter set and tarball must be prepared

    Raises CrabException for every missing or inconsistent setting.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug(3, 'CMSSW::__init__')
    self.skip_blocks = skip_blocks

    self.argsList = []

    self._params = {}
    self.cfg_params = cfg_params
    # init BlackWhiteListParser
    self.blackWhiteListParser = BlackWhiteListParser(cfg_params)

    self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize', 9.5))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    log = common.logger

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''
    self.datasetPath = ''

    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    # The release name is split on '_' and fields 1..3 must be integers.
    self.version = self.scram.getSWVersion()
    version_array = self.version.split('_')
    self.CMSSW_major = 0
    self.CMSSW_minor = 0
    self.CMSSW_patch = 0
    try:
        self.CMSSW_major = int(version_array[1])
        self.CMSSW_minor = int(version_array[2])
        self.CMSSW_patch = int(version_array[3])
    except (IndexError, ValueError):
        # too few fields or a non numeric field
        msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
        raise CrabException(msg)

    ### collect Data cards
    if not cfg_params.has_key('CMSSW.datasetpath'):
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)

    ### Temporary: added to remove input file control in the case of PU
    self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)

    tmp = cfg_params['CMSSW.datasetpath']
    log.debug(6, "CMSSW::CMSSW(): datasetPath = " + tmp)
    if tmp.lower() == 'none':
        self.datasetPath = None
        self.selectNoInput = 1
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0

    self.dataTiers = []
    self.debugWrap = ''
    self.debug_wrapper = cfg_params.get('USER.debug_wrapper', False)
    if self.debug_wrapper:
        self.debugWrap = '--debug'

    ## now the application
    self.executable = cfg_params.get('CMSSW.executable', 'cmsRun')
    log.debug(6, "CMSSW::CMSSW(): executable = " + self.executable)

    if not cfg_params.has_key('CMSSW.pset'):
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    log.debug(6, "Cmssw::Cmssw(): PSet file = " + self.pset)
    if self.pset.lower() != 'none':
        if not os.path.exists(self.pset):
            raise CrabException("User defined PSet file " + self.pset + " does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox;
    ## the fjr report is added by default
    self.output_file_sandbox = [self.fjrFileName]

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    tmp = cfg_params.get('CMSSW.output_file', None)
    if tmp:
        self.output_file = [x.strip() for x in tmp.split(',')]

    # script_exe file as additional file in inputSandbox
    self.scriptExe = cfg_params.get('USER.script_exe', None)
    if self.scriptExe:
        if not os.path.isfile(self.scriptExe):
            msg = "ERROR. file " + self.scriptExe + " not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    # A missing script_exe is None (the .get default above), not '', so the
    # check must test for emptiness rather than compare with ''.
    if self.datasetPath is None and self.pset is None and not self.scriptExe:
        msg = "Error. script_exe not defined"
        raise CrabException(msg)

    # use parent files...
    self.useParent = self.cfg_params.get('CMSSW.use_parent', False)

    ## additional input files
    if cfg_params.has_key('USER.additional_input_files'):
        for pattern in cfg_params['USER.additional_input_files'].split(','):
            pattern = pattern.strip()
            if not pattern:
                # empty entry (e.g. trailing comma): nothing to add
                continue
            if pattern.find("*") > -1:
                # relative patterns are expanded from the current directory
                dirname = ''
                if not pattern.startswith("/"):
                    dirname = "."
                files = glob.glob(os.path.join(dirname, pattern))
                if len(files) == 0:
                    raise CrabException("No additional input file found with this pattern: " + pattern)
            else:
                files = [pattern]
            for fname in files:
                if not os.path.exists(fname):
                    raise CrabException("Additional input file not found: " + fname)
                self.additional_inbox_files.append(fname.strip())
        common.logger.debug(5, "Additional input files: " + str(self.additional_inbox_files))

    ## Events per job
    if cfg_params.has_key('CMSSW.events_per_job'):
        self.eventsPerJob = int(cfg_params['CMSSW.events_per_job'])
        self.selectEventsPerJob = 1
    else:
        self.eventsPerJob = -1
        self.selectEventsPerJob = 0

    ## number of jobs
    if cfg_params.has_key('CMSSW.number_of_jobs'):
        self.theNumberOfJobs = int(cfg_params['CMSSW.number_of_jobs'])
        self.selectNumberOfJobs = 1
    else:
        self.theNumberOfJobs = 0
        self.selectNumberOfJobs = 0

    if cfg_params.has_key('CMSSW.total_number_of_events'):
        self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
        self.selectTotalNumberEvents = 1
        if self.selectNumberOfJobs == 1:
            # -1 is accepted here without comparing it to the number of jobs
            if (self.total_number_of_events != -1) and self.total_number_of_events < self.theNumberOfJobs:
                msg = 'Must specify at least one event per job. total_number_of_events > number_of_jobs '
                raise CrabException(msg)
    else:
        self.total_number_of_events = 0
        self.selectTotalNumberEvents = 0

    if self.pset is not None:
        # with a parameter set exactly two of the three knobs are needed
        if (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2:
            msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
            raise CrabException(msg)
    else:
        if self.selectNumberOfJobs == 0:
            msg = 'Must specify number_of_jobs.'
            raise CrabException(msg)

    ## New method of dealing with seeds
    self.incrementSeeds = []
    self.preserveSeeds = []
    if cfg_params.has_key('CMSSW.preserve_seeds'):
        for tmp in cfg_params['CMSSW.preserve_seeds'].split(','):
            # strip() returns a new string: store the stripped name
            self.preserveSeeds.append(tmp.strip())
    if cfg_params.has_key('CMSSW.increment_seeds'):
        for tmp in cfg_params['CMSSW.increment_seeds'].split(','):
            self.incrementSeeds.append(tmp.strip())

    ## Old method of dealing with seeds
    ## FUTURE: This is for old CMSSW and old CRAB. Can throw exceptions after a couple of CRAB releases and then
    ## remove
    # (parameter name, attribute keeping its value, seeds added to increment_seeds)
    deprecatedSeeds = [
        ('pythia_seed', 'sourceSeed', ['sourceSeed', 'theSource']),
        ('vtx_seed', 'sourceSeedVtx', ['VtxSmeared']),
        ('g4_seed', 'sourceSeedG4', ['g4SimHits']),
        ('mix_seed', 'sourceSeedMix', ['mix']),
    ]
    for parName, attrName, seedNames in deprecatedSeeds:
        value = cfg_params.get('CMSSW.' + parName, None)
        setattr(self, attrName, value)
        if value:
            print(parName + " is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\nAdded to increment_seeds.")
            self.incrementSeeds.extend(seedNames)

    self.firstRun = cfg_params.get('CMSSW.first_run', None)

    # Copy/return
    self.copy_data = int(cfg_params.get('USER.copy_data', 0))
    self.return_data = int(cfg_params.get('USER.return_data', 0))

    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents = 0        # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths = {}        # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination = []  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end

    ## Select Splitting
    if self.selectNoInput:
        if self.pset is None:
            self.jobSplittingForScript()
        else:
            self.jobSplittingNoInput()
    else:
        self.jobSplittingByBlocks(blockSites)

    # modify Pset only the first time
    if isNew:
        if self.pset is not None:
            import PsetManipulator as pp
            PsetEdit = pp.PsetManipulator(self.pset)

            def addOutput(name, producer):
                # register an output file found in the parameter set, once
                if not name:
                    return
                if name in self.output_file:
                    common.logger.debug(5, "Output from " + producer + " " + name + " already in output files")
                else:
                    self.output_file.append(name)
                    common.logger.message("Adding " + name + " to output files (from " + producer + ")")

            try:
                # Add FrameworkJobReport to parameter-set, set max events.
                # Reset later for data jobs by writeCFG which does all modifications
                PsetEdit.addCrabFJR(self.fjrFileName)  # FUTURE: Job report addition not needed by CMSSW>1.5
                PsetEdit.maxEvent(self.eventsPerJob)
                PsetEdit.psetWriter(self.configFilename())
                ## If present, add TFileService to output files
                if not int(cfg_params.get('CMSSW.skip_TFileService_output', 0)):
                    addOutput(PsetEdit.getTFileService(), "TFileService")
                ## If present and requested, add PoolOutputModule to output files
                if int(cfg_params.get('CMSSW.get_edm_output', 0)):
                    addOutput(PsetEdit.getPoolOutputModule(), "PoolOutputModule")
            except CrabException:
                msg = 'Error while manipulating ParameterSet: exiting...'
                raise CrabException(msg)
        ## Prepare inputSandbox TarBall (only the first time)
        self.tgzNameWithPath = self.getTarBall(self.executable)
291 gutsche 1.3
def DataDiscoveryAndLocation(self, cfg_params):
    """
    Look up the content and the location of self.datasetPath.

    Stores on the instance the objects obtained from DataDiscovery:
    pubdata, filesbyblock, eventsbyblock, eventsbyfile, parentFiles and
    maxEvents.

    cfg_params -- configuration mapping, passed on to DataDiscovery and
                  DataLocation

    Returns whatever DataLocation.getSites() returns; the job splitting
    uses it as a block -> list-of-sites mapping.
    Raises CrabException when discovery or location fails.
    """
    import sys
    import DataDiscovery
    import DataLocation
    common.logger.debug(10, "CMSSW::DataDiscoveryAndLocation()")

    datasetPath = self.datasetPath

    ## Contact the DBS
    common.logger.message("Contacting Data Discovery Services ...")
    try:
        self.pubdata = DataDiscovery.DataDiscovery(datasetPath, cfg_params, self.skip_blocks)
        self.pubdata.fetchDBSInfo()
    except (DataDiscovery.NotExistingDatasetError,
            DataDiscovery.NoDataTierinProvenanceError,
            DataDiscovery.DataDiscoveryError):
        # all three failures are reported the same way; sys.exc_info()
        # gives the active exception without version specific syntax
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Discovery in DBS : %s' % ex.getErrorMessage()
        raise CrabException(msg)

    self.filesbyblock = self.pubdata.getFiles()
    self.eventsbyblock = self.pubdata.getEventsPerBlock()
    self.eventsbyfile = self.pubdata.getEventsPerFile()
    self.parentFiles = self.pubdata.getParent()

    ## get max number of events
    self.maxEvents = self.pubdata.getMaxEvents()

    ## Contact the DLS and build a list of sites hosting the fileblocks
    try:
        dataloc = DataLocation.DataLocation(self.filesbyblock.keys(), cfg_params)
        dataloc.fetchDLSInfo()
    except DataLocation.DataLocationError:
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Location in DLS \n %s ' % ex.getErrorMessage()
        raise CrabException(msg)

    sites = dataloc.getSites()

    # screen output
    common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")

    return sites
345 ewv 1.131
def jobSplittingByBlocks(self, blockSites):
    """
    Perform job splitting. Jobs run over an integer number of files
    and no more than one block.
    ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
    REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
              self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
              self.maxEvents, self.filesbyblock, self.eventsbyblock, self.eventsbyfile,
              self.parentFiles (only when self.useParent is set)
    SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
          self.total_number_of_jobs - Total # of jobs (also copied to self.ncjobs)
          self.list_of_args - per job: [files, (parent files,) max events, skip events]
    """

    def quoted(name):
        # one entry of a file list in its escaped form: \"name\"\,
        return '\\"' + name + '\\"\\,'

    # ---- Handle the possible job splitting configurations ---- #
    if self.selectTotalNumberEvents:
        totalEventsRequested = self.total_number_of_events
    if self.selectEventsPerJob:
        eventsPerJobRequested = self.eventsPerJob
        if self.selectNumberOfJobs:
            totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob

    # If user requested all the events in the dataset
    if totalEventsRequested == -1:
        eventsRemaining = self.maxEvents
    # If user requested more events than are in the dataset
    elif totalEventsRequested > self.maxEvents:
        eventsRemaining = self.maxEvents
        # report the effective request, which may come from jobs * events
        common.logger.message("Requested "+str(totalEventsRequested)+ " events, but only "+str(self.maxEvents)+" events are available.")
    # If user requested less events than are in the dataset
    else:
        eventsRemaining = totalEventsRequested

    # If user requested more events per job than are in the dataset
    if self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents:
        eventsPerJobRequested = self.maxEvents

    # For user info at end
    totalEventCount = 0

    if self.selectTotalNumberEvents and self.selectNumberOfJobs:
        # A zero quota (fewer events than jobs) would close empty jobs
        # without ever consuming events, so use at least one event per job.
        eventsPerJobRequested = max(1, eventsRemaining // self.theNumberOfJobs)

    if self.selectNumberOfJobs:
        common.logger.message("May not create the exact number_of_jobs requested.")

    if self.ncjobs == 'all':
        totalNumberOfJobs = 999999999
    else:
        totalNumberOfJobs = self.ncjobs

    blocks = list(blockSites.keys())
    blockCount = 0
    # Backup variable in case self.maxEvents counted events in a non-included block
    numBlocksInDataset = len(blocks)

    jobCount = 0
    list_of_lists = []

    # dictionary tracking which jobs belong to which block
    jobsOfBlock = {}

    def closeJob(block, jobNumber, fileString, parentString, maxEvents, skipEvents, eventsMsg):
        # Record the arguments, destination and block of job jobNumber.
        # The two trailing characters of each list (the final '\,') are cut.
        args = [fileString[:-2]]
        if self.useParent:
            args.append(parentString[:-2])
        args.append(str(maxEvents))
        args.append(str(skipEvents))
        list_of_lists.append(args)
        common.logger.debug(3, "Job "+str(jobNumber)+" can run over "+eventsMsg)
        self.jobDestination.append(blockSites[block])
        common.logger.debug(5, "Job "+str(jobNumber)+" Destination: "+str(self.jobDestination[jobNumber-1]))
        # fill jobs of block dictionary
        jobsOfBlock[block].append(jobNumber)

    # ---- Iterate over the blocks in the dataset until ---- #
    # ---- we've met the requested total # of events    ---- #
    while (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs):
        block = blocks[blockCount]
        blockCount += 1
        if block not in jobsOfBlock:
            jobsOfBlock[block] = []

        if not self.eventsbyblock.has_key(block):
            continue

        numEventsInBlock = self.eventsbyblock[block]
        common.logger.debug(5, 'Events in Block File '+str(numEventsInBlock))

        files = self.filesbyblock[block]
        numFilesInBlock = len(files)
        if numFilesInBlock <= 0:
            continue
        fileCount = 0

        # ---- New block => New job ---- #
        parString = ""
        # parents of the files currently held in parString
        pString = ""
        # counter for number of events in files currently worked on
        filesEventCount = 0
        # flag if next while loop should touch new file
        newFile = 1
        # job event counter
        jobSkipEventCount = 0

        # ---- Iterate over the files in the block until we've met the requested ---- #
        # ---- total # of events or we've gone over all the files in this block  ---- #
        while (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs):
            fname = files[fileCount]
            if self.useParent:
                parent = self.parentFiles[fname]
                common.logger.debug(6, "File "+str(fname)+" has the following parents: "+str(parent))
                common.logger.write("File "+str(fname)+" has the following parents: "+str(parent))
            if newFile:
                try:
                    numEventsInFile = self.eventsbyfile[fname]
                except KeyError:
                    common.logger.message("File "+str(fname)+" has unknown number of events: skipping")
                else:
                    common.logger.debug(6, "File "+str(fname)+" has "+str(numEventsInFile)+" events")
                    # increase filesEventCount
                    filesEventCount += numEventsInFile
                    # Add file to current job, together with its parents,
                    # so both lists always describe the same files
                    parString += quoted(fname)
                    if self.useParent:
                        for f in parent:
                            pString += quoted(f)
                    newFile = 0

            eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining)
            eventsAvailable = filesEventCount - jobSkipEventCount
            # if less events in file remain than eventsPerJobRequested
            if eventsAvailable < eventsPerJobRequested:
                # if last file in block
                if fileCount == numFilesInBlock-1:
                    # end job using last file, use remaining events in block
                    closeJob(block, jobCount+1, parString, pString, -1, jobSkipEventCount,
                             str(eventsAvailable)+" events (last file in block).")
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsAvailable
                    eventsRemaining = eventsRemaining - eventsAvailable
                    jobSkipEventCount = 0
                    # reset file
                    pString = ""
                    parString = ""
                    filesEventCount = 0
                # in both cases: go to next file
                newFile = 1
                fileCount += 1
            # if events in file equal to eventsPerJobRequested
            elif eventsAvailable == eventsPerJobRequested:
                # close job and touch new file
                closeJob(block, jobCount+1, parString, pString, eventsPerJobRequested, jobSkipEventCount,
                         str(eventsPerJobRequested)+" events.")
                jobCount = jobCount + 1
                totalEventCount = totalEventCount + eventsPerJobRequested
                eventsRemaining = eventsRemaining - eventsPerJobRequested
                jobSkipEventCount = 0
                # reset file
                pString = ""
                parString = ""
                filesEventCount = 0
                newFile = 1
                fileCount += 1
            # if more events in file remain than eventsPerJobRequested
            else:
                # close job but don't touch new file
                closeJob(block, jobCount+1, parString, pString, eventsPerJobRequested, jobSkipEventCount,
                         str(eventsPerJobRequested)+" events.")
                jobCount = jobCount + 1
                totalEventCount = totalEventCount + eventsPerJobRequested
                eventsRemaining = eventsRemaining - eventsPerJobRequested
                # calculate skip events for last file
                # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
                jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[fname])
                # remove all but the last file (and keep only its parents)
                filesEventCount = self.eventsbyfile[fname]
                pString = ""
                if self.useParent:
                    for f in parent:
                        pString += quoted(f)
                parString = quoted(fname)

    self.ncjobs = self.total_number_of_jobs = jobCount
    if eventsRemaining > 0 and jobCount < totalNumberOfJobs:
        common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
    common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")

    # screen output
    screenOutput = "List of jobs and available destination sites:\n\n"

    # keep trace of block with no sites to print a warning at the end
    noSiteBlock = []
    bloskNoSite = []

    blockCounter = 0
    for block in blocks:
        if block in jobsOfBlock:
            blockCounter += 1
            # filter the sites once and reuse the result for text and check
            allowedSites = self.blackWhiteListParser.checkWhiteList(
                self.blackWhiteListParser.checkBlackList(blockSites[block], block), block)
            screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (
                blockCounter, spanRanges(jobsOfBlock[block]), ','.join(allowedSites))
            if len(allowedSites) == 0:
                noSiteBlock.append(spanRanges(jobsOfBlock[block]))
                bloskNoSite.append(blockCounter)

    common.logger.message(screenOutput)
    if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
        msg = 'WARNING: No sites are hosting any part of data for block:\n '
        virgola = ""
        if len(bloskNoSite) > 1:
            virgola = ","
        for block in bloskNoSite:
            msg += ' ' + str(block) + virgola
        msg += '\n Related jobs:\n '
        virgola = ""
        if len(noSiteBlock) > 1:
            virgola = ","
        for range_jobs in noSiteBlock:
            msg += str(range_jobs) + virgola
        msg += '\n will not be submitted and this block of data can not be analyzed!\n'
        if self.cfg_params.has_key('EDG.se_white_list'):
            msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
            msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
            msg += 'Please check if the dataset is available at this site!)\n'
        if self.cfg_params.has_key('EDG.ce_white_list'):
            msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
            msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
            msg += 'Please check if the dataset is available at this site!)\n'

        common.logger.message(msg)

    self.list_of_args = list_of_lists
    return
593    
def jobSplittingNoInput(self):
    """
    Perform job splitting based on number of event per job

    Uses whichever two of events_per_job, number_of_jobs and
    total_number_of_events were selected to derive the third.
    SETS: self.total_number_of_jobs, self.eventsPerJob or
          self.total_number_of_events (whichever was derived),
          self.list_of_args (one list per job; it holds one argument built
          from self.firstRun and the job index when firstRun is set),
          and appends one [""] destination per job to self.jobDestination.
    Raises CrabException for a negative total number of events or a
    zero divisor.
    """
    common.logger.debug(5, 'Splitting per events')

    if self.selectEventsPerJob:
        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
    if self.selectNumberOfJobs:
        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
    if self.selectTotalNumberEvents:
        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')

    if self.total_number_of_events < 0:
        msg = 'Cannot split jobs per Events with "-1" as total number of events'
        raise CrabException(msg)

    if self.selectEventsPerJob:
        if self.selectTotalNumberEvents:
            if self.eventsPerJob == 0:
                # report a configuration error instead of a ZeroDivisionError
                raise CrabException('events_per_job must not be zero.')
            self.total_number_of_jobs = self.total_number_of_events // self.eventsPerJob
        elif self.selectNumberOfJobs:
            self.total_number_of_jobs = self.theNumberOfJobs
            self.total_number_of_events = int(self.theNumberOfJobs*self.eventsPerJob)

    elif self.selectNumberOfJobs:
        self.total_number_of_jobs = self.theNumberOfJobs
        if self.total_number_of_jobs == 0:
            # report a configuration error instead of a ZeroDivisionError
            raise CrabException('number_of_jobs must not be zero.')
        self.eventsPerJob = self.total_number_of_events // self.total_number_of_jobs

    common.logger.debug(5, 'N jobs '+str(self.total_number_of_jobs))

    # is there any remainder?
    eventsCovered = int(self.total_number_of_jobs)*self.eventsPerJob
    check = int(self.total_number_of_events) - eventsCovered

    common.logger.debug(5, 'Check '+str(check))

    common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
    if check > 0:
        common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(eventsCovered))

    # argument is seed number.$i
    self.list_of_args = []
    for i in range(self.total_number_of_jobs):
        ## Since there is no input, any site is good
        self.jobDestination.append([""])  # must be empty to write correctly the xml
        args = []
        if self.firstRun:
            ## pythia first run
            args.append(str(self.firstRun)+str(i))
        self.list_of_args.append(args)

    return
645    
646 spiga 1.42
def jobSplittingForScript(self):
    """
    Perform job splitting based on number of job

    Creates self.theNumberOfJobs jobs; each one gets its own index as its
    only argument and an empty destination.
    SETS: self.total_number_of_jobs, self.list_of_args; appends to
          self.jobDestination.
    """
    njobs = self.theNumberOfJobs

    common.logger.debug(5, 'Splitting per job')
    common.logger.message('Required ' + str(njobs) + ' jobs in total ')

    self.total_number_of_jobs = njobs

    common.logger.debug(5, 'N jobs ' + str(njobs))
    common.logger.message(str(njobs) + ' jobs can be created')

    # argument is seed number.$i
    jobIndexes = range(njobs)
    self.list_of_args = [[str(i)] for i in jobIndexes]
    self.jobDestination.extend([[""] for i in jobIndexes])
    return
666    
def split(self, jobParams,firstJobID):
    """
    Fill jobParams with the per-job arguments and record them in the DB.

    jobParams  -- list extended in place with one entry per job; entry i
                  receives self.list_of_args[i]
    firstJobID -- offset added to the local job index to obtain the job
                  number (converted with int())

    For every job the argument string (job number followed by the
    space-joined arguments) and the destination are passed to
    common._db.updateJob_. self.argsList is set to the number of
    arguments of the first job plus one for the job number.
    """
    njobs = self.total_number_of_jobs
    arglist = self.list_of_args
    # create the empty structure
    for i in range(njobs):
        jobParams.append("")

    listID=[]
    listField=[]
    concString = ' '
    # 'idx' rather than 'id' so the builtin is not shadowed
    for idx in range(njobs):
        job = idx + int(firstJobID)
        jobParams[idx] = arglist[idx]
        listID.append(job+1)
        argu=''
        if len(jobParams[idx]):
            argu += concString.join(jobParams[idx])
        job_ToSave ={}
        job_ToSave['arguments']= str(job+1)+' '+argu
        job_ToSave['dlsDestination']= self.jobDestination[idx]
        listField.append(job_ToSave)
        msg="Job "+str(job)+" Arguments: "+str(job+1)+" "+argu+"\n" \
            +" Destination: "+str(self.jobDestination[idx])
        common.logger.debug(5,msg)
    common._db.updateJob_(listID,listField)
    if jobParams:
        self.argsList = (len(jobParams[0])+1)
    else:
        # no job at all: only the job number would be passed
        self.argsList = 1

    return
696 ewv 1.131
def numberOfJobs(self):
    """ return the total number of jobs set by the job splitting """
    njobs = self.total_number_of_jobs
    return njobs
699    
def getTarBall(self, exe):
    """
    Return the path of the tarball with lib and exe.

    The path is stored in self.tgzNameWithPath. If no file exists there
    yet, buildTar_ is called for 'exe' before the (stripped) path is
    returned.
    """
    tarball = common.work_space.pathForTgz()+'share/'+self.tgz_name
    self.tgzNameWithPath = tarball
    if not os.path.exists(tarball):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return self.tgzNameWithPath.strip()
    return tarball
712    
def buildTar_(self, executable):
    """
    Create the gzipped tarball self.tgzNameWithPath with the user code.

    Nothing is built when no release top is known or when the working
    area is the release top itself. Otherwise the tarball receives, when
    present: the private executable, the 'lib' and 'module' directories
    of the working area, every 'data' directory found below src/, the
    job configuration file, the listed ProdCommon/IMProv entries, the
    ML and utility python scripts and the additional input files.

    Raises CrabException when the user executable is not found, when the
    tarball cannot be written, or when its size exceeds
    self.MaxTarBallSize (in MB).
    """

    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        common.logger.debug(3,"swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
        return

    import tarfile
    try: # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        # the inner try/finally closes the archive on every exit path,
        # including the exceptions raised while filling it
        try:
            ## First find the executable
            if (self.executable != ''):
                exeWithPath = self.scram.findFile_(executable)
                if ( not exeWithPath ):
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship
                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
                    areaPrefix = swArea+'/'
                    # distinguish case when script is in user project area or given by full path somewhere else
                    if exeWithPath.find(areaPrefix) >= 0 :
                        exe = exeWithPath.replace(areaPrefix,'')
                        tar.add(areaPrefix+exe,exe)
                    else :
                        tar.add(exeWithPath,os.path.basename(executable))
                # else: the exe is from release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            libDir = 'lib'
            lib = swArea+'/' +libDir
            common.logger.debug(5,"lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib,libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module,moduleDir)

            ## Now check if any data dir(s) is present
            self.dataExist = False
            todo_list = [(i, i) for i in os.listdir(swArea+"/src")]
            while len(todo_list):
                entry, name = todo_list.pop()
                if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS':
                    continue
                if os.path.isdir(swArea+"/src/"+entry):
                    entryPath = entry + '/'
                    todo_list += [(entryPath + i, i) for i in os.listdir(swArea+"/src/"+entry)]
                    if name == 'data':
                        self.dataExist=True
                        common.logger.debug(5,"data "+entry+" to be tarred")
                        tar.add(swArea+"/src/"+entry,"src/"+entry)

            ### CMSSW ParameterSet
            if self.pset is not None:
                cfg_file = common.work_space.jobDir()+self.configFilename()
                tar.add(cfg_file,self.configFilename())
                common.logger.debug(5,"File added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))

            ## Add ProdCommon dir to tar
            prodcommonDir = './'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
            neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools','ProdCommon/Core','ProdCommon/MCPayloads', 'IMProv']
            for item in neededStuff:
                tar.add(prodcommonPath+item,prodcommonDir+item)
            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))

            ##### ML stuff
            ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
            crabPython=os.environ['CRABDIR'] + '/python/'
            for item in ML_file_list:
                tar.add(crabPython+item,item)
            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))

            ##### Utils
            Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py']
            for item in Utils_file_list:
                tar.add(crabPython+item,item)
            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))

            ##### AdditionalFiles
            for item in self.additional_inbox_files:
                tar.add(item,item.split('/')[-1])
            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            tar.close()
    except (IOError, tarfile.TarError):
        raise CrabException('Could not create tar-ball '+self.tgzNameWithPath)

    ## check for tarball size
    tarballinfo = os.stat(self.tgzNameWithPath)
    if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
        raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
822    
823 slacapra 1.61 ## create tar-ball with ML stuff
824 slacapra 1.97
def wsSetupEnvironment(self, nj=0):
    """
    Return the part of the job script which prepares the execution
    environment for the job 'nj'.

    The fragment contains the middleware specific setup (LCG or OSG),
    the scram project/runtime setup for self.version, the check on the
    number of arguments, the dataset related variables and, when a
    pset is defined, the handling of the configuration file.
    """
    if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
        psetName = 'pset.py'
    else:
        psetName = 'pset.cfg'

    script = []
    emit = script.append

    # Prepare JobType-independent part
    emit('\n#Written by cms_cmssw::wsSetupEnvironment\n')
    emit('echo ">>> setup environment"\n')
    emit('if [ $middleware == LCG ]; then \n')
    emit(self.wsSetupCMSLCGEnvironment_())
    emit('elif [ $middleware == OSG ]; then\n')
    emit(' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n')
    emit(' if [ ! $? == 0 ] ;then\n')
    emit(' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n')
    emit(' job_exit_code=10016\n')
    emit(' func_exit\n')
    emit(' fi\n')
    emit(' echo ">>> Created working directory: $WORKING_DIR"\n')
    emit('\n')
    emit(' echo "Change to working directory: $WORKING_DIR"\n')
    emit(' cd $WORKING_DIR\n')
    emit(' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n')
    emit(self.wsSetupCMSOSGEnvironment_())
    emit('fi\n')

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    emit('\n\n')
    emit('echo ">>> specific cmssw setup environment:"\n')
    emit('echo "CMSSW_VERSION = '+self.version+'"\n')
    emit(scram+' project CMSSW '+self.version+'\n')
    emit('status=$?\n')
    emit('if [ $status != 0 ] ; then\n')
    emit(' echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n')
    emit(' job_exit_code=10034\n')
    emit(' func_exit\n')
    emit('fi \n')
    emit('cd '+self.version+'\n')
    emit('SOFTWARE_DIR=`pwd`\n')
    emit('echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n')
    emit('eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n')
    emit('if [ $? != 0 ] ; then\n')
    emit(' echo "ERROR ==> Problem with the command: "\n')
    emit(' echo "eval \\`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \\` at `hostname`"\n')
    emit(' job_exit_code=10034\n')
    emit(' func_exit\n')
    emit('fi \n')

    # Handle the arguments:
    emit("\n")
    emit("## number of arguments (first argument always jobnumber)\n")
    emit("\n")
    emit("if [ $nargs -lt "+str(self.argsList)+" ]\n")
    emit("then\n")
    emit(" echo 'ERROR ==> Too few arguments' +$nargs+ \n")
    emit(' job_exit_code=50113\n')
    emit(" func_exit\n")
    emit("fi\n")
    emit("\n")

    # Prepare job-specific part
    job = common.job_list[nj]
    if (self.datasetPath):
        datasetpath_split = self.datasetPath.split("/")
        emit('\n')
        emit('DatasetPath='+self.datasetPath+'\n')
        emit('PrimaryDataset='+datasetpath_split[1]+'\n')
        emit('DataTier='+datasetpath_split[2]+'\n')
        emit('ApplicationFamily=cmsRun\n')
    else:
        emit('DatasetPath=MCDataTier\n')
        emit('PrimaryDataset=null\n')
        emit('DataTier=null\n')
        emit('ApplicationFamily=MCDataTier\n')

    if self.pset != None:
        pset = os.path.basename(job.configFilename())
        emit('\n')
        emit('cp $RUNTIME_AREA/'+pset+' .\n')
        if (self.datasetPath): # standard job
            emit('InputFiles=${args[1]}; export InputFiles\n')
            if (self.useParent):
                emit('ParentFiles=${args[2]}; export ParentFiles\n')
                emit('MaxEvents=${args[3]}; export MaxEvents\n')
                emit('SkipEvents=${args[4]}; export SkipEvents\n')
            else:
                emit('MaxEvents=${args[2]}; export MaxEvents\n')
                emit('SkipEvents=${args[3]}; export SkipEvents\n')
            emit('echo "Inputfiles:<$InputFiles>"\n')
            if (self.useParent):
                emit('echo "ParentFiles:<$ParentFiles>"\n')
            emit('echo "MaxEvents:<$MaxEvents>"\n')
            emit('echo "SkipEvents:<$SkipEvents>"\n')
        else: # pythia like job
            emit('PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n')
            emit('IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n')
            emit('echo "PreserveSeeds: <$PreserveSeeds>"\n')
            emit('echo "IncrementSeeds:<$IncrementSeeds>"\n')
            if (self.firstRun):
                emit('FirstRun=${args[1]}; export FirstRun\n')
                emit('echo "FirstRun: <$FirstRun>"\n')

        emit('mv -f ' + pset + ' ' + psetName + '\n')

        # FUTURE: Can simply for 2_1_x and higher
        emit('\n')
        if self.debug_wrapper==True:
            emit('echo "***** cat ' + psetName + ' *********"\n')
            emit('cat ' + psetName + '\n')
            emit('echo "****** end ' + psetName + ' ********"\n')
            emit('\n')
        emit('PSETHASH=`edmConfigHash < ' + psetName + '` \n')
        emit('echo "PSETHASH = $PSETHASH" \n')
        emit('\n')
    return ''.join(script)
946 slacapra 1.176
def wsUntarSoftware(self, nj=0):
    """
    Return the part of the job script which unpacks the user tarball.

    When self.tgzNameWithPath is an existing file the fragment untars
    it from $RUNTIME_AREA, exits through func_exit if tar failed, and
    puts $RUNTIME_AREA in front of PYTHONPATH. Otherwise only the
    header comment is returned.
    """

    txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'

    if os.path.isfile(self.tgzNameWithPath):
        tarball = os.path.basename(self.tgzNameWithPath)
        txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+tarball+' :" \n'
        txt += 'tar xzvf $RUNTIME_AREA/'+tarball+'\n'
        # Save the exit status of tar right away: any command emitted in
        # between (like the debug listing below) would overwrite $?.
        txt += 'untar_status=$? \n'
        if self.debug_wrapper:
            txt += 'ls -Al \n'
        txt += 'if [ $untar_status -ne 0 ]; then \n'
        txt += ' echo "ERROR ==> Untarring .tgz file failed"\n'
        txt += ' job_exit_code=$untar_status\n'
        txt += ' func_exit\n'
        txt += 'else \n'
        txt += ' echo "Successful untar" \n'
        txt += 'fi \n'
        txt += '\n'
        txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
        txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
        txt += 'else\n'
        txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
        txt += 'fi\n'
        txt += '\n'

    return txt
981 ewv 1.170
def wsBuildExe(self, nj=0):
    """
    Return the part of the job script which moves the unpacked user
    software (lib, module, optionally src and the additional input
    files) from $RUNTIME_AREA into the current directory and puts
    $RUNTIME_AREA in front of PYTHONPATH.
    """
    script = [
        '\n#Written by cms_cmssw::wsBuildExe\n',
        'echo ">>> moving CMSSW software directories in `pwd`" \n',
        'rm -r lib/ module/ \n',
        'mv $RUNTIME_AREA/lib/ . \n',
        'mv $RUNTIME_AREA/module/ . \n',
    ]
    if self.dataExist == True:
        script.append('rm -r src/ \n')
        script.append('mv $RUNTIME_AREA/src/ . \n')
    for extra in self.additional_inbox_files:
        script.append('mv $RUNTIME_AREA/'+os.path.basename(extra)+' . \n')

    script.extend([
        'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n',
        'if [ -z "$PYTHONPATH" ]; then\n',
        ' export PYTHONPATH=$RUNTIME_AREA/\n',
        'else\n',
        ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n',
        'echo "PYTHONPATH=$PYTHONPATH"\n',
        'fi\n',
        '\n',
    ])
    return ''.join(script)
1013 slacapra 1.1
1014 ewv 1.131
def executableName(self):
    """ return "sh " when a user script is set, the executable otherwise """
    if not self.scriptExe:
        return self.executable
    return "sh "
1020 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string passed to the executable.

    With a user script (self.scriptExe) the result is the script name
    followed by $NJob. Otherwise it is made of the '-j' option with
    the framework job report path (CMSSW >= 1_5) and the '-p' option
    with the parameter-set file name.
    """
    # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
    if self.scriptExe:#CarlosDaniele
        return self.scriptExe + " $NJob"

    ex_args = ""
    # FUTURE: This tests the CMSSW version. Can remove code as versions deprecated
    # Framework job report
    if (self.CMSSW_major >= 1 and self.CMSSW_minor >= 5) or (self.CMSSW_major >= 2):
        ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
    # Type of config file: use the same version test as wsSetupEnvironment,
    # which renames the config to pset.py only from 2_1 on (pset.cfg before)
    if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
        ex_args += " -p pset.py"
    else:
        ex_args += " -p pset.cfg"
    return ex_args
1037 slacapra 1.1
def inputSandbox(self, nj):
    """
    Return the list of filenames to be put in the JDL input sandbox:
    the tarball (only if the file exists) followed by the wrapper
    script registered in the task DB under 'scriptName'.
    """
    tarball = self.tgzNameWithPath
    sandbox = []
    if os.path.isfile(tarball):
        sandbox.append(tarball)
    scriptName = str(common._db.queryTask('scriptName'))
    wrapper = os.path.basename(scriptName)
    sandbox.append(common.work_space.pathForTgz() +'job/'+ wrapper)
    return sandbox
1048    
def outputSandbox(self, nj):
    """
    Return the list of filenames to be put in the JDL output sandbox:
    every user declared output file (output_file and
    output_file_sandbox) passed through numberFile with nj+1.
    """
    ## User Declared output files
    declared = self.output_file+self.output_file_sandbox
    return [numberFile(name,str(nj + 1)) for name in declared]
1060    
1061    
def wsRenameOutput(self, nj):
    """
    Return the part of the job script which renames the produced files.

    Every file in self.output_file is moved to its numbered name (from
    numberFile with '$NJob') and linked back under its original name in
    $RUNTIME_AREA; a missing file sets job_exit_code=60302. The fragment
    also defines file_list and finally changes to $RUNTIME_AREA.
    """
    script = ['\n#Written by cms_cmssw::wsRenameOutput\n']
    emit = script.append
    emit('echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n')
    emit('echo ">>> current directory content:"\n')
    if self.debug_wrapper:
        emit('ls -Al\n')
    emit('\n')

    for produced in (self.output_file):
        numbered = numberFile(produced, '$NJob')
        emit('\n')
        emit('# check output file\n')
        emit('if [ -e ./'+produced+' ] ; then\n')
        if (self.copy_data == 1): # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
            emit(' mv '+produced+' '+numbered+'\n')
            emit(' ln -s `pwd`/'+numbered+' $RUNTIME_AREA/'+produced+'\n')
        else:
            emit(' mv '+produced+' $RUNTIME_AREA/'+numbered+'\n')
            emit(' ln -s $RUNTIME_AREA/'+numbered+' $RUNTIME_AREA/'+produced+'\n')
        emit('else\n')
        emit(' job_exit_code=60302\n')
        emit(' echo "WARNING: Output file '+produced+' not found"\n')
        if common.scheduler.name().upper() == 'CONDOR_G':
            emit(' if [ $middleware == OSG ]; then \n')
            emit(' echo "prepare dummy output file"\n')
            emit(' echo "Processing of job output failed" > $RUNTIME_AREA/'+numbered+'\n')
            emit(' fi \n')
        emit('fi\n')

    file_list = [numberFile(produced, '$NJob') for produced in (self.output_file)]

    emit('file_list="'+' '.join(file_list)+'"\n')
    emit('\n')
    emit('echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n')
    emit('echo ">>> current directory content:"\n')
    if self.debug_wrapper:
        emit('ls -Al\n')
    emit('\n')
    emit('cd $RUNTIME_AREA\n')
    emit('echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n')
    return ''.join(script)
1108    
def getRequirements(self, nj=[]):
    """
    Return the job requirements to add to the jdl files.

    The expression is the ' && ' conjunction of: the software tag of
    self.version (if set), the tag of self.executable_arch (if set),
    the outbound connectivity clause and, for the 'glitecoll'
    scheduler, the CE production status clause. 'nj' is not used.
    """
    # Collect the clauses first and join them afterwards, so that the
    # expression never starts with a dangling ' && ' when no version is set.
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + \
            self.version + \
            '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + \
            self.executable_arch + \
            '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')
    if common.scheduler.name() == "glitecoll":
        clauses.append('other.GlueCEStateStatus == "Production" ')

    return ' && '.join(clauses)
1128 gutsche 1.3
def configFilename(self):
    """ return the config filename: name() plus '.py' from CMSSW 2_1 on, '.cfg' before """
    # FUTURE: Can remove cfg mode for CMSSW >= 2_1_x
    pythonConfig = (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3)
    if pythonConfig:
        extension = '.py'
    else:
        extension = '.cfg'
    return self.name()+extension
1136 gutsche 1.3
def wsSetupCMSOSGEnvironment_(self):
    """
    Return the part of the job script which prepares the CMS
    environment on OSG: it exports SCRAM_ARCH and sources
    $OSG_APP/cmssoft/cms/cmsset_default.sh for self.version, exiting
    with job_exit_code=10020 when that file is missing.
    """
    arch = self.executable_arch
    script = [
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH='+arch+'\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n',
        ' else\n',
        ' echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        '\n',
        ' echo "==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(script)
1160 ewv 1.131
def wsSetupCMSLCGEnvironment_(self):
    """
    Return the part of the job script which prepares the CMS
    environment on LCG: it exports SCRAM_ARCH and BUILD_ARCH and
    sources $VO_CMS_SW_DIR/cmsset_default.sh, exiting through
    func_exit when the directory or the file is missing or the
    sourcing fails.
    """
    arch = self.executable_arch
    script = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH='+arch+'\n',
        ' export BUILD_ARCH='+arch+'\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n',
        ' job_exit_code=10031\n',
        ' func_exit\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "==> setup cms environment ok"\n',
    ]
    return ''.join(script)
1194 gutsche 1.5
def modifyReport(self, nj):
    """
    Return the part of the job script which modifies the
    FrameworkJobReport.

    Only the header comment is returned unless USER.publish_data is 1.
    In that case the fragment defines FOR_LFN from the LFN base of
    USER.publish_data_name (built with LocalUser=True for the CAF and
    LSF schedulers) and runs ModifyJobReport.py on the job report.
    """
    txt = '\n#Written by cms_cmssw::modifyReport\n'
    publish_data = int(self.cfg_params.get('USER.publish_data',0))
    if (publish_data == 1):
        processedDataset = self.cfg_params['USER.publish_data_name']
        ### FEDE for publication with LSF and CAF schedulers ####
        schedulerName = common.scheduler.name().upper()
        # diagnostics go through the logger instead of stdout
        common.logger.debug(5,'modifyReport: scheduler name = '+schedulerName)
        if (schedulerName == "CAF" or schedulerName == "LSF"):
            common.logger.debug(5,'modifyReport: calling LFNBase with LocalUser=True')
            lfnBase = LFNBase(processedDataset, LocalUser=True)
        else :
            lfnBase = LFNBase(processedDataset)
        ####

        txt += 'if [ $copy_exit_status -eq 0 ]; then\n'
        txt += ' FOR_LFN=%s_${PSETHASH}/\n'%(lfnBase)
        txt += 'else\n'
        txt += ' FOR_LFN=/copy_problems/ \n'
        txt += ' SE=""\n'
        txt += ' SE_PATH=""\n'
        txt += 'fi\n'

        txt += 'echo ">>> Modify Job Report:" \n'
        txt += 'chmod a+x $RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
        txt += 'ProcessedDataset='+processedDataset+'\n'
        txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
        txt += 'echo "SE = $SE"\n'
        txt += 'echo "SE_PATH = $SE_PATH"\n'
        txt += 'echo "FOR_LFN = $FOR_LFN" \n'
        txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
        txt += 'echo "$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
        txt += '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
        txt += 'modifyReport_result=$?\n'
        txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
        txt += ' modifyReport_result=70500\n'
        txt += ' job_exit_code=$modifyReport_result\n'
        txt += ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' echo "WARNING: Problem with ModifyJobReport"\n'
        txt += 'else\n'
        txt += ' mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
        txt += 'fi\n'
    return txt
1240 fanzago 1.99
def wsParseFJR(self):
    """
    Return the part of the job script which parses the
    FrameworkJobReport to obtain the exit status of the executable.

    The fragment runs parseCrabFjr.py on crab_fjr_$NJob.xml and stores
    the result in executable_exit_status. When a dataset is read and no
    pile-up dataset is used (self.datasetPath set, self.dataset_pu not
    set) it also compares the list of input files with the list of
    processed files and sets the status to 30001 when they differ.
    The final status is copied to job_exit_code.
    """
    txt = '\n#Written by cms_cmssw::wsParseFJR\n'
    txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
    txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
    txt += ' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
    txt += ' cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n'
    if self.debug_wrapper :
        txt += ' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
    txt += ' executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n'
    txt += ' if [ $executable_exit_status -eq 50115 ];then\n'
    txt += ' echo ">>> crab_fjr.xml contents: "\n'
    txt += ' cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
    txt += ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
    txt += ' elif [ $executable_exit_status -eq -999 ];then\n'
    # the message used to repeat "not available." twice
    txt += ' echo "ExitStatus from FrameworkJobReport not available. Using exit code of executable from command line."\n'
    txt += ' else\n'
    txt += ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
    txt += ' fi\n'
    txt += ' else\n'
    txt += ' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
    txt += ' fi\n'
    #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap

    if (self.datasetPath and not self.dataset_pu ):
        # VERIFY PROCESSED DATA
        txt += ' if [ $executable_exit_status -eq 0 ];then\n'
        txt += ' echo ">>> Verify list of processed files:"\n'
        txt += ' echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
        txt += ' python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --lfn > processed-files.txt\n'
        txt += ' cat input-files.txt | sort | uniq > tmp.txt\n'
        txt += ' mv tmp.txt input-files.txt\n'
        txt += ' echo "cat input-files.txt"\n'
        txt += ' echo "----------------------"\n'
        txt += ' cat input-files.txt\n'
        txt += ' cat processed-files.txt | sort | uniq > tmp.txt\n'
        txt += ' mv tmp.txt processed-files.txt\n'
        txt += ' echo "----------------------"\n'
        txt += ' echo "cat processed-files.txt"\n'
        txt += ' echo "----------------------"\n'
        txt += ' cat processed-files.txt\n'
        txt += ' echo "----------------------"\n'
        txt += ' diff -q input-files.txt processed-files.txt\n'
        txt += ' fileverify_status=$?\n'
        txt += ' if [ $fileverify_status -ne 0 ]; then\n'
        txt += ' executable_exit_status=30001\n'
        txt += ' echo "ERROR ==> not all input files processed"\n'
        txt += ' echo " ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n'
        txt += ' echo " ==> diff input-files.txt processed-files.txt"\n'
        txt += ' fi\n'
        txt += ' fi\n'
        txt += '\n'
    txt += 'else\n'
    txt += ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
    txt += 'fi\n'
    txt += '\n'
    txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
    txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
    txt += 'job_exit_code=$executable_exit_status\n'

    return txt
1304    
def setParam_(self, param, value):
    """ store 'value' under the key 'param' in the parameter dictionary """
    params = self._params
    params[param] = value
1307    
def getParams(self):
    """ return the parameter dictionary itself (not a copy) """
    params = self._params
    return params
1310 gutsche 1.8
def uniquelist(self, old):
    """
    Remove duplicates from a list.

    Returns a new list holding each element of 'old' once, in order of
    first appearance. The elements must be hashable.
    """
    seen = {}
    unique = []
    for e in old:
        if e not in seen:
            seen[e] = 0
            unique.append(e)
    return unique
1319 mcinquil 1.121
def outList(self):
    """
    Return the part of the job script which lists the files expected
    in the output sandbox and exports them as filesToCheck.

    The list holds the numbered names (numberFile with '$NJob') of
    output_file plus output_file_sandbox when return_data is 1, of
    output_file_sandbox only otherwise, followed by the job stdout and
    stderr files.
    """
    if (self.return_data == 1):
        declared = (self.output_file+self.output_file_sandbox)
    else:
        declared = (self.output_file_sandbox)

    expected = [numberFile(name, '$NJob') for name in declared]
    expected.append('CMSSW_$NJob.stdout')
    expected.append('CMSSW_$NJob.stderr')
    joined = ' '.join(expected)

    script = [
        'echo ">>> list of expected files on output sandbox"\n',
        'echo "output files: '+joined+'"\n',
        'filesToCheck="'+joined+'"\n',
        'export filesToCheck\n',
    ]
    return ''.join(script)