ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.235
Committed: Fri Aug 29 15:06:41 2008 UTC (16 years, 8 months ago) by spiga
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_2_3_2_Fnal, CRAB_2_3_2, CRAB_2_3_2_pre7, CRAB_2_3_2_pre5
Changes since 1.234: +0 -1 lines
Log Message:
removed uncommented wrong variable

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5 ewv 1.228 from BlackWhiteListParser import SEBlackWhiteListParser
6 slacapra 1.1 import common
7     import Scram
8 fanzago 1.173 from LFNBaseName import *
9 slacapra 1.1
10 slacapra 1.105 import os, string, glob
11 slacapra 1.1
12     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs,skip_blocks, isNew):
    """
    Configure a CMSSW job type from the user configuration.

    Reads the CRAB.*, EDG.*, CMSSW.* and USER.* keys it needs from
    cfg_params, validates them, runs data discovery when a dataset is
    given, performs the job splitting and, for a new task, writes the
    modified parameter set and prepares the input sandbox tarball.

    ARGUMENTS: cfg_params  - mapping 'SECTION.key' -> value
               ncjobs      - number of jobs to create (the block splitter
                             also accepts the string 'all')
               skip_blocks - forwarded unchanged to DataDiscovery
               isNew       - true when the task is created for the first
                             time; only then the PSet is rewritten and the
                             tarball prepared
    RAISES:    CrabException on missing or inconsistent configuration
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug(3,'CMSSW::__init__')
    self.skip_blocks = skip_blocks

    self.argsList = []

    self._params = {}
    self.cfg_params = cfg_params
    # init BlackWhiteListParser
    self.blackWhiteListParser = SEBlackWhiteListParser(cfg_params)

    ### Temporary patch to automatically skip the ISB size check:
    ### with a server configured the default limit is raised from 9.5 to 99999
    server = self.cfg_params.get('CRAB.server_name',None)
    size = 9.5
    if server: size = 99999
    ### D.S.
    self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',size))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    log = common.logger

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''
    self.datasetPath = ''

    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    # The version string is split on '_' and fields 1..3 are taken as
    # major / minor / patch numbers.
    self.version = self.scram.getSWVersion()
    version_array = self.version.split('_')
    self.CMSSW_major = 0
    self.CMSSW_minor = 0
    self.CMSSW_patch = 0
    try:
        self.CMSSW_major = int(version_array[1])
        self.CMSSW_minor = int(version_array[2])
        self.CMSSW_patch = int(version_array[3])
    except (IndexError, ValueError):
        # too few fields, or a field that is not a number
        msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
        raise CrabException(msg)

    ### collect Data cards

    if 'CMSSW.datasetpath' not in cfg_params:
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)

    ### Temporary: added to remove input file control in the case of PU
    self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)

    # datasetpath = 'none' (any case) means the job has no input dataset
    tmp = cfg_params['CMSSW.datasetpath']
    log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
    if tmp.lower() == 'none':
        self.datasetPath = None
        self.selectNoInput = 1
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0

    self.dataTiers = []
    self.debugWrap = ''
    self.debug_wrapper = cfg_params.get('USER.debug_wrapper',False)
    if self.debug_wrapper: self.debugWrap='--debug'
    ## now the application
    self.executable = cfg_params.get('CMSSW.executable','cmsRun')
    log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)

    if 'CMSSW.pset' not in cfg_params:
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
    # pset = 'none' (any case) means no parameter set is used
    if self.pset.lower() != 'none' :
        if (not os.path.exists(self.pset)):
            raise CrabException("User defined PSet file "+self.pset+" does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    tmp = cfg_params.get('CMSSW.output_file',None)
    if tmp :
        self.output_file = [x.strip() for x in tmp.split(',')]

    # script_exe file as additional file in inputSandbox
    self.scriptExe = cfg_params.get('USER.script_exe',None)
    if self.scriptExe :
        if not os.path.isfile(self.scriptExe):
            msg ="ERROR. file "+self.scriptExe+" not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    # Without dataset and without PSet the only thing left to run is the
    # user script. scriptExe is None (not '') when the key is absent, so
    # test its truth value rather than comparing against ''.
    if self.datasetPath == None and self.pset == None and not self.scriptExe :
        msg ="Error. script_exe not defined"
        raise CrabException(msg)

    # use parent files...
    self.useParent = self.cfg_params.get('CMSSW.use_parent',False)

    ## additional input files
    if 'USER.additional_input_files' in cfg_params:
        for pattern in cfg_params['USER.additional_input_files'].split(','):
            pattern = pattern.strip()
            if not pattern:
                # empty entry (e.g. trailing comma): nothing to add
                continue
            dirname = ''
            if not pattern[0]=="/": dirname = "."
            files = []
            if pattern.find("*")>-1:
                files = glob.glob(os.path.join(dirname, pattern))
                if len(files)==0:
                    raise CrabException("No additional input file found with this pattern: "+pattern)
            else:
                files.append(pattern)
            for path in files:
                if not os.path.exists(path):
                    raise CrabException("Additional input file not found: "+path)
                self.additional_inbox_files.append(path.strip())
        common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))

    ## Events per job
    if 'CMSSW.events_per_job' in cfg_params:
        self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
        self.selectEventsPerJob = 1
    else:
        self.eventsPerJob = -1
        self.selectEventsPerJob = 0

    ## number of jobs
    if 'CMSSW.number_of_jobs' in cfg_params:
        self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
        self.selectNumberOfJobs = 1
    else:
        self.theNumberOfJobs = 0
        self.selectNumberOfJobs = 0

    if 'CMSSW.total_number_of_events' in cfg_params:
        self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
        self.selectTotalNumberEvents = 1
        if self.selectNumberOfJobs == 1:
            # -1 is exempt from the "at least one event per job" check
            if (self.total_number_of_events != -1) and int(self.total_number_of_events) < int(self.theNumberOfJobs):
                msg = 'Must specify at least one event per job. total_number_of_events > number_of_jobs '
                raise CrabException(msg)
    else:
        self.total_number_of_events = 0
        self.selectTotalNumberEvents = 0

    # The select* flags are 0/1, so their sum counts how many of the three
    # splitting parameters the user supplied.
    if self.pset != None:
        if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
            msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
            raise CrabException(msg)
    else:
        if (self.selectNumberOfJobs == 0):
            msg = 'Must specify number_of_jobs.'
            raise CrabException(msg)

    ## New method of dealing with seeds
    # str.strip() returns a new string, so the stripped value itself must
    # be stored.
    self.incrementSeeds = []
    self.preserveSeeds = []
    if 'CMSSW.preserve_seeds' in cfg_params:
        for tmp in cfg_params['CMSSW.preserve_seeds'].split(','):
            self.preserveSeeds.append(tmp.strip())
    if 'CMSSW.increment_seeds' in cfg_params:
        for tmp in cfg_params['CMSSW.increment_seeds'].split(','):
            self.incrementSeeds.append(tmp.strip())

    ## FUTURE: Can remove in CRAB 2.4.0
    self.sourceSeed = cfg_params.get('CMSSW.pythia_seed',None)
    self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
    self.sourceSeedG4 = cfg_params.get('CMSSW.g4_seed',None)
    self.sourceSeedMix = cfg_params.get('CMSSW.mix_seed',None)
    if self.sourceSeed or self.sourceSeedVtx or self.sourceSeedG4 or self.sourceSeedMix:
        msg = 'pythia_seed, vtx_seed, g4_seed, and mix_seed are no longer valid settings. You must use increment_seeds or preserve_seeds'
        raise CrabException(msg)

    self.firstRun = cfg_params.get('CMSSW.first_run',None)

    # Copy/return
    self.copy_data = int(cfg_params.get('USER.copy_data',0))
    self.return_data = int(cfg_params.get('USER.return_data',0))

    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination=[]  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end

    ## Select Splitting
    if self.selectNoInput:
        if self.pset == None:
            self.jobSplittingForScript()
        else:
            self.jobSplittingNoInput()
    else:
        self.jobSplittingByBlocks(blockSites)

    # modify Pset only the first time
    if isNew:
        if self.pset != None:
            import PsetManipulator as pp
            PsetEdit = pp.PsetManipulator(self.pset)
            try:
                # Add FrameworkJobReport to parameter-set, set max events.
                # Reset later for data jobs by writeCFG which does all modifications
                PsetEdit.addCrabFJR(self.fjrFileName) # FUTURE: Job report addition not needed by CMSSW>1.5
                PsetEdit.maxEvent(self.eventsPerJob)
                PsetEdit.psetWriter(self.configFilename())
                ## If present, add TFileService to output files
                if not int(cfg_params.get('CMSSW.skip_TFileService_output',0)):
                    tfsOutput = PsetEdit.getTFileService()
                    if tfsOutput:
                        if tfsOutput in self.output_file:
                            common.logger.debug(5,"Output from TFileService "+tfsOutput+" already in output files")
                        else:
                            self.output_file.append(tfsOutput)
                            common.logger.message("Adding "+tfsOutput+" to output files (from TFileService)")
                ## If present and requested, add PoolOutputModule to output files
                if int(cfg_params.get('CMSSW.get_edm_output',0)):
                    edmOutput = PsetEdit.getPoolOutputModule()
                    if edmOutput:
                        if edmOutput in self.output_file:
                            common.logger.debug(5,"Output from PoolOutputModule "+edmOutput+" already in output files")
                        else:
                            self.output_file.append(edmOutput)
                            common.logger.message("Adding "+edmOutput+" to output files (from PoolOutputModule)")
            except CrabException:
                msg='Error while manipulating ParameterSet: exiting...'
                raise CrabException(msg)
        ## Prepare inputSandbox TarBall (only the first time)
        self.tgzNameWithPath = self.getTarBall(self.executable)
280 gutsche 1.3
def DataDiscoveryAndLocation(self, cfg_params):
    """
    Query the data discovery service for the content of self.datasetPath
    and the data location service for the sites hosting its blocks.

    ARGUMENT: cfg_params - configuration mapping, forwarded to
                           DataDiscovery and DataLocation
    SETS:     self.pubdata, self.filesbyblock, self.eventsbyblock,
              self.eventsbyfile, self.parentFiles, self.maxEvents
    RETURNS:  the result of DataLocation.getSites(); the block splitter in
              this class uses it as a mapping block -> list of host sites
    RAISES:   CrabException when discovery or location lookup fails
    """
    import sys
    import DataDiscovery
    import DataLocation
    common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")

    datasetPath=self.datasetPath

    ## Contact the DBS
    common.logger.message("Contacting Data Discovery Services ...")
    try:
        self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
        self.pubdata.fetchDBSInfo()
    except (DataDiscovery.NotExistingDatasetError,
            DataDiscovery.NoDataTierinProvenanceError,
            DataDiscovery.DataDiscoveryError):
        # All three failures are reported the same way. The exception is
        # fetched through sys.exc_info() so the handler does not depend on
        # version-specific "except ... , name" / "except ... as name" syntax.
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
        raise CrabException(msg)

    self.filesbyblock=self.pubdata.getFiles()
    self.eventsbyblock=self.pubdata.getEventsPerBlock()
    self.eventsbyfile=self.pubdata.getEventsPerFile()
    self.parentFiles=self.pubdata.getParent()

    ## get max number of events
    self.maxEvents=self.pubdata.getMaxEvents()

    ## Contact the DLS and build a list of sites hosting the fileblocks
    try:
        dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
        dataloc.fetchDLSInfo()
    except DataLocation.DataLocationError:
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
        raise CrabException(msg)

    sites = dataloc.getSites()

    # screen output
    common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")

    return sites
334 ewv 1.131
def jobSplittingByBlocks(self, blockSites):
    """
    Perform job splitting. Jobs run over an integer number of files
    and no more than one block.
    ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
    REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberofJobs,
              self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
              self.maxEvents, self.filesbyblock, self.eventsbyblock, self.eventsbyfile,
              self.ncjobs, self.useParent (and self.parentFiles when it is set)
    SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
          self.total_number_of_jobs - Total # of jobs (also stored in self.ncjobs)
          self.list_of_args - per job: [files, (parent files,) max events, skip events]
                              max events is '-1' for the job closing a block
    """
    # File names are emitted as \"name\" followed by \, ; the trailing
    # two-character "\," is cut with [:-2] when a job is closed.
    quote = '\\"'
    sep = '\\,'

    # ---- Handle the possible job splitting configurations ---- #
    if self.selectTotalNumberEvents:
        totalEventsRequested = self.total_number_of_events
    if self.selectEventsPerJob:
        eventsPerJobRequested = self.eventsPerJob
        # number_of_jobs fixes the total only together with events_per_job
        if self.selectNumberOfJobs:
            totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob

    # If user requested all the events in the dataset
    if (totalEventsRequested == -1):
        eventsRemaining=self.maxEvents
    # If user requested more events than are in the dataset
    elif (totalEventsRequested > self.maxEvents):
        eventsRemaining = self.maxEvents
        # report the effective request: self.total_number_of_events is 0
        # when the total was derived from number_of_jobs * events_per_job
        common.logger.message("Requested "+str(totalEventsRequested)+ " events, but only "+str(self.maxEvents)+" events are available.")
    # If user requested less events than are in the dataset
    else:
        eventsRemaining = totalEventsRequested

    # If user requested more events per job than are in the dataset
    if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
        eventsPerJobRequested = self.maxEvents

    # For user info at end
    totalEventCount = 0

    if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
        eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)

    if (self.selectNumberOfJobs):
        common.logger.message("May not create the exact number_of_jobs requested.")

    if ( self.ncjobs == 'all' ) :
        totalNumberOfJobs = 999999999
    else :
        totalNumberOfJobs = self.ncjobs

    blocks = list(blockSites.keys())
    blockCount = 0
    # Backup variable in case self.maxEvents counted events in a non-included block
    numBlocksInDataset = len(blocks)

    jobCount = 0
    list_of_lists = []

    # dictionary tracking which jobs belong to which block
    jobsOfBlock = {}

    # ---- Iterate over the blocks in the dataset until ---- #
    # ---- we've met the requested total # of events    ---- #
    while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
        block = blocks[blockCount]
        blockCount += 1
        if block not in jobsOfBlock:
            jobsOfBlock[block] = []

        if block in self.eventsbyblock:
            numEventsInBlock = self.eventsbyblock[block]
            common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))

            files = self.filesbyblock[block]
            numFilesInBlock = len(files)
            if (numFilesInBlock <= 0):
                continue
            fileCount = 0

            # ---- New block => New job ---- #
            parString = ""
            # counter for number of events in files currently worked on
            filesEventCount = 0
            # flag if next while loop should touch new file
            newFile = 1
            # job event counter
            jobSkipEventCount = 0

            # ---- Iterate over the files in the block until we've met the requested ---- #
            # ---- total # of events or we've gone over all the files in this block  ---- #
            pString=''
            while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
                fileName = files[fileCount]
                if self.useParent:
                    parent = self.parentFiles[fileName]
                    common.logger.debug(6, "File "+str(fileName)+" has the following parents: "+str(parent))
                    common.logger.write("File "+str(fileName)+" has the following parents: "+str(parent))
                if newFile :
                    try:
                        numEventsInFile = self.eventsbyfile[fileName]
                        common.logger.debug(6, "File "+str(fileName)+" has "+str(numEventsInFile)+" events")
                        # increase filesEventCount
                        filesEventCount += numEventsInFile
                        # Add file to current job
                        parString += quote + fileName + quote + sep
                        # Parents are added together with their file, i.e.
                        # once per file and job. Adding them on every pass
                        # of the loop would repeat them each time the same
                        # file is revisited for a further job.
                        if self.useParent:
                            for f in parent :
                                pString += quote + f + quote + sep
                        newFile = 0
                    except KeyError:
                        common.logger.message("File "+str(fileName)+" has unknown number of events: skipping")

                eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining)
                # if less events in file remain than eventsPerJobRequested
                if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested):
                    # if last file in block
                    if ( fileCount == numFilesInBlock-1 ) :
                        # end job using last file, use remaining events in block
                        # close job and touch new file
                        fullString = parString[:-2]
                        if self.useParent:
                            fullParentString = pString[:-2]
                            list_of_lists.append([fullString,fullParentString,str(-1),str(jobSkipEventCount)])
                        else:
                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
                        self.jobDestination.append(blockSites[block])
                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                        # fill jobs of block dictionary
                        jobsOfBlock[block].append(jobCount+1)
                        # reset counter
                        jobCount = jobCount + 1
                        totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
                        eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
                        jobSkipEventCount = 0
                        # reset file
                        pString = ""
                        parString = ""
                        filesEventCount = 0
                        newFile = 1
                        fileCount += 1
                    else :
                        # go to next file
                        newFile = 1
                        fileCount += 1
                # if events in file equal to eventsPerJobRequested
                elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
                    # close job and touch new file
                    fullString = parString[:-2]
                    if self.useParent:
                        fullParentString = pString[:-2]
                        list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    else:
                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    jobsOfBlock[block].append(jobCount+1)
                    # reset counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    jobSkipEventCount = 0
                    # reset file
                    pString = ""
                    parString = ""
                    filesEventCount = 0
                    newFile = 1
                    fileCount += 1

                # if more events in file remain than eventsPerJobRequested
                else :
                    # close job but don't touch new file
                    fullString = parString[:-2]
                    if self.useParent:
                        fullParentString = pString[:-2]
                        list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    else:
                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    jobsOfBlock[block].append(jobCount+1)
                    # increase counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    # calculate skip events for last file
                    # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
                    jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[fileName])
                    # remove all but the last file; the next job starts
                    # inside it, so both the file list and the parent
                    # list restart from this file alone
                    filesEventCount = self.eventsbyfile[fileName]
                    pString = ""
                    if self.useParent:
                        for f in parent :
                            pString += quote + f + quote + sep
                    parString = quote + fileName + quote + sep
            # END while (iterate over files in the block)
    # END while (iterate over blocks in the dataset)
    self.ncjobs = self.total_number_of_jobs = jobCount
    if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
        common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
    common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")

    # screen output
    screenOutput = "List of jobs and available destination sites:\n\n"

    # keep trace of block with no sites to print a warning at the end
    noSiteBlock = []
    bloskNoSite = []

    blockCounter = 0
    for block in blocks:
        if block in jobsOfBlock:
            blockCounter += 1
            jobRange = spanRanges(jobsOfBlock[block])
            # sites left for this block after the black list and then the
            # white list have been applied (evaluated once per block)
            allowedSites = self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)
            screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,jobRange,
                ','.join(allowedSites))
            if len(allowedSites) == 0:
                noSiteBlock.append( jobRange )
                bloskNoSite.append( blockCounter )

    common.logger.message(screenOutput)
    if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
        msg = 'WARNING: No sites are hosting any part of data for block:\n '
        virgola = ""
        if len(bloskNoSite) > 1:
            virgola = ","
        for block in bloskNoSite:
            msg += ' ' + str(block) + virgola
        msg += '\n Related jobs:\n '
        virgola = ""
        if len(noSiteBlock) > 1:
            virgola = ","
        for range_jobs in noSiteBlock:
            msg += str(range_jobs) + virgola
        msg += '\n will not be submitted and this block of data can not be analyzed!\n'
        if 'EDG.se_white_list' in self.cfg_params:
            msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
            msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
            msg += 'Please check if the dataset is available at this site!)\n'
        if 'EDG.ce_white_list' in self.cfg_params:
            msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
            msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
            msg += 'Please check if the dataset is available at this site!)\n'

        common.logger.message(msg)

    self.list_of_args = list_of_lists
    return
582    
def jobSplittingNoInput(self):
    """
    Split a task without input dataset by number of events.

    Derives the missing one of total_number_of_events / eventsPerJob /
    total_number_of_jobs from the two supplied by the user, then creates
    one argument list and one empty destination per job.

    SETS:   self.total_number_of_jobs, self.list_of_args and, depending on
            the combination given, self.total_number_of_events or
            self.eventsPerJob; appends to self.jobDestination
    RAISES: CrabException when the total number of events is negative
    """
    logger = common.logger
    logger.debug(5,'Splitting per events')

    if self.selectEventsPerJob:
        logger.message('Required '+str(self.eventsPerJob)+' events per job ')
    if self.selectNumberOfJobs:
        logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
    if self.selectTotalNumberEvents:
        logger.message('Required '+str(self.total_number_of_events)+' events in total ')

    if self.total_number_of_events < 0:
        raise CrabException('Cannot split jobs per Events with "-1" as total number of events')

    if self.selectEventsPerJob:
        if self.selectTotalNumberEvents:
            # events per job + total events -> number of jobs
            self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
        elif self.selectNumberOfJobs:
            # events per job + number of jobs -> total events
            self.total_number_of_jobs = self.theNumberOfJobs
            self.total_number_of_events = int(self.theNumberOfJobs*self.eventsPerJob)
    elif self.selectNumberOfJobs:
        # number of jobs + total events -> events per job
        self.total_number_of_jobs = self.theNumberOfJobs
        self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)

    logger.debug(5,'N jobs '+str(self.total_number_of_jobs))

    # is there any remainder?
    achievable = int(self.total_number_of_jobs)*self.eventsPerJob
    check = int(self.total_number_of_events) - achievable

    logger.debug(5,'Check '+str(check))

    logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
    if check > 0:
        logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(achievable))

    # argument is seed number.$i
    self.list_of_args = []
    for jobIndex in range(self.total_number_of_jobs):
        ## Since there is no input, any site is good
        self.jobDestination.append([""]) #must be empty to write correctly the xml
        if self.firstRun:
            ## pythia first run: configured value with the job index appended
            jobArgs = [str(self.firstRun)+str(jobIndex)]
        else:
            jobArgs = []
        self.list_of_args.append(jobArgs)

    return
634    
635 spiga 1.42
def jobSplittingForScript(self):
    """
    Split a script-only task: create exactly self.theNumberOfJobs jobs.

    SETS: self.total_number_of_jobs, self.list_of_args (one single-element
          list per job holding the job index as a string); appends one
          empty destination per job to self.jobDestination
    """
    common.logger.debug(5,'Splitting per job')
    njobs = self.theNumberOfJobs
    common.logger.message('Required '+str(njobs)+' jobs in total ')

    self.total_number_of_jobs = njobs

    common.logger.debug(5,'N jobs '+str(njobs))

    common.logger.message(str(njobs)+' jobs can be created')

    # argument is seed number.$i
    self.list_of_args = []
    jobIndex = 0
    while jobIndex < njobs:
        self.jobDestination.append([""])
        self.list_of_args.append([str(jobIndex)])
        jobIndex += 1
    return
655    
def split(self, jobParams,firstJobID):
    """
    Fill jobParams with the per-job arguments and store them in the task DB.

    ARGUMENTS: jobParams  - list that receives one entry per job (extended
                            in place, then entry i is set to the i-th
                            element of self.list_of_args)
               firstJobID - offset added to the local job index; job ids
                            written to the DB are offset + index + 1
    REQUIRES:  self.total_number_of_jobs, self.list_of_args, self.jobDestination
    SETS:      self.argsList - number of arguments of the first job plus one
    """
    njobs = self.total_number_of_jobs
    arglist = self.list_of_args
    # create the empty structure
    jobParams.extend([""] * njobs)

    listID = []
    listField = []
    for idx in range(njobs):
        job = idx + int(firstJobID)
        jobParams[idx] = arglist[idx]
        listID.append(job+1)

        # the job's arguments joined by blanks, preceded by its 1-based id
        argu = ''
        if len(jobParams[idx]):
            argu += ' '.join(jobParams[idx])
        destination = self.jobDestination[idx]
        listField.append({
            'arguments': str(job+1)+' '+argu,
            'dlsDestination': destination,
        })

        msg = "Job "+str(job)+" Arguments: "+str(job+1)+" "+argu+"\n" \
            +" Destination: "+str(destination)
        common.logger.debug(5,msg)
    common._db.updateJob_(listID,listField)
    self.argsList = (len(jobParams[0])+1)

    return
685 ewv 1.131
def numberOfJobs(self):
    """ return the total number of jobs stored by the splitting step """
    total = self.total_number_of_jobs
    return total
688    
def getTarBall(self, exe):
    """
    Return the path of the tarball holding the user libraries and executable.

    The path is stored in self.tgzNameWithPath. If no file exists there yet,
    buildTar_ is called with exe before the (whitespace-stripped) path is
    returned.
    """
    tarball = common.work_space.pathForTgz()+'share/'+self.tgz_name
    self.tgzNameWithPath = tarball
    if not os.path.exists(tarball):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return self.tgzNameWithPath.strip()
    return tarball
701    
def buildTar_(self, executable):
    """
    Create the gzipped tarball self.tgzNameWithPath shipped with the jobs.

    Nothing is created (and self.dataExist is left untouched) when the scram
    release top is empty or equal to the user working area. Otherwise the
    tarball receives, in this order: the user executable if it is private,
    the 'lib' and 'module' directories of the working area, every directory
    named 'data' found below 'src', the CMSSW parameter set, the needed
    ProdCommon/IMProv pieces, the monitoring and utility scripts taken from
    $CRABDIR/python and the additional input files.

    Raises CrabException if the user executable cannot be found, if the
    tarball cannot be written, or if it is larger than self.MaxTarBallSize MB.
    """
    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        common.logger.debug(3,"swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
        return

    import tarfile
    try: # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            if (self.executable != ''):
                exeWithPath = self.scram.findFile_(executable)
                if ( not exeWithPath ):
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship
                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
                    areaPrefix = swArea+'/'
                    # distinguish case when script is in user project area or given by full path somewhere else
                    if exeWithPath.find(areaPrefix) >= 0 :
                        exe = exeWithPath.replace(areaPrefix, '')
                        tar.add(areaPrefix+exe,exe)
                    else :
                        tar.add(exeWithPath,os.path.basename(executable))
                # else: the exe is from release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            libDir = 'lib'
            lib = swArea+'/' +libDir
            common.logger.debug(5,"lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib,libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module,moduleDir)

            ## Now check if any data dir(s) is present
            self.dataExist = False
            # work list of (path relative to src, last path component)
            todo_list = [(i, i) for i in os.listdir(swArea+"/src")]
            while len(todo_list):
                entry, name = todo_list.pop()
                # skip CRAB working directories, hidden entries and CVS metadata
                if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS':
                    continue
                if os.path.isdir(swArea+"/src/"+entry):
                    entryPath = entry + '/'
                    todo_list += [(entryPath + i, i) for i in os.listdir(swArea+"/src/"+entry)]
                    if name == 'data':
                        self.dataExist=True
                        common.logger.debug(5,"data "+entry+" to be tarred")
                        tar.add(swArea+"/src/"+entry,"src/"+entry)

            ### CMSSW ParameterSet
            if self.pset is not None:
                cfg_file = common.work_space.jobDir()+self.configFilename()
                tar.add(cfg_file,self.configFilename())
                common.logger.debug(5,"File added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))

            ## Add ProdCommon dir to tar
            prodcommonDir = './'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
            neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools','ProdCommon/Core','ProdCommon/MCPayloads', 'IMProv']
            for fileName in neededStuff:
                tar.add(prodcommonPath+fileName,prodcommonDir+fileName)
            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))

            ##### ML stuff
            ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
            crabPython = os.environ['CRABDIR'] + '/python/'
            for fileName in ML_file_list:
                tar.add(crabPython+fileName,fileName)
            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))

            ##### Utils
            Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py']
            for fileName in Utils_file_list:
                tar.add(crabPython+fileName,fileName)
            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))

            ##### AdditionalFiles
            for fileName in self.additional_inbox_files:
                # stored flat, under the last path component only
                tar.add(fileName,fileName.split('/')[-1])
            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            # release the file handle also when packing stops half way
            tar.close()
    except IOError:
        raise CrabException('Could not create tar-ball '+self.tgzNameWithPath)
    except tarfile.TarError:
        raise CrabException('Could not create tar-ball '+self.tgzNameWithPath)

    ## check for tarball size
    tarballinfo = os.stat(self.tgzNameWithPath)
    if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
        raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
811    
812 slacapra 1.61 ## create tar-ball with ML stuff
813 slacapra 1.97
def wsSetupEnvironment(self, nj=0):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    The returned shell text sets up the middleware specific environment
    (LCG or OSG), creates the CMSSW project area with scram, checks the
    number of arguments and exports the job specific variables read from
    the ${args[...]} array. As a side effect self.primaryDataset is set
    (second '/'-separated field of self.datasetPath, or 'null' when no
    dataset is used).
    """
    # name the parameter set gets on the worker node: python for >= 2_1, cfg otherwise
    if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
        psetName = 'pset.py'
    else:
        psetName = 'pset.cfg'
    # Prepare JobType-independent part
    txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
    txt += 'echo ">>> setup environment"\n'
    txt += 'if [ $middleware == LCG ]; then \n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' if [ ! $? == 0 ] ;then\n'
    txt += ' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' job_exit_code=10016\n'
    txt += ' func_exit\n'
    txt += ' fi\n'
    txt += ' echo ">>> Created working directory: $WORKING_DIR"\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo ">>> specific cmssw setup environment:"\n'
    txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
    txt += scram+' project CMSSW '+self.version+'\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
    txt += ' job_exit_code=10034\n'
    txt += ' func_exit\n'
    txt += 'fi \n'
    txt += 'cd '+self.version+'\n'
    txt += 'SOFTWARE_DIR=`pwd`\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    txt += 'if [ $? != 0 ] ; then\n'
    txt += ' echo "ERROR ==> Problem with the command: "\n'
    txt += ' echo "eval \`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \` at `hostname`"\n'
    txt += ' job_exit_code=10034\n'
    txt += ' func_exit\n'
    txt += 'fi \n'
    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber)\n"
    txt += "\n"
    # self.argsList is the expected argument count (assigned in split())
    txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n"
    txt += "then\n"
    txt += " echo 'ERROR ==> Too few arguments' +$nargs+ \n"
    txt += ' job_exit_code=50113\n'
    txt += " func_exit\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    if (self.datasetPath):
        txt += '\n'
        txt += 'DatasetPath='+self.datasetPath+'\n'

        # index 0 of the split is the text before the first '/'
        datasetpath_split = self.datasetPath.split("/")
        ### FEDE FOR NEW LFN ###
        self.primaryDataset = datasetpath_split[1]
        ########################
        txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
        txt += 'DataTier='+datasetpath_split[2]+'\n'
        txt += 'ApplicationFamily=cmsRun\n'

    else:
        txt += 'DatasetPath=MCDataTier\n'
        ### FEDE FOR NEW LFN ###
        self.primaryDataset = 'null'
        ########################
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'
    if self.pset != None:
        pset = os.path.basename(job.configFilename())
        txt += '\n'
        txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
        if (self.datasetPath): # standard job
            txt += 'InputFiles=${args[1]}; export InputFiles\n'
            # with parent files the event arguments are shifted by one position
            if (self.useParent):
                txt += 'ParentFiles=${args[2]}; export ParentFiles\n'
                txt += 'MaxEvents=${args[3]}; export MaxEvents\n'
                txt += 'SkipEvents=${args[4]}; export SkipEvents\n'
            else:
                txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
                txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
            txt += 'echo "Inputfiles:<$InputFiles>"\n'
            if (self.useParent): txt += 'echo "ParentFiles:<$ParentFiles>"\n'
            txt += 'echo "MaxEvents:<$MaxEvents>"\n'
            txt += 'echo "SkipEvents:<$SkipEvents>"\n'
        else: # pythia like job
            txt += 'PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n'
            txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
            txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
            txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
            if (self.firstRun):
                txt += 'FirstRun=${args[1]}; export FirstRun\n'
                txt += 'echo "FirstRun: <$FirstRun>"\n'

        # give the config file the version dependent name chosen above
        txt += 'mv -f ' + pset + ' ' + psetName + '\n'


    if self.pset != None:
        # FUTURE: Can simply for 2_1_x and higher
        txt += '\n'
        if self.debug_wrapper==True:
            txt += 'echo "***** cat ' + psetName + ' *********"\n'
            txt += 'cat ' + psetName + '\n'
            txt += 'echo "****** end ' + psetName + ' ********"\n'
            txt += '\n'
        # the file is passed as an argument for >= 2_1, on stdin for older releases
        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
            txt += 'PSETHASH=`edmConfigHash ' + psetName + '` \n'
        else:
            txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n'
        txt += 'echo "PSETHASH = $PSETHASH" \n'
        txt += '\n'
    return txt
943 slacapra 1.176
def wsUntarSoftware(self, nj=0):
    """
    Put in the script the commands to unpack the input tarball.

    Returns a shell fragment. When self.tgzNameWithPath is an existing file
    the fragment untars it from $RUNTIME_AREA, aborts the job through
    func_exit if tar fails and prepends $RUNTIME_AREA to PYTHONPATH;
    otherwise only the header comment is returned. 'nj' is not used.
    """

    txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'

    if os.path.isfile(self.tgzNameWithPath):
        tarball = os.path.basename(self.tgzNameWithPath)
        txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+tarball+' :" \n'
        txt += 'tar xzvf $RUNTIME_AREA/'+tarball+'\n'
        # $? must be read right after tar: any command emitted in between
        # (such as the debug listing below) would overwrite it
        txt += 'untar_status=$? \n'
        if self.debug_wrapper:
            txt += 'ls -Al \n'
        txt += 'if [ $untar_status -ne 0 ]; then \n'
        txt += ' echo "ERROR ==> Untarring .tgz file failed"\n'
        txt += ' job_exit_code=$untar_status\n'
        txt += ' func_exit\n'
        txt += 'else \n'
        txt += ' echo "Successful untar" \n'
        txt += 'fi \n'
        txt += '\n'
        txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
        txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
        txt += 'else\n'
        txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
        txt += 'fi\n'
        txt += '\n'

    return txt
978 ewv 1.170
def wsBuildExe(self, nj=0):
    """
    Put in the script the commands that move the unpacked software in place.

    The returned shell fragment replaces lib/ and module/ (and src/ when
    self.dataExist is True) with the copies found in $RUNTIME_AREA, moves
    every additional input file (by base name) to the current directory and
    prepends $RUNTIME_AREA to PYTHONPATH. 'nj' is not used.
    """
    pieces = [
        '\n#Written by cms_cmssw::wsBuildExe\n',
        'echo ">>> moving CMSSW software directories in `pwd`" \n',
        'rm -r lib/ module/ \n',
        'mv $RUNTIME_AREA/lib/ . \n',
        'mv $RUNTIME_AREA/module/ . \n',
    ]
    if self.dataExist == True:
        pieces.append('rm -r src/ \n')
        pieces.append('mv $RUNTIME_AREA/src/ . \n')
    for extra in self.additional_inbox_files:
        pieces.append('mv $RUNTIME_AREA/'+os.path.basename(extra)+' . \n')

    pieces.extend([
        'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n',
        'if [ -z "$PYTHONPATH" ]; then\n',
        ' export PYTHONPATH=$RUNTIME_AREA/\n',
        'else\n',
        ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n',
        'echo "PYTHONPATH=$PYTHONPATH"\n',
        'fi\n',
        '\n',
    ])
    return ''.join(pieces)
1010 slacapra 1.1
1011 ewv 1.131
def executableName(self):
    """ return "sh " when a user script is configured, the executable otherwise """
    if not self.scriptExe:
        return self.executable
    return "sh "
1017 slacapra 1.1
def executableArgs(self):
    """
    Return the command line arguments passed to the executable.

    With a user script the arguments are the script name followed by $NJob.
    Otherwise the framework job report option is added for CMSSW >= 1_5 and
    the config file option points to pset.py (major >= 2) or pset.cfg.
    """
    # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
    if self.scriptExe:#CarlosDaniele
        return self.scriptExe + " $NJob"

    options = []
    # Framework job report
    wants_report = (self.CMSSW_major >= 1 and self.CMSSW_minor >= 5) or (self.CMSSW_major >= 2)
    if wants_report:
        options.append(" -j $RUNTIME_AREA/crab_fjr_$NJob.xml")
    # Type of config file
    if self.CMSSW_major >= 2 :
        options.append(" -p pset.py")
    else:
        options.append(" -p pset.cfg")
    return "".join(options)
1034 slacapra 1.1
def inputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL input sandbox.

    The list holds the tarball (only if the file exists) followed by the job
    wrapper script, whose name is read from the task DB. 'nj' is not used.
    """
    sandbox = []
    tarball = self.tgzNameWithPath
    if os.path.isfile(tarball):
        sandbox.append(tarball)
    script_name = str(common._db.queryTask('scriptName'))
    job_dir = common.work_space.pathForTgz() +'job/'
    sandbox.append(job_dir + os.path.basename(script_name))
    return sandbox
1045    
def outputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL output sandbox.

    Every user declared output file (self.output_file followed by
    self.output_file_sandbox) is passed through numberFile with the job
    number nj+1.
    """
    ## User Declared output files
    declared = self.output_file+self.output_file_sandbox
    return [numberFile(name, str(nj + 1)) for name in declared]
1057    
1058    
def wsRenameOutput(self, nj):
    """
    Returns part of a job script which renames the produced files.

    For every file in self.output_file the fragment renames it to its
    numbered name (numberFile with '$NJob') and links it back under the
    original name in $RUNTIME_AREA; a missing file sets job_exit_code=60302.
    The numbered names are exported to the script as 'file_list' and the
    fragment ends by changing to $RUNTIME_AREA. 'nj' is not used.
    """
    # directory report emitted both before and after the renaming
    listing = 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    listing += 'echo ">>> current directory content:"\n'
    if self.debug_wrapper:
        listing += 'ls -Al\n'
    listing += '\n'

    script = '\n#Written by cms_cmssw::wsRenameOutput\n' + listing

    numbered_names = []
    for produced in self.output_file:
        numbered = numberFile(produced, '$NJob')
        numbered_names.append(numbered)
        script += '\n'
        script += '# check output file\n'
        script += 'if [ -e ./'+produced+' ] ; then\n'
        if (self.copy_data == 1): # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
            script += ' mv '+produced+' '+numbered+'\n'
            script += ' ln -s `pwd`/'+numbered+' $RUNTIME_AREA/'+produced+'\n'
        else:
            script += ' mv '+produced+' $RUNTIME_AREA/'+numbered+'\n'
            script += ' ln -s $RUNTIME_AREA/'+numbered+' $RUNTIME_AREA/'+produced+'\n'
        script += 'else\n'
        script += ' job_exit_code=60302\n'
        script += ' echo "WARNING: Output file '+produced+' not found"\n'
        if common.scheduler.name().upper() == 'CONDOR_G':
            script += ' if [ $middleware == OSG ]; then \n'
            script += ' echo "prepare dummy output file"\n'
            script += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+numbered+'\n'
            script += ' fi \n'
        script += 'fi\n'

    script += 'file_list="'+' '.join(numbered_names)+'"\n'
    script += '\n' + listing
    script += 'cd $RUNTIME_AREA\n'
    script += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    return script
1105    
def getRequirements(self, nj=[]):
    """
    return job requirements to add to jdl files

    The result is a single expression made of clauses joined by ' && ':
    the CMSSW version tag (if self.version is set), the architecture tag
    (if self.executable_arch is set), the outbound connectivity flag and,
    for the glite/glitecoll schedulers, the CE production status.
    'nj' is not used (and never modified).
    """
    runtimeEnv = 'other.GlueHostApplicationSoftwareRunTimeEnvironment'

    # clauses are collected first, so that a missing version cannot leave
    # a dangling ' && ' at the beginning of the expression
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + '", ' + runtimeEnv + ')')
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + '", ' + runtimeEnv + ')')

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')
    if common.scheduler.name() in ("glitecoll", "glite"):
        # trailing blank kept as in the historical output
        clauses.append('other.GlueCEStateStatus == "Production" ')

    return ' && '.join(clauses)
1125 gutsche 1.3
def configFilename(self):
    """ return the config filename: self.name() plus '.py' (CMSSW >= 2_1) or '.cfg' """
    # FUTURE: Can remove cfg mode for CMSSW >= 2_1_x
    python_config = (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3)
    if python_config:
        suffix = '.py'
    else:
        suffix = '.cfg'
    return self.name()+suffix
1133 gutsche 1.3
def wsSetupCMSOSGEnvironment_(self):
    """
    Returns the part of a job script which prepares the CMS environment
    on OSG: SCRAM_ARCH is exported and cmsset_default.sh is sourced from
    $OSG_APP for self.version; the job exits with code 10020 if that file
    is missing.
    """
    arch = self.executable_arch
    script_lines = [
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH='+arch+'\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n',
        ' else\n',
        ' echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        '\n',
        ' echo "==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(script_lines)
1157 ewv 1.131
def wsSetupCMSLCGEnvironment_(self):
    """
    Returns the part of a job script which prepares the CMS environment
    on LCG: SCRAM_ARCH and BUILD_ARCH are exported and
    $VO_CMS_SW_DIR/cmsset_default.sh is sourced; exit codes 10031, 10020 and
    10032 flag a missing software dir, a missing setup file and a failed
    sourcing respectively.
    """
    arch = self.executable_arch
    script_lines = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH='+arch+'\n',
        ' export BUILD_ARCH='+arch+'\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n',
        ' job_exit_code=10031\n',
        ' func_exit\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "==> setup cms environment ok"\n',
    ]
    return ''.join(script_lines)
1191 gutsche 1.5
def modifyReport(self, nj):
    """
    insert the part of the script that modifies the FrameworkJob Report

    Only when USER.publish_data is 1 does the returned fragment contain more
    than the header comment: it then runs ModifyJobReport.py on
    crab_fjr_$NJob.xml and replaces the report with the rewritten one.
    Side effects in that case: self.user is set and self.primaryDataset is
    replaced by the publication name when it is 'null'.
    'nj' is not used.
    """
    txt = '\n#Written by cms_cmssw::modifyReport\n'
    publish_data = int(self.cfg_params.get('USER.publish_data',0))
    if (publish_data == 1):
        processedDataset = self.cfg_params['USER.publish_data_name']
        # self.primaryDataset is not assigned here; in this file
        # wsSetupEnvironment sets it, so it has to have been called before
        if (self.primaryDataset == 'null'):
            self.primaryDataset = processedDataset
        # CAF and LSF schedulers ask for the local user name
        if (common.scheduler.name().upper() == "CAF" or common.scheduler.name().upper() == "LSF"):
            ### FEDE FOR NEW LFN ###
            LFNBaseName = LFNBase(self.primaryDataset, processedDataset, LocalUser=True)
            self.user = getUserName(LocalUser=True)
            ########################
        else :
            ### FEDE FOR NEW LFN ###
            LFNBaseName = LFNBase(self.primaryDataset, processedDataset)
            self.user = getUserName()
            ########################

        # a failed stage-out gets a placeholder LFN and empty SE information
        txt += 'if [ $copy_exit_status -eq 0 ]; then\n'
        ### FEDE FOR NEW LFN ###
        #txt += ' FOR_LFN=%s_${PSETHASH}/\n'%(LFNBaseName)
        txt += ' FOR_LFN=%s/${PSETHASH}/\n'%(LFNBaseName)
        ########################
        txt += 'else\n'
        txt += ' FOR_LFN=/copy_problems/ \n'
        txt += ' SE=""\n'
        txt += ' SE_PATH=""\n'
        txt += 'fi\n'

        txt += 'echo ">>> Modify Job Report:" \n'
        txt += 'chmod a+x $RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
        txt += 'ProcessedDataset='+processedDataset+'\n'
        txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
        txt += 'echo "SE = $SE"\n'
        txt += 'echo "SE_PATH = $SE_PATH"\n'
        txt += 'echo "FOR_LFN = $FOR_LFN" \n'
        txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
        ### FEDE FOR NEW LFN ###
        # the command is echoed first, then executed with the same arguments
        txt += 'echo "$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier ' + self.user + '-$ProcessedDataset-$PSETHASH $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
        txt += '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier ' + self.user + '-$ProcessedDataset-$PSETHASH $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
        ########################
        txt += 'modifyReport_result=$?\n'
        # any failure is reported with the fixed code 70500
        txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
        txt += ' modifyReport_result=70500\n'
        txt += ' job_exit_code=$modifyReport_result\n'
        txt += ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' echo "WARNING: Problem with ModifyJobReport"\n'
        txt += 'else\n'
        txt += ' mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
        txt += 'fi\n'
    return txt
1246 fanzago 1.99
def wsParseFJR(self):
    """
    Parse the FrameworkJobReport to obtain useful infos

    Returns the shell fragment that runs parseCrabFjr.py on
    $RUNTIME_AREA/crab_fjr_$NJob.xml, takes the executable exit status from
    it, optionally compares the list of processed files with $InputFiles
    (exit status 30001 on mismatch) and finally stores the status in
    job_exit_code.
    """
    txt = '\n#Written by cms_cmssw::wsParseFJR\n'
    txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
    txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
    txt += ' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
    # first pass: dashboard reporting; self.debugWrap is appended to the command line as-is
    txt += ' cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n'
    if self.debug_wrapper :
        txt += ' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
    # second pass: extract the exit code of the executable
    txt += ' executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n'
    txt += ' if [ $executable_exit_status -eq 50115 ];then\n'
    txt += ' echo ">>> crab_fjr.xml contents: "\n'
    txt += ' cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
    txt += ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
    txt += ' elif [ $executable_exit_status -eq -999 ];then\n'
    txt += ' echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n'
    txt += ' else\n'
    txt += ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
    txt += ' fi\n'
    txt += ' else\n'
    txt += ' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
    txt += ' fi\n'
    #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap

    txt += ' if [ $executable_exit_status -eq 0 ];then\n'
    txt += ' echo ">>> Executable succeded $executable_exit_status"\n'
    # the file list check is skipped for pile-up datasets and when parents are used
    if (self.datasetPath and not (self.dataset_pu or self.useParent)) :
        # VERIFY PROCESSED DATA
        txt += ' echo ">>> Verify list of processed files:"\n'
        # one input file per line: backslashes and double quotes removed, commas turned into newlines
        txt += ' echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
        txt += ' python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --lfn > processed-files.txt\n'
        txt += ' cat input-files.txt | sort | uniq > tmp.txt\n'
        txt += ' mv tmp.txt input-files.txt\n'
        txt += ' echo "cat input-files.txt"\n'
        txt += ' echo "----------------------"\n'
        txt += ' cat input-files.txt\n'
        txt += ' cat processed-files.txt | sort | uniq > tmp.txt\n'
        txt += ' mv tmp.txt processed-files.txt\n'
        txt += ' echo "----------------------"\n'
        txt += ' echo "cat processed-files.txt"\n'
        txt += ' echo "----------------------"\n'
        txt += ' cat processed-files.txt\n'
        txt += ' echo "----------------------"\n'
        txt += ' diff -q input-files.txt processed-files.txt\n'
        txt += ' fileverify_status=$?\n'
        txt += ' if [ $fileverify_status -ne 0 ]; then\n'
        txt += ' executable_exit_status=30001\n'
        txt += ' echo "ERROR ==> not all input files processed"\n'
        txt += ' echo " ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n'
        txt += ' echo " ==> diff input-files.txt processed-files.txt"\n'
        txt += ' fi\n'
    # NOTE(review): the -ne tests below are OR-ed, so this branch is taken for
    # every non-zero status, 50015 and 50017 included - confirm whether those
    # two codes were meant to be exempted from func_exit
    txt += ' elif [ $executable_exit_status -ne 0 ] || [ $executable_exit_status -ne 50015 ] || [ $executable_exit_status -ne 50017 ];then\n'
    txt += ' echo ">>> Executable failed $executable_exit_status"\n'
    txt += ' func_exit\n'
    txt += ' fi\n'
    txt += '\n'
    txt += 'else\n'
    txt += ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
    txt += 'fi\n'
    txt += '\n'
    txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
    txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
    txt += 'job_exit_code=$executable_exit_status\n'

    return txt
1314    
def setParam_(self, param, value):
    """ store 'value' under the key 'param' in the job type parameters """
    self._params.update({param: value})
1317    
def getParams(self):
    """ return the dictionary of job type parameters (the object itself, not a copy) """
    params = self._params
    return params
1320 gutsche 1.8
def uniquelist(self, old):
    """
    remove duplicates from a list

    The elements of 'old' are used as dictionary keys, so they must be
    hashable; the keys of that dictionary are returned.
    """
    return dict.fromkeys(old, 0).keys()
1329 mcinquil 1.121
def outList(self):
    """
    Return the job-script fragment listing the files expected in the
    output sandbox.

    The list holds the numbered names (numberFile with '$NJob') of the
    sandbox output files, preceded by the regular output files when
    self.return_data is 1, followed by the job stdout and stderr. It is
    echoed and exported to the script as 'filesToCheck'.
    """
    if (self.return_data == 1):
        declared = self.output_file+self.output_file_sandbox
    else:
        declared = self.output_file_sandbox

    expected = [numberFile(name, '$NJob') for name in declared]
    expected += ['CMSSW_$NJob.stdout', 'CMSSW_$NJob.stderr']
    joined = ' '.join(expected)

    script = 'echo ">>> list of expected files on output sandbox"\n'
    script += 'echo "output files: '+joined+'"\n'
    script += 'filesToCheck="'+joined+'"\n'
    script += 'export filesToCheck\n'
    return script