ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.125
Committed: Wed Oct 10 13:29:26 2007 UTC (17 years, 6 months ago) by mcinquil
Content type: text/x-python
Branch: MAIN
Changes since 1.124: +1 -0 lines
Log Message:
Recovering the missing line on output_exit_status

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5 fanzago 1.115 from BlackWhiteListParser import BlackWhiteListParser
6 slacapra 1.1 import common
7     import Scram
8    
9 slacapra 1.105 import os, string, glob
10 slacapra 1.1
11     class Cmssw(JobType):
12 gutsche 1.38 def __init__(self, cfg_params, ncjobs):
# Build the CMSSW job type from the user configuration.
#   cfg_params -- configuration mapping indexed with 'SECTION.key' strings;
#                 a missing key raises KeyError, which selects the default
#                 (or a CrabException for the mandatory entries).
#   ncjobs     -- number of jobs to create; stored in self.ncjobs and
#                 compared against the string 'all' in jobSplittingByBlocks.
# In order: reads the configuration, checks the version/architecture pair,
# runs data discovery when a dataset is given, builds the tarball, splits
# the task into jobs and writes the manipulated parameter set.
# Raises CrabException for every configuration problem detected below.
# NOTE(review): this listing has lost its indentation; the nesting of the
# branches must be taken from the original file, not inferred from here.
13 slacapra 1.1 JobType.__init__(self, 'CMSSW')
14     common.logger.debug(3,'CMSSW::__init__')
15    
16 gutsche 1.3 self._params = {}
17     self.cfg_params = cfg_params
18 gutsche 1.38
19 fanzago 1.115 # init BlackWhiteListParser
20     self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
21    
# Maximum tarball size: taken from EDG.maxtarballsize, 9.5 when not configured
# (the unit is not visible in this block).
22 gutsche 1.72 try:
23     self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24     except KeyError:
25 slacapra 1.86 self.MaxTarBallSize = 9.5
26 gutsche 1.72
27 gutsche 1.44 # number of jobs requested to be created, limit obj splitting
28 gutsche 1.38 self.ncjobs = ncjobs
29    
30 slacapra 1.1 log = common.logger
31    
32     self.scram = Scram.Scram(cfg_params)
33     self.additional_inbox_files = []
34     self.scriptExe = ''
35     self.executable = ''
36 slacapra 1.71 self.executable_arch = self.scram.getArch()
37 slacapra 1.1 self.tgz_name = 'default.tgz'
38 slacapra 1.97 self.additional_tgz_name = 'additional.tgz'
39 corvo 1.56 self.scriptName = 'CMSSW.sh'
40 spiga 1.42 self.pset = '' #scrip use case Da
41     self.datasetPath = '' #scrip use case Da
42 gutsche 1.3
43 gutsche 1.50 # set FJR file name
44     self.fjrFileName = 'crab_fjr.xml'
45    
46 slacapra 1.1 self.version = self.scram.getSWVersion()
47 spiga 1.114
48     #
49     # Try to block creation in case of arch/version mismatch
50     #
51    
# The version string is split on '_'; fields 1 and 2 are read as integers
# (presumably the major and minor numbers of a CMSSW_X_Y_Z string -- confirm).
# A non-numeric field raises ValueError, which is not handled here.
52     a = string.split(self.version, "_")
53    
# find(...) == 0 means "the architecture string starts with":
# 1.x with x < 5 is refused on slc4, 1.x with x >= 5 is refused on slc3.
54     if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
55     msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
56     raise CrabException(msg)
57     if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58     msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
59     raise CrabException(msg)
60    
61 slacapra 1.55 common.taskDB.setDict('codeVersion',self.version)
62 gutsche 1.5 self.setParam_('application', self.version)
63 slacapra 1.47
64 slacapra 1.1 ### collect Data cards
65 gutsche 1.66
66     ## get DBS mode
67     try:
68 slacapra 1.86 self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
69 gutsche 1.66 except KeyError:
70 slacapra 1.86 self.use_dbs_1 = 0
71 gutsche 1.66
# datasetpath is mandatory. The literal 'none' (any case) means "no input
# dataset": datasetPath becomes None and selectNoInput is set to 1.
72 slacapra 1.1 try:
73 slacapra 1.9 tmp = cfg_params['CMSSW.datasetpath']
74     log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
75     if string.lower(tmp)=='none':
76     self.datasetPath = None
77 slacapra 1.21 self.selectNoInput = 1
78 slacapra 1.9 else:
79     self.datasetPath = tmp
80 slacapra 1.21 self.selectNoInput = 0
81 slacapra 1.1 except KeyError:
82 gutsche 1.3 msg = "Error: datasetpath not defined "
83 slacapra 1.1 raise CrabException(msg)
84 gutsche 1.5
85     # ML monitoring
86     # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
87 slacapra 1.9 if not self.datasetPath:
88     self.setParam_('dataset', 'None')
89     self.setParam_('owner', 'None')
90     else:
91 gutsche 1.92 try:
92     datasetpath_split = self.datasetPath.split("/")
93     # standard style
94 mcinquil 1.120 self.setParam_('datasetFull', self.datasetPath)
95 gutsche 1.92 if self.use_dbs_1 == 1 :
96     self.setParam_('dataset', datasetpath_split[1])
97     self.setParam_('owner', datasetpath_split[-1])
98     else:
99     self.setParam_('dataset', datasetpath_split[1])
100     self.setParam_('owner', datasetpath_split[2])
# NOTE(review): bare except -- any failure above (not only a short path)
# falls back to using the raw dataset path for both fields.
101     except:
102     self.setParam_('dataset', self.datasetPath)
103     self.setParam_('owner', self.datasetPath)
104    
105 gutsche 1.8 self.setTaskid_()
106     self.setParam_('taskId', self.cfg_params['taskId'])
107 gutsche 1.5
108 slacapra 1.1 self.dataTiers = []
109    
110     ## now the application
# The executable defaults to 'cmsRun' when CMSSW.executable is not configured.
111     try:
112     self.executable = cfg_params['CMSSW.executable']
113 gutsche 1.5 self.setParam_('exe', self.executable)
114 slacapra 1.1 log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
115     msg = "Default executable cmsRun overridden. Switch to " + self.executable
116     log.debug(3,msg)
117     except KeyError:
118     self.executable = 'cmsRun'
119 gutsche 1.5 self.setParam_('exe', self.executable)
120 slacapra 1.1 msg = "User executable not defined. Use cmsRun"
121     log.debug(3,msg)
122     pass
123    
# CMSSW.pset is mandatory. The literal 'none' (any case) turns self.pset
# into None; any other value must be the path of an existing file.
124     try:
125     self.pset = cfg_params['CMSSW.pset']
126     log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
127 spiga 1.42 if self.pset.lower() != 'none' :
128     if (not os.path.exists(self.pset)):
129     raise CrabException("User defined PSet file "+self.pset+" does not exist")
130     else:
131     self.pset = None
132 slacapra 1.1 except KeyError:
133     raise CrabException("PSet file missing. Cannot run cmsRun ")
134    
135     # output files
136 slacapra 1.53 ## stuff which must be returned always via sandbox
137     self.output_file_sandbox = []
138    
139     # add fjr report by default via sandbox
140     self.output_file_sandbox.append(self.fjrFileName)
141    
142     # other output files to be returned via sandbox or copied to SE
# CMSSW.output_file is a comma-separated list; each entry is stripped.
143 slacapra 1.1 try:
144     self.output_file = []
145     tmp = cfg_params['CMSSW.output_file']
146     if tmp != '':
147     tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
148     log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
149     for tmp in tmpOutFiles:
150     tmp=string.strip(tmp)
151     self.output_file.append(tmp)
152     pass
153     else:
154 gutsche 1.92 log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
155 slacapra 1.1 pass
156     pass
157     except KeyError:
158 gutsche 1.92 log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
159 slacapra 1.1 pass
160    
161     # script_exe file as additional file in inputSandbox
162     try:
163 slacapra 1.10 self.scriptExe = cfg_params['USER.script_exe']
164     if self.scriptExe != '':
165     if not os.path.isfile(self.scriptExe):
166 slacapra 1.64 msg ="ERROR. file "+self.scriptExe+" not found"
167 slacapra 1.10 raise CrabException(msg)
168 spiga 1.42 self.additional_inbox_files.append(string.strip(self.scriptExe))
169 slacapra 1.1 except KeyError:
170 spiga 1.42 self.scriptExe = ''
171 slacapra 1.70
172 spiga 1.42 #CarlosDaniele
# With neither a dataset nor a pset, a user script is the only thing left to run.
173     if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
174 slacapra 1.70 msg ="Error. script_exe not defined"
175 spiga 1.42 raise CrabException(msg)
176    
177 slacapra 1.1 ## additional input files
# USER.additional_input_files is a comma-separated list. An entry containing
# '*' is expanded with glob; every resulting file must exist.
178     try:
179 slacapra 1.29 tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
180 slacapra 1.70 for tmp in tmpAddFiles:
181     tmp = string.strip(tmp)
182     dirname = ''
# NOTE(review): tmp[0] raises IndexError for an empty entry (e.g. a trailing
# comma in the list); only KeyError is handled by this try block.
183     if not tmp[0]=="/": dirname = "."
184 corvo 1.85 files = []
185     if string.find(tmp,"*")>-1:
186     files = glob.glob(os.path.join(dirname, tmp))
187     if len(files)==0:
188     raise CrabException("No additional input file found with this pattern: "+tmp)
189     else:
190     files.append(tmp)
191 slacapra 1.70 for file in files:
192     if not os.path.exists(file):
193     raise CrabException("Additional input file not found: "+file)
194 slacapra 1.45 pass
195 slacapra 1.105 # fname = string.split(file, '/')[-1]
196     # storedFile = common.work_space.pathForTgz()+'share/'+fname
197     # shutil.copyfile(file, storedFile)
198     self.additional_inbox_files.append(string.strip(file))
199 slacapra 1.1 pass
200     pass
201 slacapra 1.70 common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
202 slacapra 1.1 except KeyError:
203     pass
204    
205 slacapra 1.9 # files per job
# files_per_jobs is refused when present with a true value; absent is fine.
206 slacapra 1.1 try:
207 gutsche 1.35 if (cfg_params['CMSSW.files_per_jobs']):
208     raise CrabException("files_per_jobs no longer supported. Quitting.")
209 gutsche 1.3 except KeyError:
210 gutsche 1.35 pass
211 gutsche 1.3
# The three splitting parameters below each come with a select* flag
# (1 = given by the user, 0 = default used).
212 slacapra 1.9 ## Events per job
213 gutsche 1.3 try:
214 slacapra 1.10 self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
215 slacapra 1.9 self.selectEventsPerJob = 1
216 gutsche 1.3 except KeyError:
217 slacapra 1.9 self.eventsPerJob = -1
218     self.selectEventsPerJob = 0
219    
220 slacapra 1.22 ## number of jobs
221     try:
222     self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
223     self.selectNumberOfJobs = 1
224     except KeyError:
225     self.theNumberOfJobs = 0
226     self.selectNumberOfJobs = 0
227 slacapra 1.10
228 gutsche 1.35 try:
229     self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
230     self.selectTotalNumberEvents = 1
231     except KeyError:
232     self.total_number_of_events = 0
233     self.selectTotalNumberEvents = 0
234    
# With a pset exactly two of the three parameters must be given (the flags
# are summed); the other branch only insists on number_of_jobs.
235 spiga 1.42 if self.pset != None: #CarlosDaniele
236     if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
237     msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
238     raise CrabException(msg)
239     else:
240     if (self.selectNumberOfJobs == 0):
241     msg = 'Must specify number_of_jobs.'
242     raise CrabException(msg)
243 gutsche 1.35
# Optional seeds and first run: each attribute is None when not configured.
244 slacapra 1.22 ## source seed for pythia
245     try:
246     self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
247     except KeyError:
248 slacapra 1.23 self.sourceSeed = None
249     common.logger.debug(5,"No seed given")
250 slacapra 1.22
251 slacapra 1.28 try:
252     self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
253     except KeyError:
254     self.sourceSeedVtx = None
255     common.logger.debug(5,"No vertex seed given")
256 slacapra 1.90
257     try:
258     self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
259     except KeyError:
260     self.sourceSeedG4 = None
261     common.logger.debug(5,"No g4 sim hits seed given")
262    
263     try:
264     self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
265     except KeyError:
266     self.sourceSeedMix = None
267     common.logger.debug(5,"No mix seed given")
268    
269 spiga 1.57 try:
270     self.firstRun = int(cfg_params['CMSSW.first_run'])
271     except KeyError:
272     self.firstRun = None
273     common.logger.debug(5,"No first run given")
# Choose the parameter-set manipulator module from the version fields.
# NOTE(review): the test requires field 2 >= 5 whatever field 1 is, so a
# version such as X_2_0_y would select the old PsetManipulator -- confirm
# whether a (major, minor) >= (1, 5) comparison was intended.
274 spiga 1.42 if self.pset != None: #CarlosDaniele
275 slacapra 1.97 ver = string.split(self.version,"_")
276     if (int(ver[1])>=1 and int(ver[2])>=5):
277     import PsetManipulator150 as pp
278     else:
279     import PsetManipulator as pp
280     PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
281 gutsche 1.3
282 slacapra 1.1 #DBSDLS-start
283     ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
284     self.maxEvents=0 # max events available ( --> check the requested nb. of evts in Creator.py)
285     self.DBSPaths={} # all dbs paths requested ( --> input to the site local discovery script)
286 gutsche 1.35 self.jobDestination=[] # Site destination(s) for each job (list of lists)
287 slacapra 1.1 ## Perform the data location and discovery (based on DBS/DLS)
288 slacapra 1.9 ## SL: Don't if NONE is specified as input (pythia use case)
289 gutsche 1.35 blockSites = {}
290 slacapra 1.9 if self.datasetPath:
291 gutsche 1.35 blockSites = self.DataDiscoveryAndLocation(cfg_params)
292 slacapra 1.1 #DBSDLS-end
293    
294     self.tgzNameWithPath = self.getTarBall(self.executable)
295 slacapra 1.10
296 slacapra 1.9 ## Select Splitting
# No input + no pset -> per-script splitting; no input + pset -> per-event
# splitting; otherwise split along the blocks returned by data discovery.
297 spiga 1.42 if self.selectNoInput:
298     if self.pset == None: #CarlosDaniele
299     self.jobSplittingForScript()
300     else:
301     self.jobSplittingNoInput()
302 gutsche 1.92 else:
303 corvo 1.56 self.jobSplittingByBlocks(blockSites)
304 gutsche 1.5
305 slacapra 1.22 # modify Pset
# The quoted upper-case strings passed to PsetEdit below are written as
# literals; presumably placeholders substituted per job later -- confirm.
306 spiga 1.42 if self.pset != None: #CarlosDaniele
307 slacapra 1.86 try:
308     if (self.datasetPath): # standard job
309     # allow to processa a fraction of events in a file
310 slacapra 1.90 PsetEdit.inputModule("INPUT")
311     PsetEdit.maxEvent("INPUTMAXEVENTS")
312     PsetEdit.skipEvent("INPUTSKIPEVENTS")
313 slacapra 1.86 else: # pythia like job
314 slacapra 1.90 PsetEdit.maxEvent(self.eventsPerJob)
315 slacapra 1.86 if (self.firstRun):
316 slacapra 1.90 PsetEdit.pythiaFirstRun("INPUTFIRSTRUN") #First Run
317 slacapra 1.86 if (self.sourceSeed) :
318 slacapra 1.90 PsetEdit.pythiaSeed("INPUT")
319 slacapra 1.86 if (self.sourceSeedVtx) :
320 slacapra 1.90 PsetEdit.vtxSeed("INPUTVTX")
321     if (self.sourceSeedG4) :
322 slacapra 1.118 PsetEdit.g4Seed("INPUTG4")
323 slacapra 1.90 if (self.sourceSeedMix) :
324 slacapra 1.118 PsetEdit.mixSeed("INPUTMIX")
325 slacapra 1.86 # add FrameworkJobReport to parameter-set
326 slacapra 1.90 PsetEdit.addCrabFJR(self.fjrFileName)
327     PsetEdit.psetWriter(self.configFilename())
# NOTE(review): bare except -- every failure in the try block is replaced by
# this generic message and the original error text is lost.
328 slacapra 1.86 except:
329     msg='Error while manipuliating ParameterSet: exiting...'
330     raise CrabException(msg)
331 gutsche 1.3
def DataDiscoveryAndLocation(self, cfg_params):
    """
    Query the data discovery service for self.datasetPath and the data
    location service for the sites hosting its blocks.

    ARGUMENT: cfg_params: configuration mapping, handed unchanged to the
              discovery and location helpers
    SETS: self.pubdata       - discovery object (DataDiscovery when
                               self.use_dbs_1 == 1, DataDiscovery_DBS2 otherwise)
          self.filesbyblock  - self.pubdata.getFiles()
          self.eventsbyblock - self.pubdata.getEventsPerBlock()
          self.eventsbyfile  - self.pubdata.getEventsPerFile()
          self.maxEvents     - self.pubdata.getMaxEvents()
    RETURNS: the value of dataloc.getSites(); the caller uses it as a
             dictionary with blocks as keys and lists of sites as values
    RAISES: CrabException when data discovery or data location fails
    """
    import sys

    import DataDiscovery
    import DataDiscovery_DBS2
    import DataLocation
    common.logger.debug(10, "CMSSW::DataDiscoveryAndLocation()")

    datasetPath = self.datasetPath

    # Every discovery failure, whichever DBS flavour raised it, is reported
    # to the user with the same message, so one handler covers them all.
    discoveryErrors = (
        DataDiscovery.NotExistingDatasetError,
        DataDiscovery.NoDataTierinProvenanceError,
        DataDiscovery.DataDiscoveryError,
        DataDiscovery_DBS2.NotExistingDatasetError_DBS2,
        DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2,
        DataDiscovery_DBS2.DataDiscoveryError_DBS2,
    )

    ## Contact the DBS
    common.logger.message("Contacting Data Discovery Services ...")
    try:
        if self.use_dbs_1 == 1:
            self.pubdata = DataDiscovery.DataDiscovery(datasetPath, cfg_params)
        else:
            self.pubdata = DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
        self.pubdata.fetchDBSInfo()
    except discoveryErrors:
        # sys.exc_info() keeps this block valid syntax on every Python
        # version (neither "except E, ex" nor "except E as ex" is).
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Discovery in DBS : %s' % ex.getErrorMessage()
        raise CrabException(msg)

    self.filesbyblock = self.pubdata.getFiles()
    self.eventsbyblock = self.pubdata.getEventsPerBlock()
    self.eventsbyfile = self.pubdata.getEventsPerFile()

    ## get max number of events
    self.maxEvents = self.pubdata.getMaxEvents()  ## self.maxEvents used in Creator.py

    ## Contact the DLS and build a list of sites hosting the fileblocks
    try:
        dataloc = DataLocation.DataLocation(self.filesbyblock.keys(), cfg_params)
        dataloc.fetchDLSInfo()
    except DataLocation.DataLocationError:
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Location in DLS \n %s ' % ex.getErrorMessage()
        raise CrabException(msg)

    sites = dataloc.getSites()

    # screen output
    common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock)) + " blocks.\n")

    return sites
398 gutsche 1.3
399 gutsche 1.35 def jobSplittingByBlocks(self, blockSites):
400 slacapra 1.9 """
401 gutsche 1.35 Perform job splitting. Jobs run over an integer number of files
402     and no more than one block.
403     ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
404     REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberofJobs,
405     self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
406     self.maxEvents, self.filesbyblock
407     SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
408     self.total_number_of_jobs - Total # of jobs
409     self.list_of_args - File(s) job will run on (a list of lists)
410     """
# Also read here: self.eventsbyblock, self.eventsbyfile, self.ncjobs and
# self.blackWhiteListParser. self.ncjobs is overwritten with the number of
# jobs actually created.
# NOTE(review): this listing has lost its indentation; the nesting of the
# branches below must be taken from the original file.
411    
412     # ---- Handle the possible job splitting configurations ---- #
# NOTE(review): totalEventsRequested and eventsPerJobRequested are only bound
# when the matching select* flags are set; confirm every configuration that
# reaches this method binds both before they are read.
413     if (self.selectTotalNumberEvents):
414     totalEventsRequested = self.total_number_of_events
415     if (self.selectEventsPerJob):
416     eventsPerJobRequested = self.eventsPerJob
417     if (self.selectNumberOfJobs):
418     totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
419    
420     # If user requested all the events in the dataset
421     if (totalEventsRequested == -1):
422     eventsRemaining=self.maxEvents
423     # If user requested more events than are in the dataset
424     elif (totalEventsRequested > self.maxEvents):
425     eventsRemaining = self.maxEvents
# NOTE(review): the message prints self.total_number_of_events, not
# totalEventsRequested, which is the value compared just above.
426     common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
427     # If user requested less events than are in the dataset
428     else:
429     eventsRemaining = totalEventsRequested
430 slacapra 1.22
431 slacapra 1.41 # If user requested more events per job than are in the dataset
432     if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
433     eventsPerJobRequested = self.maxEvents
434    
435 gutsche 1.35 # For user info at end
436     totalEventCount = 0
437 gutsche 1.3
# NOTE(review): division by self.theNumberOfJobs; a configured value of 0
# would raise ZeroDivisionError here.
438 gutsche 1.35 if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
439     eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
440 slacapra 1.22
441 gutsche 1.35 if (self.selectNumberOfJobs):
442     common.logger.message("May not create the exact number_of_jobs requested.")
443 slacapra 1.23
# 'all' is turned into a very large cap so the loop conditions never stop on it.
444 gutsche 1.38 if ( self.ncjobs == 'all' ) :
445     totalNumberOfJobs = 999999999
446     else :
447     totalNumberOfJobs = self.ncjobs
448    
449    
450 gutsche 1.35 blocks = blockSites.keys()
451     blockCount = 0
452     # Backup variable in case self.maxEvents counted events in a non-included block
453     numBlocksInDataset = len(blocks)
454 gutsche 1.3
455 gutsche 1.35 jobCount = 0
456     list_of_lists = []
457 gutsche 1.3
458 gutsche 1.92 # dictionary tracking which jobs belong to which block
459     jobsOfBlock = {}
460    
461 gutsche 1.35 # ---- Iterate over the blocks in the dataset until ---- #
462     # ---- we've met the requested total # of events ---- #
463 gutsche 1.38 while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
464 gutsche 1.35 block = blocks[blockCount]
465 gutsche 1.44 blockCount += 1
466 gutsche 1.104 if block not in jobsOfBlock.keys() :
467     jobsOfBlock[block] = []
468 gutsche 1.44
469 gutsche 1.68 if self.eventsbyblock.has_key(block) :
470     numEventsInBlock = self.eventsbyblock[block]
471     common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
472 slacapra 1.9
473 gutsche 1.68 files = self.filesbyblock[block]
474     numFilesInBlock = len(files)
475     if (numFilesInBlock <= 0):
476     continue
477     fileCount = 0
478    
479     # ---- New block => New job ---- #
# parString accumulates the file list of the job being built: an escaped '{'
# followed by one escaped-quoted file name plus a '\,' separator per file.
480     parString = "\\{"
481     # counter for number of events in files currently worked on
482     filesEventCount = 0
483     # flag if next while loop should touch new file
484     newFile = 1
485     # job event counter
486     jobSkipEventCount = 0
487 slacapra 1.9
488 gutsche 1.68 # ---- Iterate over the files in the block until we've met the requested ---- #
489     # ---- total # of events or we've gone over all the files in this block ---- #
490     while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
491     file = files[fileCount]
492     if newFile :
493     try:
494     numEventsInFile = self.eventsbyfile[file]
495     common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
496     # increase filesEventCount
497     filesEventCount += numEventsInFile
498     # Add file to current job
499     parString += '\\\"' + file + '\\\"\,'
500     newFile = 0
501     except KeyError:
502     common.logger.message("File "+str(file)+" has unknown number of events: skipping")
503 slacapra 1.41
504 gutsche 1.38
505 gutsche 1.68 # if less events in file remain than eventsPerJobRequested
506     if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
507     # if last file in block
508     if ( fileCount == numFilesInBlock-1 ) :
509     # end job using last file, use remaining events in block
510     # close job and touch new file
# [:-2] removes the two-character '\,' separator left after the last file.
511     fullString = parString[:-2]
512     fullString += '\\}'
# Each job entry is [file list string, events to process, events to skip];
# -1 is passed as the event count for this "rest of the block" job.
513     list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
514     common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
515     self.jobDestination.append(blockSites[block])
516     common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
517 gutsche 1.92 # fill jobs of block dictionary
518 gutsche 1.104 jobsOfBlock[block].append(jobCount+1)
519 gutsche 1.68 # reset counter
520     jobCount = jobCount + 1
521     totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
522     eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
523     jobSkipEventCount = 0
524     # reset file
525     parString = "\\{"
526     filesEventCount = 0
527     newFile = 1
528     fileCount += 1
529     else :
530     # go to next file
531     newFile = 1
532     fileCount += 1
533     # if events in file equal to eventsPerJobRequested
534     elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
535 gutsche 1.38 # close job and touch new file
536     fullString = parString[:-2]
537     fullString += '\\}'
538 gutsche 1.68 list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
539     common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
540 gutsche 1.38 self.jobDestination.append(blockSites[block])
541     common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
542 gutsche 1.104 jobsOfBlock[block].append(jobCount+1)
543 gutsche 1.38 # reset counter
544     jobCount = jobCount + 1
545 gutsche 1.68 totalEventCount = totalEventCount + eventsPerJobRequested
546     eventsRemaining = eventsRemaining - eventsPerJobRequested
547 gutsche 1.38 jobSkipEventCount = 0
548     # reset file
549     parString = "\\{"
550     filesEventCount = 0
551     newFile = 1
552     fileCount += 1
553 gutsche 1.68
554     # if more events in file remain than eventsPerJobRequested
555 gutsche 1.38 else :
556 gutsche 1.68 # close job but don't touch new file
557     fullString = parString[:-2]
558     fullString += '\\}'
559     list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
560     common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
561     self.jobDestination.append(blockSites[block])
562     common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
563 gutsche 1.104 jobsOfBlock[block].append(jobCount+1)
564 gutsche 1.68 # increase counter
565     jobCount = jobCount + 1
566     totalEventCount = totalEventCount + eventsPerJobRequested
567     eventsRemaining = eventsRemaining - eventsPerJobRequested
568     # calculate skip events for last file
569     # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
# The next job restarts from the current file only: the skip count becomes
# the number of events of that file already consumed by the job just closed.
570     jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
571     # remove all but the last file
572     filesEventCount = self.eventsbyfile[file]
573     parString = "\\{"
574     parString += '\\\"' + file + '\\\"\,'
575     pass # END if
576     pass # END while (iterate over files in the block)
577 gutsche 1.35 pass # END while (iterate over blocks in the dataset)
# self.ncjobs is replaced by the number of jobs actually created.
578 slacapra 1.41 self.ncjobs = self.total_number_of_jobs = jobCount
579 gutsche 1.38 if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
580 gutsche 1.35 common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
581 gutsche 1.92 common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
582 slacapra 1.22
583 gutsche 1.92 # screen output
584     screenOutput = "List of jobs and available destination sites:\n\n"
585    
586 mcinquil 1.124 # keep trace of block with no sites to print a warning at the end
# noSiteBlock collects the job ranges, bloskNoSite the block counters.
587     noSiteBlock = []
588     bloskNoSite = []
589    
590 gutsche 1.92 blockCounter = 0
591 gutsche 1.104 for block in blocks:
592     if block in jobsOfBlock.keys() :
593     blockCounter += 1
# The black list filter is applied first, then the white list filter; the
# same filtered site list is computed once for printing and once for the test.
594 fanzago 1.115 screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
595 mcinquil 1.124 if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
596     noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
597     bloskNoSite.append( blockCounter )
598    
599     common.logger.message(screenOutput)
600 gutsche 1.92
# NOTE(review): no test on noSiteBlock/bloskNoSite being non-empty is visible
# before this warning is built and logged -- confirm it is not emitted when
# every block has at least one site. When there are several entries the
# separator is appended after each of them, the last one included.
601 mcinquil 1.124 msg = 'WARNING: No sites are hosting any part of data for block:\n '
602     virgola = ""
603     if len(bloskNoSite) > 1:
604     virgola = ","
605     for block in bloskNoSite:
606     msg += ' ' + str(block) + virgola
607    
608     msg += '\n Related jobs:\n '
609     virgola = ""
610     if len(noSiteBlock) > 1:
611     virgola = ","
612     for range_jobs in noSiteBlock:
613     msg += str(range_jobs) + virgola
614     msg += '\n will not be submitted and this block of data can not be analyzed!\n'
615    
616     common.logger.message(msg)
617 gutsche 1.92
618 slacapra 1.9 self.list_of_args = list_of_lists
619     return
620    
621 slacapra 1.21 def jobSplittingNoInput(self):
622 slacapra 1.9 """
623     Perform job splitting based on number of event per job
624     """
# Reads self.eventsPerJob, self.theNumberOfJobs, self.total_number_of_events,
# the three select* flags, self.firstRun and the four sourceSeed* attributes.
# Sets self.total_number_of_jobs, self.list_of_args and, depending on the
# branch taken, self.total_number_of_events or self.eventsPerJob; appends one
# entry per job to self.jobDestination.
# Raises CrabException when the total number of events is negative.
# NOTE(review): this listing has lost its indentation; in particular the
# nesting of the seed tests near the end must be taken from the original file.
625     common.logger.debug(5,'Splitting per events')
626     common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
627 slacapra 1.22 common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
628 slacapra 1.9 common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
629    
630 slacapra 1.10 if (self.total_number_of_events < 0):
631     msg='Cannot split jobs per Events with "-1" as total number of events'
632     raise CrabException(msg)
633    
# The missing third quantity is derived from the two that were configured.
# NOTE(review): both divisions below raise ZeroDivisionError when the divisor
# (events per job, or number of jobs) was configured as 0.
634 slacapra 1.22 if (self.selectEventsPerJob):
635 spiga 1.65 if (self.selectTotalNumberEvents):
636     self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
637     elif(self.selectNumberOfJobs) :
638     self.total_number_of_jobs =self.theNumberOfJobs
639     self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
640    
641 slacapra 1.22 elif (self.selectNumberOfJobs) :
642     self.total_number_of_jobs = self.theNumberOfJobs
643     self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
644 spiga 1.65
645 slacapra 1.9 common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs))
646    
647     # is there any remainder?
648     check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)
649    
650     common.logger.debug(5,'Check '+str(check))
651    
652 gutsche 1.35 common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
653 slacapra 1.9 if check > 0:
654 gutsche 1.35 common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
655 slacapra 1.9
656 slacapra 1.10 # argument is seed number.$i
# Each job gets a list of string arguments. The values are built by string
# concatenation of the configured number and the job index, not by addition
# (e.g. a first run of 1 and job index 12 give "112").
657     self.list_of_args = []
658     for i in range(self.total_number_of_jobs):
659 gutsche 1.35 ## Since there is no input, any site is good
660 slacapra 1.86 # self.jobDestination.append(["Any"])
661 spiga 1.42 self.jobDestination.append([""]) #must be empty to write correctly the xml
662 slacapra 1.90 args=[]
663 spiga 1.57 if (self.firstRun):
664     ## pythia first run
665 slacapra 1.86 #self.list_of_args.append([(str(self.firstRun)+str(i))])
666 slacapra 1.90 args.append(str(self.firstRun)+str(i))
667 spiga 1.57 else:
668     ## no first run
669 slacapra 1.86 #self.list_of_args.append([str(i)])
670 slacapra 1.90 args.append(str(i))
# A seed configured as 0 is treated like a missing one by these truth tests.
671 slacapra 1.23 if (self.sourceSeed):
672 slacapra 1.90 args.append(str(self.sourceSeed)+str(i))
673 slacapra 1.28 if (self.sourceSeedVtx):
674 slacapra 1.90 ## + vtx random seed
675     args.append(str(self.sourceSeedVtx)+str(i))
676     if (self.sourceSeedG4):
677     ## + G4 random seed
678     args.append(str(self.sourceSeedG4)+str(i))
679     if (self.sourceSeedMix):
680     ## + Mix random seed
681     args.append(str(self.sourceSeedMix)+str(i))
682     pass
683     pass
684     self.list_of_args.append(args)
685     pass
686 slacapra 1.86
687 slacapra 1.90 # print self.list_of_args
688 gutsche 1.3
689     return
690    
691 spiga 1.42
def jobSplittingForScript(self):#CarlosDaniele
    """
    Split a script-only task: create exactly self.theNumberOfJobs jobs.

    SETS: self.total_number_of_jobs - copy of self.theNumberOfJobs
          self.list_of_args - one single-element list per job holding the
                              job index as a string
          self.jobDestination - one [""] entry appended per job
    """
    common.logger.debug(5, 'Splitting per job')
    njobs = self.theNumberOfJobs
    common.logger.message('Required ' + str(njobs) + ' jobs in total ')

    self.total_number_of_jobs = njobs

    common.logger.debug(5, 'N jobs ' + str(njobs))
    common.logger.message(str(njobs) + ' jobs can be created')

    # The only argument of each job is its index: no random seed is passed.
    self.list_of_args = [[str(jobIndex)] for jobIndex in range(njobs)]
    # Since there is no input, any site is good: the destination stays empty.
    # A fresh list is created for every job so the entries are not shared.
    self.jobDestination.extend([[""] for jobIndex in range(njobs)])
    return
714    
def split(self, jobParams):
    """
    Store arguments and destination of every job in the job database.

    ARGUMENT: jobParams: list, extended in place with one entry per job and
              then filled with the argument list of each job
    REQUIRES: self.total_number_of_jobs, self.list_of_args, self.jobDestination
    """
    common.jobDB.load()
    njobs = self.total_number_of_jobs
    arglist = self.list_of_args

    # create the empty structure
    jobParams.extend([""] * njobs)

    for job in range(njobs):
        jobArgs = arglist[job]
        jobParams[job] = jobArgs
        common.jobDB.setArguments(job, jobArgs)

        destination = self.jobDestination[job]
        common.logger.debug(5, "Job " + str(job) + " Destination: " + str(destination))
        common.jobDB.setDestination(job, destination)

    common.jobDB.save()
    return
735    
def getJobTypeArguments(self, nj, sched):
    """
    Return the arguments of job nj as a single string.

    Each argument stored in common.jobDB for the job is converted with
    str() and followed by one blank, so a non-empty result ends with a
    trailing blank. The sched parameter is not used.
    """
    pieces = [str(argument)+" " for argument in common.jobDB.arguments(nj)]
    return ''.join(pieces)
741 gutsche 1.3
def numberOfJobs(self):
    """
    Return the total number of jobs (self.total_number_of_jobs).
    """
    total = self.total_number_of_jobs
    return total
745    
def getTarBall(self, exe):
    """
    Return the path of the tarball holding user libraries and executable.

    The path is common.work_space.pathForTgz() + 'share/' + self.tgz_name
    and is remembered in self.tgzNameWithPath. A file already present at
    that path is reused; otherwise buildTar_(exe) is called and the
    whitespace-stripped path is returned.
    """
    tgz_path = common.work_space.pathForTgz()+'share/'+self.tgz_name
    self.tgzNameWithPath = tgz_path

    if not os.path.exists(tgz_path):
        # nothing there yet: pack the user binaries now
        self.buildTar_(exe)
        return self.tgzNameWithPath.strip()

    return tgz_path
763    
def buildTar_(self, executable):
    """
    Create the tarballs shipped in the input sandbox.

    Nothing is done when the Scram release top is empty or equal to the
    user working area. Otherwise self.tgzNameWithPath is written with:
    the user executable (only when it is not found under the release top),
    the 'lib' and 'module' directories of the working area, every 'data'
    directory found below the working area, and the 'ProdAgentApi' and
    'ProdCommon' directories found under $CRABDIR. Afterwards the size is
    checked against self.MaxTarBallSize (MB) and a second tarball,
    self.MLtgzfile, is created with the monitoring scripts taken from
    $CRABDIR/python.

    Raises CrabException when the executable cannot be found, when a
    tarball cannot be created or when the size limit is exceeded.
    """
    import sys
    import tarfile

    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        return

    tar = None
    try:
        try: # create tar ball
            tar = tarfile.open(self.tgzNameWithPath, "w:gz")
            ## First find the executable
            if self.executable != '':
                exeWithPath = self.scram.findFile_(executable)
                if not exeWithPath:
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship it
                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
                    path = swArea+'/'
                    # distinguish case when script is in user project area
                    # or given by full path somewhere else
                    if exeWithPath.find(path) >= 0:
                        exe = exeWithPath.replace(path, '')
                        tar.add(path+exe, os.path.basename(executable))
                    else:
                        tar.add(exeWithPath, os.path.basename(executable))
                # else: the exe is from the release, it will be found on the WN

            ## Now get the libraries: only those in local working area
            libDir = 'lib'
            lib = swArea+'/' +libDir
            common.logger.debug(5,"lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib, libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module, moduleDir)

            ## Now check if any data dir(s) is present
            swAreaLen = len(swArea)
            for root, dirs, files in os.walk(swArea):
                if "data" in dirs:
                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
                    tar.add(root+"/data", root[swAreaLen:]+"/data")

            ## Add ProdAgent dir to tar
            paDir = 'ProdAgentApi'
            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
            if os.path.isdir(pa):
                tar.add(pa, paDir)

            ## Add ProdCommon dir to tar (needed for DBS publication)
            prodcommonDir = 'ProdCommon'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
            if os.path.isdir(prodcommonPath):
                tar.add(prodcommonPath, prodcommonDir)

            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
            tar.close()
            tar = None
        except CrabException:
            # keep the specific message (e.g. executable not found)
            raise
        except Exception:
            # report the root cause instead of hiding it
            raise CrabException('Could not create tar-ball: '+str(sys.exc_info()[1]))
    finally:
        if tar is not None:
            # an error is already propagating: release the handle, best effort
            try:
                tar.close()
            except Exception:
                pass

    ## check for tarball size
    tarballinfo = os.stat(self.tgzNameWithPath)
    if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
        raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')

    ## create tar-ball with ML stuff
    self.MLtgzfile = common.work_space.pathForTgz()+'share/MLfiles.tgz'
    tar = None
    try:
        try:
            tar = tarfile.open(self.MLtgzfile, "w:gz")
            path = os.environ['CRABDIR'] + '/python/'
            for mlFile in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
                tar.add(path+mlFile, mlFile)
            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
            tar.close()
            tar = None
        except Exception:
            raise CrabException('Could not create ML files tar-ball: '+str(sys.exc_info()[1]))
    finally:
        if tar is not None:
            # an error is already propagating: release the handle, best effort
            try:
                tar.close()
            except Exception:
                pass

    return
860    
def additionalInputFileTgz(self):
    """
    Put all additional input files into a tarball and return its name.

    The tarball is written to common.work_space.pathForTgz() + 'share/' +
    self.additional_tgz_name. Every entry of self.additional_inbox_files
    is stored under its last path component only. The archive is closed
    even when adding a file fails.
    """
    import tarfile
    tarName = common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
    tar = tarfile.open(tarName, "w:gz")
    try:
        for inboxFile in self.additional_inbox_files:
            # flatten: keep only the file name inside the archive
            tar.add(inboxFile, inboxFile.split('/')[-1])
        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
    finally:
        tar.close()
    return tarName
873    
def wsSetupEnvironment(self, nj):
    """
    Return the part of the job script which prepares the execution
    environment for job 'nj'.

    The generated shell text, in order:
      - sets up the CMS environment for LCG or OSG middleware (on OSG a
        temporary working directory is created first);
      - creates the CMSSW project area for self.version with scram and
        evaluates the scram runtime environment;
      - checks that at least two wrapper arguments were given;
      - exports the dataset description variables;
      - when self.pset is not None, copies the job configuration file and
        substitutes input files / event counts (dataset jobs) or first run
        and random seeds (jobs without input dataset), then renames it to
        pset.cfg and computes its hash;
      - unpacks the additional input tarball when additional files exist.

    NOTE(review): the script relies on $middleware, $nargs, $args, $repo,
    $RUNTIME_AREA and dumpStatus, which are not defined here -- presumably
    provided by the surrounding job wrapper.
    """
    # Prepare JobType-independent part
    txt = ''

    ## at this level the middleware is already known
    txt += 'if [ $middleware == LCG ]; then \n'
    txt += ' echo "### First set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
    txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
    txt += ' export BUILD_ARCH='+self.executable_arch+'\n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' echo "Created working directory: $WORKING_DIR"\n'
    txt += ' if [ ! -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
    txt += ' echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' exit 1\n'
    txt += ' fi\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    txt += ' echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
    txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
    txt += scram+' project CMSSW '+self.version+'\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
    txt += ' echo "JOB_EXIT_STATUS = 10034"\n'
    txt += ' echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' echo "Remove working directory: $WORKING_DIR"\n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    # report the version actually requested (was hard-coded to CMSSW_0_6_1)
    txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW '+self.version+' not found on `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
    txt += ' echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += ' exit 1 \n'
    txt += 'fi \n'
    txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
    txt += 'cd '+self.version+'\n'
    # remember the project area, used later for PYTHONPATH and publication
    txt += 'SOFTWARE_DIR=`pwd`\n'
    txt += 'echo SOFTWARE_DIR=$SOFTWARE_DIR \n'
    ### grep needed for a bug in scramv1 ###
    txt += scram+' runtime -sh\n'
    txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    txt += 'echo $PATH\n'

    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber)\n"
    txt += "\n"
    txt += "if [ $nargs -lt 2 ]\n"
    txt += "then\n"
    txt += " echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
    txt += ' echo "JOB_EXIT_STATUS = 50113"\n'
    txt += ' echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' echo "Remove working directory: $WORKING_DIR"\n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
    txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
    txt += ' echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += " exit 1\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    ### variables used for DBS output publication
    if (self.datasetPath):
        txt += '\n'
        txt += 'DatasetPath='+self.datasetPath+'\n'

        datasetpath_split = self.datasetPath.split("/")

        txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
        txt += 'DataTier='+datasetpath_split[2]+'\n'
        txt += 'ApplicationFamily=cmsRun\n'

    else:
        txt += 'DatasetPath=MCDataTier\n'
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'

    if self.pset is not None:
        pset = os.path.basename(job.configFilename())
        txt += '\n'
        txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
        if (self.datasetPath): # standard job
            txt += 'InputFiles=${args[1]}\n'
            txt += 'MaxEvents=${args[2]}\n'
            txt += 'SkipEvents=${args[3]}\n'
            txt += 'echo "Inputfiles:<$InputFiles>"\n'
            txt += 'sed "s#{\'INPUT\'}#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
            txt += 'echo "MaxEvents:<$MaxEvents>"\n'
            txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
            txt += 'echo "SkipEvents:<$SkipEvents>"\n'
            txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
        else: # pythia like job
            # position in ${args[...]} of the next optional value
            seedIndex = 1
            if (self.firstRun):
                txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
                txt += 'echo "FirstRun: <$FirstRun>"\n'
                txt += 'sed "s#\\<INPUTFIRSTRUN\\>#$FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                seedIndex = seedIndex+1

            if (self.sourceSeed):
                txt += 'Seed=${args['+str(seedIndex)+']}\n'
                txt += 'sed "s#\\<INPUT\\>#$Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                seedIndex = seedIndex+1
                ## the following seeds are not always present
                if (self.sourceSeedVtx):
                    txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "VtxSeed: <$VtxSeed>"\n'
                    txt += 'sed "s#\\<INPUTVTX\\>#$VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
                if (self.sourceSeedG4):
                    txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "G4Seed: <$G4Seed>"\n'
                    txt += 'sed "s#\\<INPUTG4\\>#$G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
                if (self.sourceSeedMix):
                    txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "MixSeed: <$mixSeed>"\n'
                    txt += 'sed "s#\\<INPUTMIX\\>#$mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
        txt += 'mv -f '+pset+' pset.cfg\n'

    if len(self.additional_inbox_files) > 0:
        txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
        txt += ' tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
        txt += 'fi\n'

    if self.pset is not None:
        txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'

        txt += '\n'
        txt += 'echo "***** cat pset.cfg *********"\n'
        txt += 'cat pset.cfg\n'
        txt += 'echo "****** end pset.cfg ********"\n'
        txt += '\n'
        ### hash of the configuration, used for DBS output publication
        txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
        txt += 'echo "PSETHASH = $PSETHASH" \n'
        txt += '\n'
    return txt
1075    
def wsBuildExe(self, nj=0):
    """
    Return the script part which unpacks the user tarball on the worker.

    When self.tgzNameWithPath is not an existing file an empty string is
    returned. Otherwise the returned shell text untars the file from
    $RUNTIME_AREA, aborts the job if untarring fails (removing the OSG
    working directory first) and prepends the ProdAgentApi and ProdCommon
    directories under $SOFTWARE_DIR to PYTHONPATH. nj is not used.
    """
    if not os.path.isfile(self.tgzNameWithPath):
        return ""

    tgz = os.path.basename(self.tgzNameWithPath)
    script = [
        'echo "tar xzvf $RUNTIME_AREA/'+tgz+'"\n',
        'tar xzvf $RUNTIME_AREA/'+tgz+'\n',
        'untar_status=$? \n',
        'if [ $untar_status -ne 0 ]; then \n',
        ' echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n',
        ' echo "JOB_EXIT_STATUS = $untar_status" \n',
        ' echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n',
        ' if [ $middleware == OSG ]; then \n',
        ' echo "Remove working directory: $WORKING_DIR"\n',
        ' cd $RUNTIME_AREA\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n',
        ' echo "JOB_EXIT_STATUS = 50999"\n',
        ' echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' fi\n',
        ' fi \n',
        ' \n',
        ' exit 1 \n',
        'else \n',
        ' echo "Successful untar" \n',
        'fi \n',
        '\n',
        'echo "Include ProdAgentApi and PRODCOMMON in PYTHONPATH"\n',
        'if [ -z "$PYTHONPATH" ]; then\n',
        ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n',
        'else\n',
        ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n',
        # NOTE(review): this echo sits inside the else branch, so the value is
        # only printed when PYTHONPATH was already set -- confirm intent
        'echo "PYTHONPATH=$PYTHONPATH"\n',
        'fi\n',
        '\n',
    ]
    return "".join(script)
1130    
def modifySteeringCards(self, nj):
    """
    Placeholder for modifying the user card of job nj and writing a new
    card into the share directory; currently does nothing.
    """
    return None
1136    
def executableName(self):
    """
    Return the command to run: "sh " when a user script (self.scriptExe)
    is set, otherwise self.executable.
    """
    if not self.scriptExe:
        return self.executable
    return "sh "
1142 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string passed to the executable.

    With a user script (self.scriptExe) the result is the script name
    followed by " $NJob". Otherwise the result is " -p pset.cfg", with
    " -e" added in front for CMSSW releases 1_5 or newer. The release is
    read from self.scram.getSWVersion(), expected as NAME_major_minor...

    Raises CrabException when major or minor number cannot be parsed.
    """
    if self.scriptExe:
        return self.scriptExe + " $NJob"

    version_array = self.scram.getSWVersion().split('_')
    try:
        major = int(version_array[1])
        minor = int(version_array[2])
    except (IndexError, ValueError):
        msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
        raise CrabException(msg)

    # compare as a pair: 2_0 is newer than 1_5 although its minor is smaller
    if (major, minor) >= (1, 5):
        return " -e -p pset.cfg"
    return " -p pset.cfg"
1161 slacapra 1.1
def inputSandbox(self, nj):
    """
    Return the list of file names to be put in the JDL input sandbox.

    Contains, in order: the code tarball and the ML tarball (each only if
    the file exists), the job configuration file when self.pset is not
    None, and the tarball returned by additionalInputFileTgz(). nj is not
    used.
    """
    sandbox = []

    ## code
    code_tgz = self.tgzNameWithPath
    if os.path.isfile(code_tgz):
        sandbox.append(code_tgz)
    ml_tgz = self.MLtgzfile
    if os.path.isfile(ml_tgz):
        sandbox.append(ml_tgz)

    ## config
    if self.pset is not None:
        sandbox.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())

    ## additional input files
    sandbox.append(self.additionalInputFileTgz())
    return sandbox
1181    
def outputSandbox(self, nj):
    """
    Return the list of file names to be put in the JDL output sandbox.

    Every user declared output file (self.output_file followed by
    self.output_file_sandbox) is numbered with nj + 1 via numberFile_.
    """
    declared = self.output_file+self.output_file_sandbox
    return [self.numberFile_(name, str(nj + 1)) for name in declared]
1193    
def prepareSteeringCards(self):
    """
    Hook for initial modifications of the user's steering card file;
    nothing is done for this job type.
    """
    return None
1199    
def wsRenameOutput(self, nj):
    """
    Return the part of the job script which renames the produced files.

    For every declared output file (self.output_file followed by
    self.output_file_sandbox) the script moves the file to $RUNTIME_AREA
    and copies it to its job-numbered name; a missing file sets
    exit_status=60302. Missing files listed in self.output_file also set
    output_exit_status, and with the condor_g scheduler a dummy output is
    written on OSG. Finally file_list is set to the numbered names of
    self.output_file and the script changes to $RUNTIME_AREA. nj is not
    used; names are numbered with the shell variable $NJob.
    """
    script = ['\n', '# directory content\n', 'ls \n']

    for produced in self.output_file+self.output_file_sandbox:
        numbered = self.numberFile_(produced, '$NJob')
        script.append('\n')
        script.append('# check output file\n')
        script.append('if [ -e ./'+produced+' ] ; then\n')
        # keep the original name too (used for output data publication)
        script.append(' mv '+produced+' $RUNTIME_AREA\n')
        script.append(' cp $RUNTIME_AREA/'+produced+' $RUNTIME_AREA/'+numbered+'\n')
        script.append('else\n')
        script.append(' exit_status=60302\n')
        script.append(' echo "ERROR: Problem with output file '+produced+'"\n')
        if produced in self.output_file:
            # the error is only recorded: exiting here interrupted the execution
            script.append(' echo "JOB_EXIT_STATUS = $exit_status"\n')
            script.append(' output_exit_status=$exit_status\n')
            script.append(' # exit $exit_status\n')
        if common.scheduler.boss_scheduler_name == 'condor_g':
            script.append(' if [ $middleware == OSG ]; then \n')
            script.append(' echo "prepare dummy output file"\n')
            script.append(' echo "Processing of job output failed" > $RUNTIME_AREA/'+numbered+'\n')
            script.append(' fi \n')
        script.append('fi\n')

    numbered_outputs = [self.numberFile_(name, '$NJob') for name in self.output_file]

    script.append('file_list="'+' '.join(numbered_outputs)+'"\n')
    script.append('cd $RUNTIME_AREA\n')
    return ''.join(script)
1243    
def numberFile_(self, file, txt):
    """
    Insert '_' + txt before the last extension of a file name.

    The extension is everything from the last '.' of 'file' onwards; a
    name without any '.' simply gets '_' + txt appended. Examples:
    'out.root' -> 'out_<txt>.root', 'a.b.c' -> 'a.b_<txt>.c',
    'noext' -> 'noext_<txt>'.
    """
    # str.rfind replaces string.split plus manual re-joining (the string
    # module functions no longer exist in Python 3); results are identical
    dot = file.rfind(".")
    if dot == -1:
        return file + '_' + txt
    return file[:dot] + '_' + txt + file[dot:]
1261    
def getRequirements(self, nj=None):
    """
    Return the job requirements to add to the jdl files.

    The result joins with ' && ': a Member() clause for the software
    version tag (when self.version is set), a Member() clause for the
    architecture tag (when self.executable_arch is set) and the outbound
    connectivity clause, which is always present. Missing clauses no
    longer leave a dangling ' && ' at the start of the expression.
    nj is not used.
    """
    runtime_env = 'other.GlueHostApplicationSoftwareRunTimeEnvironment'
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + '", ' + runtime_env + ')')
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + '", ' + runtime_env + ')')
    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')
    return ' && '.join(clauses)
1281 gutsche 1.3
def configFilename(self):
    """
    Return the name of the configuration file: self.name() with the
    '.cfg' extension appended.
    """
    base = self.name()
    return base + '.cfg'
1285    
1286     ### OLI_DANIELE
def wsSetupCMSOSGEnvironment_(self):
    """
    Return the part of the job script which prepares the CMS software
    environment on OSG worker nodes (common to all CMS jobs).

    The script exports SCRAM_ARCH (self.executable_arch) and sources
    cmsset_default.sh with self.version from $GRID3_APP_DIR/cmssoft or,
    failing that, from $OSG_APP/cmssoft/cms. When neither file exists it
    reports error 10020, removes the working directory and exits with 1.
    The former first 'exit 1', placed before the cleanup commands, made
    the cleanup unreachable and has been dropped.
    """
    txt = '\n'
    txt += ' echo "### SETUP CMS OSG ENVIRONMENT ###"\n'
    txt += ' if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
    txt += ' # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
    txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
    txt += ' source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
    txt += ' elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
    txt += ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
    txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
    txt += ' source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
    txt += ' else\n'
    txt += ' echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10020"\n'
    txt += ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    # no exit here: the working directory must be removed first
    txt += '\n'
    txt += ' echo "Remove working directory: $WORKING_DIR"\n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10017"\n'
    txt += ' echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += '\n'
    txt += ' exit 1\n'
    txt += ' fi\n'
    txt += '\n'
    txt += ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
    txt += ' echo " END SETUP CMS OSG ENVIRONMENT "\n'

    return txt
1332    
1333     ### OLI_DANIELE
def wsSetupCMSLCGEnvironment_(self):
    """
    Return the part of the job script which prepares the CMS software
    environment on LCG worker nodes (common to all CMS jobs).

    The script fails with 10031 when $VO_CMS_SW_DIR is not set, with
    10020 when $VO_CMS_SW_DIR/cmsset_default.sh is missing or empty and
    with 10032 when sourcing that file fails; in all three cases it exits
    with 1. The returned text does not depend on the instance.
    """
    script = [
        ' \n',
        ' echo " ### SETUP CMS LCG ENVIRONMENT ### "\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n',
        ' echo "JOB_EXIT_STATUS = 10031" \n',
        ' echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' echo "JOB_EXIT_STATUS = 10032"\n',
        ' echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
        ' echo "### END SETUP CMS LCG ENVIRONMENT ###"\n',
    ]
    return ''.join(script)
1380 gutsche 1.5
1381 fanzago 1.93 ### FEDE FOR DBS OUTPUT PUBLICATION
def modifyReport(self, nj):
    """
    Return the shell fragment that rewrites the FrameworkJobReport for
    data publication.

    The fragment is produced only when the 'USER.publish_data' setting
    converts to the integer 1; a missing setting is treated as 0.  In every
    other case the returned text is a single echo stating that no
    publication is required.

    When publication is on, the generated shell code:
      * makes ModifyJobReport.py (under $SOFTWARE_DIR) executable;
      * echoes $SE and $SE_PATH (the two '-z' tests only re-assign an
        empty value to an already empty variable);
      * sets ProcessedDataset from the 'USER.publish_data_name' setting;
      * derives FOR_LFN: '/copy_problems/' if $SE_PATH is empty, otherwise
        '/store' followed by whatever awk prints as field 2 when $SE_PATH
        is split on the hard-coded separator 'store';
      * runs ModifyJobReport.py; a non-zero result sets exit_status=1,
        success moves NewFrameworkJobReport.xml onto crab_fjr_$NJob.xml.

    nj -- not used by this method.
    """
    try:
        publish_flag = int(self.cfg_params['USER.publish_data'])
    except KeyError:
        publish_flag = 0

    # Guard clause: nothing to publish, nothing to modify.
    if publish_flag != 1:
        return 'echo "no data publication required"\n'

    dataset_name = self.cfg_params['USER.publish_data_name']

    # The very same command line is first echoed and then executed.
    report_tool = '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py'
    report_cmd = (report_tool +
                  ' crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier'
                  ' $ProcessedDataset $ApplicationFamily $executable'
                  ' $CMSSW_VERSION $PSETHASH $SE $SE_PATH')

    pieces = [
        'echo "Modify Job Report" \n',
        'chmod a+x ' + report_tool + '\n',
        'if [ -z "$SE" ]; then\n',
        ' SE="" \n',
        'fi \n',
        'if [ -z "$SE_PATH" ]; then\n',
        ' SE_PATH="" \n',
        'fi \n',
        'echo "SE = $SE"\n',
        'echo "SE_PATH = $SE_PATH"\n',
        'ProcessedDataset=' + dataset_name + '\n',
        # LFN pattern noted in the original source:
        # /store/user/<user>/processedDataset_PSETHASH
        'if [ "$SE_PATH" == "" ]; then\n',
        ' FOR_LFN=/copy_problems/ \n',
        'else \n',
        # NOTE: the 'store' separator is hard-coded here.
        ' tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n',
        ' FOR_LFN=/store$tmp \n',
        'fi \n',
        'echo "ProcessedDataset = $ProcessedDataset"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
        'echo "' + report_cmd + '"\n',
        report_cmd + '\n',
        'modifyReport_result=$?\n',
        'echo modifyReport_result = $modifyReport_result\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' exit_status=1\n',
        ' echo "ERROR: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n',
        'fi\n',
    ]
    return ''.join(pieces)
1449 fanzago 1.99
def cleanEnv(self):
    """
    Return the shell fragment that cleans up the working directory on OSG.

    The generated code acts only when $middleware equals OSG: it changes to
    $RUNTIME_AREA and removes $WORKING_DIR recursively.  If the directory
    is still there afterwards it reports exit code 60999, calls dumpStatus
    on $RUNTIME_AREA/$repo, deletes that file and writes the MonitorJobID /
    MonitorID entries to it again.  The fragment itself contains no 'exit'
    statement.
    """
    shell_lines = (
        'if [ $middleware == OSG ]; then\n',
        ' cd $RUNTIME_AREA\n',
        ' echo "Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        # Removal is verified explicitly; a surviving directory is an error.
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n',
        ' echo "JOB_EXIT_STATUS = 60999"\n',
        ' echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' fi\n',
        'fi\n',
        '\n',
    )
    return ''.join(shell_lines)
1469 fanzago 1.93
def setParam_(self, param, value):
    """
    Store value under the key param in the self._params mapping,
    replacing any entry already present for that key.
    """
    params = self._params
    params[param] = value
1472    
def getParams(self):
    """
    Return the self._params mapping itself (the live object, not a copy).
    """
    params = self._params
    return params
1475 gutsche 1.8
def setTaskid_(self):
    """
    Copy the 'taskId' entry of self.cfg_params into self._taskId.

    A missing 'taskId' entry is not handled here; the lookup error
    propagates to the caller.
    """
    task_id = self.cfg_params['taskId']
    self._taskId = task_id
1478    
def getTaskid(self):
    """
    Return the value currently held in self._taskId.
    """
    task_id = self._taskId
    return task_id
1481 gutsche 1.35
def uniquelist(self, old):
    """
    remove duplicates from a list

    Returns a new list holding each distinct element of old exactly once,
    in the order of first appearance.  The input is not modified.
    Elements must be hashable because a dictionary is used to remember
    which values were already seen.
    """
    seen = {}
    unique = []
    for e in old:
        # Keep only the first occurrence so the result order is
        # deterministic instead of depending on dictionary key order.
        if e not in seen:
            seen[e] = 0
            unique.append(e)
    return unique
1490 mcinquil 1.121
1491    
def checkOut(self, limit):
    """
    check the dimension of the output files

    Returns the shell fragment that compares the total size of the job
    output files with limit and trims the file list when it is exceeded.

    The files considered are the entries of self.output_file followed by
    self.output_file_sandbox, skipping every name containing 'crab_fjr',
    each converted with self.numberFile_(name, '$NJob').

    The generated code:
      * sums the size column ($3 of 'ls -gGrta') of every existing file,
        warning about missing ones;
      * if the sum exceeds limit, walks the files in order accumulating
        sizes, remembers the first file that pushes the running total over
        the limit, removes that file and every file listed after it, and
        sets exit_status=70000;
      * otherwise reports that the total is fine.

    limit -- value written verbatim (via str()) into the shell variable
             'limit' and compared numerically with the summed sizes.
    """
    # Job-numbered names of all output files, job report excluded.
    numbered = [self.numberFile_(fileOut, '$NJob')
                for fileOut in (self.output_file + self.output_file_sandbox)
                if fileOut.find('crab_fjr') == -1]
    # Blank-separated list; every name is preceded by one blank.
    allOutFiles = ''.join([' ' + name for name in numbered])

    banner = 'echo "*****************************************"\n'

    pieces = [
        banner,
        'echo "** Starting output sandbox limit check **"\n',
        banner,
        'echo "OUTPUT files: ' + allOutFiles + '";\n',
        'ls -gGhrta;\n',
        # First pass: total size of the files that actually exist.
        'sum=0;\n',
        'for file in ' + allOutFiles + ' ; do\n',
        ' if [ -e $file ]; then\n',
        ' tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n',
        ' sum=`expr $sum + $tt`\n',
        ' else\n',
        ' echo "WARNING: output file $file not found!"\n',
        ' fi\n',
        'done\n',
        'echo "Total Output dimension: $sum";\n',
        'limit=' + str(limit) + ';\n',
        'echo "OUTPUT FILES LIMIT SET TO: $limit";\n',
        'if [ $limit -lt $sum ]; then\n',
        ' echo "WARNING: output files have to big size - something will be lost;"\n',
        ' echo " checking the output file sizes..."\n',
        # Second pass: find the first file that overflows the limit.
        ' tot=0;\n',
        ' for file2 in ' + allOutFiles + ' ; do\n',
        # Missing files were already reported above; skipping them keeps
        # 'expr' from being fed an empty operand.
        ' if [ ! -e $file2 ]; then\n',
        ' continue;\n',
        ' fi\n',
        ' tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n',
        ' tot=`expr $tot + $tt`;\n',
        ' if [ $limit -lt $tot ]; then\n',
        ' tot=`expr $tot - $tt`;\n',
        # The loop variable here is file2; '$file' would be the stale
        # value left behind by the first loop (always the last file).
        ' fileLast=$file2;\n',
        ' break;\n',
        ' fi\n',
        ' done\n',
        ' echo "Dimension calculated: $tot"; echo "First file to exclude: $fileLast";\n',
        # Third pass: drop the overflowing file and everything after it.
        ' flag=0;\n',
        ' for filess in ' + allOutFiles + ' ; do\n',
        ' if [ $fileLast = $filess ]; then\n',
        ' flag=1;\n',
        ' fi\n',
        ' if [ $flag -eq 1 ]; then\n',
        ' rm -f $filess;\n',
        ' fi\n',
        ' done\n',
        ' ls -agGhrt;\n',
        ' echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n',
        ' echo "JOB_EXIT_STATUS = 70000";\n',
        ' exit_status=70000;\n',
        'else\n',
        ' echo "Total Output dimension $sum is fine.";\n',
        'fi\n',
        banner,
        'echo "*** Ending output sandbox limit check ***"\n',
        banner,
    ]
    return ''.join(pieces)