ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.78
Committed: Mon Apr 23 15:17:49 2007 UTC (18 years ago) by slacapra
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_1_5_1_pre1
Changes since 1.77: +1 -1 lines
Log Message:
input sandbox is set by default to 9.5 Mb: actual limit is 10, 1/2 Mb is for crab stuff

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5     import common
6 gutsche 1.3 import PsetManipulator
7 slacapra 1.41 import DataDiscovery
8 gutsche 1.66 import DataDiscovery_DBS2
9 slacapra 1.41 import DataLocation
10 slacapra 1.1 import Scram
11    
12 slacapra 1.70 import os, string, re, shutil, glob
13 slacapra 1.1
14     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs):
    """
    Configure a CMSSW job type from the user configuration.

    Reads the EDG.*, CMSSW.* and USER.* keys of cfg_params, validates
    them, runs data discovery/location when a dataset is requested,
    builds the user tar-ball, splits the task into jobs and, when a
    parameter set is given, writes the modified parameter set.

    ARGUMENTS: cfg_params - mapping of 'SECTION.key' names to values
               ncjobs     - number of jobs to create ('all' is accepted
                            by jobSplittingByBlocks)
    RAISES:    CrabException when datasetpath or pset is missing, when a
               referenced file does not exist, when files_per_jobs is
               used, or when the splitting parameters are inconsistent.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug(3, 'CMSSW::__init__')

    self._params = {}
    self.cfg_params = cfg_params

    # Maximum input sandbox size in MB; user-overridable.
    try:
        self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
    except KeyError:
        self.MaxTarBallSize = 9.5  # actual (23-Apr-2007) limit is 10 Mb

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    log = common.logger

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''         # script use case
    self.datasetPath = ''  # script use case

    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()
    common.taskDB.setDict('codeVersion', self.version)
    self.setParam_('application', self.version)

    ### collect Data cards

    ## get DBS mode
    try:
        self.use_dbs_2 = int(self.cfg_params['CMSSW.use_dbs_2'])
    except KeyError:
        self.use_dbs_2 = 0

    try:
        tmp = cfg_params['CMSSW.datasetpath']
        log.debug(6, "CMSSW::CMSSW(): datasetPath = " + tmp)
        if tmp.lower() == 'none':
            # no input dataset (pythia-like or script job)
            self.datasetPath = None
            self.selectNoInput = 1
        else:
            self.datasetPath = tmp
            self.selectNoInput = 0
    except KeyError:
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)

    # ML monitoring
    # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
    if not self.datasetPath:
        self.setParam_('dataset', 'None')
        self.setParam_('owner', 'None')
    else:
        datasetpath_split = self.datasetPath.split("/")
        if self.use_dbs_2 == 1:
            self.setParam_('dataset', datasetpath_split[1])
            self.setParam_('owner', datasetpath_split[2])
        else:
            self.setParam_('dataset', datasetpath_split[1])
            self.setParam_('owner', datasetpath_split[-1])

    self.setTaskid_()
    self.setParam_('taskId', self.cfg_params['taskId'])

    self.dataTiers = []

    ## now the application
    try:
        self.executable = cfg_params['CMSSW.executable']
        self.setParam_('exe', self.executable)
        log.debug(6, "CMSSW::CMSSW(): executable = " + self.executable)
        msg = "Default executable cmsRun overridden. Switch to " + self.executable
        log.debug(3, msg)
    except KeyError:
        self.executable = 'cmsRun'
        self.setParam_('exe', self.executable)
        msg = "User executable not defined. Use cmsRun"
        log.debug(3, msg)

    try:
        self.pset = cfg_params['CMSSW.pset']
        log.debug(6, "Cmssw::Cmssw(): PSet file = " + self.pset)
        if self.pset.lower() != 'none':
            if not os.path.exists(self.pset):
                raise CrabException("User defined PSet file " + self.pset + " does not exist")
        else:
            # 'none' selects the script-only use case
            self.pset = None
    except KeyError:
        raise CrabException("PSet file missing. Cannot run cmsRun ")

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    try:
        self.output_file = []
        tmp = cfg_params['CMSSW.output_file']
        if tmp != '':
            tmpOutFiles = tmp.split(',')
            log.debug(7, 'cmssw::cmssw(): output files ' + str(tmpOutFiles))
            for tmp in tmpOutFiles:
                self.output_file.append(tmp.strip())
        else:
            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available")
    except KeyError:
        log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available")

    # script_exe file as additional file in inputSandbox
    try:
        self.scriptExe = cfg_params['USER.script_exe']
        if self.scriptExe != '':
            if not os.path.isfile(self.scriptExe):
                msg = "ERROR. file " + self.scriptExe + " not found"
                raise CrabException(msg)
            self.additional_inbox_files.append(self.scriptExe.strip())
    except KeyError:
        self.scriptExe = ''

    # With neither a dataset nor a parameter set, a script is mandatory.
    if self.datasetPath is None and self.pset is None and self.scriptExe == '':
        msg = "Error. script_exe not defined"
        raise CrabException(msg)

    ## additional input files
    try:
        tmpAddFiles = cfg_params['USER.additional_input_files'].split(',')
        for tmp in tmpAddFiles:
            tmp = tmp.strip()
            if not tmp:
                # tolerate empty entries (empty value or trailing comma)
                continue
            if '*' in tmp:
                # relative patterns are expanded from the current directory
                dirname = ''
                if not tmp[0] == "/":
                    dirname = "."
                files = glob.glob(os.path.join(dirname, tmp))
                if len(files) == 0:
                    raise CrabException("No additional input file found with this pattern: " + tmp)
            else:
                files = [tmp]
            for addFile in files:
                if not os.path.exists(addFile):
                    raise CrabException("Additional input file not found: " + addFile)
                # Store under the bare file name: appending the full source
                # path to the share dir points into directories that do not
                # exist there. Files with the same base name overwrite each
                # other.
                storedFile = os.path.join(common.work_space.shareDir(),
                                          os.path.basename(addFile))
                shutil.copyfile(addFile, storedFile)
                self.additional_inbox_files.append(storedFile.strip())
        common.logger.debug(5, "Additional input files: " + str(self.additional_inbox_files))
    except KeyError:
        pass

    # files per job
    try:
        if cfg_params['CMSSW.files_per_jobs']:
            raise CrabException("files_per_jobs no longer supported. Quitting.")
    except KeyError:
        pass

    ## Events per job
    try:
        self.eventsPerJob = int(cfg_params['CMSSW.events_per_job'])
        self.selectEventsPerJob = 1
    except KeyError:
        self.eventsPerJob = -1
        self.selectEventsPerJob = 0

    ## number of jobs
    try:
        self.theNumberOfJobs = int(cfg_params['CMSSW.number_of_jobs'])
        self.selectNumberOfJobs = 1
    except KeyError:
        self.theNumberOfJobs = 0
        self.selectNumberOfJobs = 0

    try:
        self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
        self.selectTotalNumberEvents = 1
    except KeyError:
        self.total_number_of_events = 0
        self.selectTotalNumberEvents = 0

    if self.pset is not None:
        if (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2:
            msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
            raise CrabException(msg)
    else:
        # script-only jobs are split purely by number of jobs
        if self.selectNumberOfJobs == 0:
            msg = 'Must specify number_of_jobs.'
            raise CrabException(msg)

    ## source seed for pythia
    try:
        self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
    except KeyError:
        self.sourceSeed = None
        common.logger.debug(5, "No seed given")

    try:
        self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
    except KeyError:
        self.sourceSeedVtx = None
        common.logger.debug(5, "No vertex seed given")

    try:
        self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
    except KeyError:
        self.sourceSeedG4 = None
        common.logger.debug(5, "No g4 sim hits seed given")

    try:
        self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
    except KeyError:
        self.sourceSeedMix = None
        common.logger.debug(5, "No mix seed given")

    try:
        self.firstRun = int(cfg_params['CMSSW.first_run'])
    except KeyError:
        self.firstRun = None
        common.logger.debug(5, "No first run given")

    if self.pset is not None:
        self.PsetEdit = PsetManipulator.PsetManipulator(self.pset)

    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents = 0       # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths = {}       # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination = [] # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end

    self.tgzNameWithPath = self.getTarBall(self.executable)

    ## Select Splitting
    if self.selectNoInput:
        if self.pset is None:
            self.jobSplittingForScript()
        else:
            self.jobSplittingNoInput()
    else:
        self.jobSplittingByBlocks(blockSites)

    # modify Pset
    if self.pset is not None:
        if self.datasetPath:
            # standard job: placeholders allow processing a fraction of
            # the events in a file
            self.PsetEdit.inputModule("INPUT")
            self.PsetEdit.maxEvent("INPUTMAXEVENTS")
            self.PsetEdit.skipEvent("INPUTSKIPEVENTS")
        else:
            # pythia like job
            self.PsetEdit.maxEvent(self.eventsPerJob)
            if self.firstRun:
                self.PsetEdit.pythiaFirstRun("INPUTFIRSTRUN")
            # NOTE(review): the vtx/g4/mix checks are taken to be nested
            # under the pythia seed check, as in jobSplittingNoInput;
            # confirm against the original indentation.
            if self.sourceSeed:
                self.PsetEdit.pythiaSeed("INPUT")
                if self.sourceSeedVtx:
                    self.PsetEdit.vtxSeed("INPUTVTX")
                if self.sourceSeedG4:
                    self.PsetEdit.g4Seed("INPUTG4")
                if self.sourceSeedMix:
                    self.PsetEdit.mixSeed("INPUTMIX")
        # add FrameworkJobReport to parameter-set
        self.PsetEdit.addCrabFJR(self.fjrFileName)
        self.PsetEdit.psetWriter(self.configFilename())
304 gutsche 1.3
305 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
306    
307 gutsche 1.3 common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
308    
309     datasetPath=self.datasetPath
310    
311 slacapra 1.1 ## Contact the DBS
312 slacapra 1.41 common.logger.message("Contacting DBS...")
313 slacapra 1.1 try:
314 gutsche 1.66
315     if self.use_dbs_2 == 1 :
316     self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
317     else :
318     self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
319 slacapra 1.1 self.pubdata.fetchDBSInfo()
320    
321 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
322 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
323     raise CrabException(msg)
324 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
325 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
326     raise CrabException(msg)
327 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
328 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
329 slacapra 1.1 raise CrabException(msg)
330 gutsche 1.67 except DataDiscovery_DBS2.NotExistingDatasetError_DBS2, ex :
331     msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
332     raise CrabException(msg)
333     except DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2, ex :
334     msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
335     raise CrabException(msg)
336     except DataDiscovery_DBS2.DataDiscoveryError_DBS2, ex:
337     msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
338     raise CrabException(msg)
339 slacapra 1.1
340     ## get list of all required data in the form of dbs paths (dbs path = /dataset/datatier/owner)
341 gutsche 1.3 common.logger.message("Required data are :"+self.datasetPath)
342    
343 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
344 mkirn 1.37 self.eventsbyblock=self.pubdata.getEventsPerBlock()
345     self.eventsbyfile=self.pubdata.getEventsPerFile()
346 gutsche 1.3
347 slacapra 1.1 ## get max number of events
348     self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py
349 gutsche 1.44 common.logger.message("The number of available events is %s\n"%self.maxEvents)
350 slacapra 1.1
351 slacapra 1.41 common.logger.message("Contacting DLS...")
352 slacapra 1.1 ## Contact the DLS and build a list of sites hosting the fileblocks
353     try:
354 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
355 gutsche 1.6 dataloc.fetchDLSInfo()
356 slacapra 1.41 except DataLocation.DataLocationError , ex:
357 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
358     raise CrabException(msg)
359    
360    
361 gutsche 1.35 sites = dataloc.getSites()
362     allSites = []
363     listSites = sites.values()
364 slacapra 1.63 for listSite in listSites:
365     for oneSite in listSite:
366 gutsche 1.35 allSites.append(oneSite)
367     allSites = self.uniquelist(allSites)
368 gutsche 1.3
369 gutsche 1.35 common.logger.message("Sites ("+str(len(allSites))+") hosting part/all of dataset: "+str(allSites))
370     common.logger.debug(6, "List of Sites: "+str(allSites))
371     return sites
372 gutsche 1.3
def jobSplittingByBlocks(self, blockSites):
    """
    Perform job splitting. Jobs run over an integer number of files
    and no more than one block.
    ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
    REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
              self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
              self.maxEvents, self.filesbyblock, self.eventsbyblock, self.eventsbyfile,
              self.ncjobs
    SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
          self.total_number_of_jobs, self.ncjobs - Total # of jobs
          self.list_of_args - one [file list string, max events, skip events]
                              entry per job, all as strings
    RAISES: CrabException when the splitting parameters do not yield a
            positive number of events per job.
    """

    # ---- Handle the possible job splitting configurations ---- #
    totalEventsRequested = None
    eventsPerJobRequested = None
    if self.selectTotalNumberEvents:
        totalEventsRequested = self.total_number_of_events
    if self.selectEventsPerJob:
        eventsPerJobRequested = self.eventsPerJob
        # NOTE(review): nesting reconstructed from a flattened listing;
        # number_of_jobs only defines the total together with events_per_job.
        if self.selectNumberOfJobs:
            totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob

    if totalEventsRequested is None:
        # previously surfaced as a NameError further down
        raise CrabException('Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.')

    # If user requested all the events in the dataset
    if totalEventsRequested == -1:
        eventsRemaining = self.maxEvents
    # If user requested more events than are in the dataset
    elif totalEventsRequested > self.maxEvents:
        eventsRemaining = self.maxEvents
        # report the effective request, which may come from
        # number_of_jobs * events_per_job rather than total_number_of_events
        common.logger.message("Requested "+str(totalEventsRequested)+ " events, but only "+str(self.maxEvents)+" events are available.")
    # If user requested less events than are in the dataset
    else:
        eventsRemaining = totalEventsRequested

    # If user requested more events per job than are in the dataset
    if self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents:
        eventsPerJobRequested = self.maxEvents

    # For user info at end
    totalEventCount = 0

    if self.selectTotalNumberEvents and self.selectNumberOfJobs:
        if self.theNumberOfJobs <= 0:
            raise CrabException('number_of_jobs must be a positive integer, got '+str(self.theNumberOfJobs)+'.')
        eventsPerJobRequested = eventsRemaining // self.theNumberOfJobs

    if eventsPerJobRequested is None:
        raise CrabException('Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.')

    # A non-positive events-per-job never consumes events, so the loops
    # below would only stop at the job cap (999999999 for 'all').
    if eventsRemaining > 0 and eventsPerJobRequested <= 0:
        raise CrabException('Cannot split: the requested parameters give '+str(eventsPerJobRequested)+' events per job. Check total_number_of_events, events_per_job and number_of_jobs.')

    if self.selectNumberOfJobs:
        common.logger.message("May not create the exact number_of_jobs requested.")

    if self.ncjobs == 'all':
        totalNumberOfJobs = 999999999
    else:
        totalNumberOfJobs = self.ncjobs

    blocks = blockSites.keys()
    blockCount = 0
    # Backup variable in case self.maxEvents counted events in a non-included block
    numBlocksInDataset = len(blocks)

    jobCount = 0
    list_of_lists = []

    # ---- Iterate over the blocks in the dataset until ---- #
    # ---- we've met the requested total # of events    ---- #
    while (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs):
        block = blocks[blockCount]
        blockCount += 1

        if self.eventsbyblock.has_key(block):
            numEventsInBlock = self.eventsbyblock[block]
            common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))

            files = self.filesbyblock[block]
            numFilesInBlock = len(files)
            if numFilesInBlock <= 0:
                continue
            fileCount = 0

            # ---- New block => New job ---- #
            parString = "\\{"
            # counter for number of events in files currently worked on
            filesEventCount = 0
            # flag if next while loop should touch new file
            newFile = 1
            # job event counter
            jobSkipEventCount = 0

            # ---- Iterate over the files in the block until we've met the requested ---- #
            # ---- total # of events or we've gone over all the files in this block  ---- #
            while (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs):
                file = files[fileCount]
                if newFile:
                    try:
                        numEventsInFile = self.eventsbyfile[file]
                        common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
                        # increase filesEventCount
                        filesEventCount += numEventsInFile
                        # Add file to current job; the trailing '\,' is
                        # stripped with [:-2] when the job is closed
                        parString += '\\"' + file + '\\"\\,'
                        newFile = 0
                    except KeyError:
                        common.logger.message("File "+str(file)+" has unknown number of events: skipping")

                # if less events in file remain than eventsPerJobRequested
                if filesEventCount - jobSkipEventCount < eventsPerJobRequested:
                    # if last file in block
                    if fileCount == numFilesInBlock-1:
                        # end job using last file, use remaining events in block
                        # close job and touch new file
                        fullString = parString[:-2]
                        fullString += '\\}'
                        list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
                        self.jobDestination.append(blockSites[block])
                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                        # reset counter
                        jobCount = jobCount + 1
                        totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
                        eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
                        jobSkipEventCount = 0
                        # reset file
                        parString = "\\{"
                        filesEventCount = 0
                        newFile = 1
                        fileCount += 1
                    else:
                        # go to next file
                        newFile = 1
                        fileCount += 1
                # if events in file equal to eventsPerJobRequested
                elif filesEventCount - jobSkipEventCount == eventsPerJobRequested:
                    # close job and touch new file
                    fullString = parString[:-2]
                    fullString += '\\}'
                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    # reset counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    jobSkipEventCount = 0
                    # reset file
                    parString = "\\{"
                    filesEventCount = 0
                    newFile = 1
                    fileCount += 1

                # if more events in file remain than eventsPerJobRequested
                else:
                    # close job but don't touch new file
                    fullString = parString[:-2]
                    fullString += '\\}'
                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    # increase counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    # calculate skip events for last file
                    # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
                    jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
                    # remove all but the last file
                    filesEventCount = self.eventsbyfile[file]
                    parString = "\\{"
                    parString += '\\"' + file + '\\"\\,'

    self.ncjobs = self.total_number_of_jobs = jobCount
    if eventsRemaining > 0 and jobCount < totalNumberOfJobs:
        common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
    common.logger.message("\n"+str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")

    self.list_of_args = list_of_lists
    return
550    
def jobSplittingNoInput(self):
    """
    Split a job without input data by number of events per job.

    Derives self.total_number_of_jobs and, depending on which
    parameters were given, self.eventsPerJob or
    self.total_number_of_events. Then appends one [""] destination to
    self.jobDestination per job and rebuilds self.list_of_args with one
    argument list per job (first-run value or job index, followed by
    the configured seeds, each suffixed with the job index).

    RAISES: CrabException when the total number of events is negative.
    """
    common.logger.debug(5,'Splitting per events')
    common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
    common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
    common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')

    if self.total_number_of_events < 0:
        raise CrabException('Cannot split jobs per Events with "-1" as total number of events')

    byEvents = self.selectEventsPerJob
    if byEvents and self.selectTotalNumberEvents:
        # events per job + total events -> number of jobs
        self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
    elif byEvents and self.selectNumberOfJobs:
        # events per job + number of jobs -> total events
        self.total_number_of_jobs = self.theNumberOfJobs
        self.total_number_of_events = int(self.theNumberOfJobs*self.eventsPerJob)
    elif (not byEvents) and self.selectNumberOfJobs:
        # number of jobs + total events -> events per job
        self.total_number_of_jobs = self.theNumberOfJobs
        self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)

    common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs))

    # is there any remainder?
    achievable = int(self.total_number_of_jobs)*self.eventsPerJob
    check = int(self.total_number_of_events) - achievable

    common.logger.debug(5,'Check '+str(check))

    common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
    if check > 0:
        common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))

    # argument is seed number.$i
    self.list_of_args = []
    for jobIndex in range(self.total_number_of_jobs):
        ## Since there is no input, any site is good
        self.jobDestination.append([""]) # must be empty to write correctly the xml
        suffix = str(jobIndex)
        if self.firstRun:
            ## pythia first run
            jobArgs = [str(self.firstRun)+suffix]
        else:
            ## no first run
            jobArgs = [suffix]
        # NOTE(review): vtx / G4 / mix seeds are taken to be passed only
        # when a pythia seed is given (nesting reconstructed from a
        # flattened listing) -- confirm against the original file.
        if self.sourceSeed:
            jobArgs.append(str(self.sourceSeed)+suffix)
            for extraSeed in (self.sourceSeedVtx, self.sourceSeedG4, self.sourceSeedMix):
                if extraSeed:
                    jobArgs.append(str(extraSeed)+suffix)
        self.list_of_args.append(jobArgs)

    return
617    
618 spiga 1.42
def jobSplittingForScript(self):
    """
    Split a script-only task purely by the requested number of jobs.

    Sets self.total_number_of_jobs to self.theNumberOfJobs, appends one
    [""] destination per job to self.jobDestination and rebuilds
    self.list_of_args so that each job receives its index as its only
    argument.
    """
    requestedJobs = self.theNumberOfJobs

    common.logger.debug(5,'Splitting per job')
    common.logger.message('Required '+str(requestedJobs)+' jobs in total ')

    self.total_number_of_jobs = requestedJobs

    common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs))
    common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')

    # The only argument of each job is its index (no random seed).
    self.list_of_args = []
    jobIndex = 0
    while jobIndex < self.total_number_of_jobs:
        ## Since there is no input, any site is good
        self.jobDestination.append([""])
        self.list_of_args.append([str(jobIndex)])
        jobIndex += 1
    return
641    
def split(self, jobParams):
    """
    Record the per-job arguments and destinations in the job DB.

    ARGUMENT: jobParams - list that is extended in place with one slot
              per job; the first self.total_number_of_jobs entries are
              then set to the entries of self.list_of_args.
    """
    common.jobDB.load()

    njobs = self.total_number_of_jobs
    perJobArgs = self.list_of_args

    # create the empty structure
    jobParams.extend([""] * njobs)

    for jobIndex in range(njobs):
        jobParams[jobIndex] = perJobArgs[jobIndex]
        common.jobDB.setArguments(jobIndex, jobParams[jobIndex])
        destination = self.jobDestination[jobIndex]
        common.logger.debug(5,"Job "+str(jobIndex)+" Destination: "+str(destination))
        common.jobDB.setDestination(jobIndex, self.jobDestination[jobIndex])

    common.jobDB.save()
    return
662    
def getJobTypeArguments(self, nj, sched):
    """
    Return the arguments stored in the job DB for job nj as a single
    string, each argument followed by one blank. sched is not used.
    """
    pieces = [str(argument) + " " for argument in common.jobDB.arguments(nj)]
    return ''.join(pieces)
668 gutsche 1.3
def numberOfJobs(self):
    """
    Return self.total_number_of_jobs, which the job splitting methods set.
    """
    jobTotal = self.total_number_of_jobs
    return jobTotal
672    
def getTarBall(self, exe):
    """
    Return the path of the tar-ball with lib and exe, building it via
    buildTar_ when the file does not exist yet.

    The path is relative to what common.work_space.pathForTgz() returns
    and is also stored in self.tgzNameWithPath.
    """
    tarballPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
    self.tgzNameWithPath = tarballPath

    if not os.path.exists(tarballPath):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return string.strip(self.tgzNameWithPath)

    # it already exists: just return it
    return self.tgzNameWithPath
690    
def buildTar_(self, executable):
    """
    Create the tar-balls that are shipped in the job input sandbox.

    The main tar-ball (self.tgzNameWithPath) receives, when present:
    the user executable (only if it does not live under the release top),
    the 'lib' and 'module' directories of the user SCRAM area, every
    'data' directory found below the user SCRAM area, and the
    'ProdAgentApi' directory found under $CRABDIR.
    A second tar-ball with a fixed list of scripts taken from
    $CRABDIR/python is then written; its path is stored in self.MLtgzfile.

    Nothing is created (and self.MLtgzfile is not assigned) when the
    release top is empty or identical to the user SCRAM area.

    Raises CrabException if the executable cannot be found, if a tar-ball
    cannot be written, or if the main tar-ball exceeds
    self.MaxTarBallSize megabytes.
    """
    import sys
    import tarfile

    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        return

    try: # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            if executable != '':
                exeWithPath = self.scram.findFile_(executable)
                if not exeWithPath:
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship it; whether it sits
                    # in the user project area or is given by a full path
                    # elsewhere, it is stored under its base name.
                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
                    tar.add(exeWithPath, os.path.basename(executable))
                # else: the exe is from release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            libDir = 'lib'
            lib = swArea+'/' +libDir
            common.logger.debug(5,"lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib,libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module,moduleDir)

            ## Now check if any data dir(s) is present
            swAreaLen=len(swArea)
            for root, dirs, files in os.walk(swArea):
                if "data" in dirs:
                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
                    # archive name is the path relative to the SCRAM area
                    tar.add(root+"/data",root[swAreaLen:]+"/data")

            ## Add ProdAgent dir to tar
            paDir = 'ProdAgentApi'
            pa = os.environ['CRABDIR'] + '/' + paDir
            if os.path.isdir(pa):
                tar.add(pa,paDir)

            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            # always release the file handle, also when packing failed
            tar.close()
    except CrabException:
        # keep the specific message (e.g. executable not found)
        raise
    except Exception:
        raise CrabException('Could not create tar-ball: '+str(sys.exc_info()[1]))

    ## check for tarball size
    tarballinfo = os.stat(self.tgzNameWithPath)
    if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
        raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')

    ## create tar-ball with ML stuff
    self.MLtgzfile = common.work_space.pathForTgz()+'share/MLfiles.tgz'
    try:
        tar = tarfile.open(self.MLtgzfile, "w:gz")
        try:
            path=os.environ['CRABDIR'] + '/python/'
            for mlFile in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
                tar.add(path+mlFile,mlFile)
            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
        finally:
            tar.close()
    except Exception:
        raise CrabException('Could not create ML files tar-ball: '+str(sys.exc_info()[1]))

    return
779    
def wsSetupEnvironment(self, nj):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    The text is made of: the middleware dependent setup (LCG or OSG), the
    creation of the CMSSW project area for self.version, the check on the
    number of wrapper arguments and, when self.pset is not None, the
    commands which copy the job configuration file and substitute the
    per-job values (input files, event counts or seeds) into it.
    """
    # Status-report lines repeated after every error code in the script.
    report = ''
    report += ' dumpStatus $RUNTIME_AREA/$repo\n'
    report += ' rm -f $RUNTIME_AREA/$repo \n'
    report += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    report += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'

    # Prepare JobType-independent part
    txt = ''

    ## OLI_Daniele at this level middleware already known

    txt += 'if [ $middleware == LCG ]; then \n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' echo "Created working directory: $WORKING_DIR"\n'
    txt += ' if [ ! -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
    txt += ' echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
    txt += report
    txt += ' exit 1\n'
    txt += ' fi\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
    txt += scram+' project CMSSW '+self.version+'\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
    txt += ' echo "JOB_EXIT_STATUS = 10034"\n'
    txt += ' echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
    txt += report
    ## OLI_Daniele
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' echo "Remove working directory: $WORKING_DIR"\n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    # report the version actually requested, not a hard-coded release name
    txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW '+self.version+' not found on `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
    txt += ' echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
    txt += report
    txt += ' fi\n'
    txt += ' fi \n'
    txt += ' exit 1 \n'
    txt += 'fi \n'
    txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
    txt += 'export SCRAM_ARCH='+self.executable_arch+'\n'
    txt += 'cd '+self.version+'\n'
    ### needed grep for bug in scramv1 ###
    txt += scram+' runtime -sh\n'
    txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    txt += 'echo $PATH\n'

    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber)\n"
    txt += "\n"
    txt += "if [ $nargs -lt 2 ]\n"
    txt += "then\n"
    txt += " echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
    txt += ' echo "JOB_EXIT_STATUS = 50113"\n'
    txt += ' echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
    txt += report
    ## OLI_Daniele
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' echo "Remove working directory: $WORKING_DIR"\n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
    txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
    txt += ' echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
    txt += report
    txt += ' fi\n'
    txt += ' fi \n'
    txt += " exit 1\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    if self.pset is not None: #CarlosDaniele
        pset = os.path.basename(job.configFilename())
        # every substitution rewrites the config file through a temporary
        replace = '" '+pset+' > tmp && mv -f tmp '+pset+'\n'
        txt += '\n'
        txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
        if (self.datasetPath): # standard job
            txt += 'InputFiles=${args[1]}\n'
            txt += 'MaxEvents=${args[2]}\n'
            txt += 'SkipEvents=${args[3]}\n'
            txt += 'echo "Inputfiles:<$InputFiles>"\n'
            txt += 'sed "s#{\'INPUT\'}#$InputFiles#' + replace
            txt += 'echo "MaxEvents:<$MaxEvents>"\n'
            txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#' + replace
            txt += 'echo "SkipEvents:<$SkipEvents>"\n'
            txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#' + replace
        else: # pythia like job
            # position of the next wrapper argument to consume
            seedIndex=1
            if (self.firstRun):
                txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
                txt += 'echo "FirstRun: <$FirstRun>"\n'
                txt += 'sed "s#\\<INPUTFIRSTRUN\\>#$FirstRun#' + replace
                seedIndex += 1
            if (self.sourceSeed):
                txt += 'Seed=${args['+str(seedIndex)+']}\n'
                txt += 'echo "Seed: <$Seed>"\n'
                txt += 'sed "s#\\<INPUT\\>#$Seed#' + replace
                seedIndex += 1
                ## the following seeds are not always present
                # NOTE(review): the optional seeds are taken to be nested
                # under sourceSeed -- confirm against the indented file.
                if (self.sourceSeedVtx):
                    txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "VtxSeed: <$VtxSeed>"\n'
                    txt += 'sed "s#\\<INPUTVTX\\>#$VtxSeed#' + replace
                    seedIndex += 1
                if (self.sourceSeedG4):
                    txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "G4Seed: <$G4Seed>"\n'
                    txt += 'sed "s#\\<INPUTG4\\>#$G4Seed#' + replace
                    seedIndex += 1
                if (self.sourceSeedMix):
                    txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "MixSeed: <$mixSeed>"\n'
                    txt += 'sed "s#\\<INPUTMIX\\>#$mixSeed#' + replace
                    seedIndex += 1
        txt += 'mv -f '+pset+' pset.cfg\n'

    # copy the additional user files, when they reached the worker node
    for inboxFile in self.additional_inbox_files:
        relFile = inboxFile.split("/")[-1]
        txt += 'if [ -e $RUNTIME_AREA/'+relFile+' ] ; then\n'
        txt += ' cp $RUNTIME_AREA/'+relFile+' .\n'
        txt += ' chmod +x '+relFile+'\n'
        txt += 'fi\n'

    if self.pset is not None: #CarlosDaniele
        txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'

        txt += '\n'
        txt += 'echo "***** cat pset.cfg *********"\n'
        txt += 'cat pset.cfg\n'
        txt += 'echo "****** end pset.cfg ********"\n'
        txt += '\n'
    return txt
954    
def wsBuildExe(self, nj=0):
    """
    Put in the script the commands to build an executable
    or a library.

    Returns an empty string when the file self.tgzNameWithPath does not
    exist; otherwise returns the shell commands which unpack that tar-ball
    from $RUNTIME_AREA, handle an unpacking failure and prepend
    ProdAgentApi to PYTHONPATH.
    """
    if not os.path.isfile(self.tgzNameWithPath):
        return ""

    tarball = '$RUNTIME_AREA/' + os.path.basename(self.tgzNameWithPath)
    script = [
        'echo "tar xzvf ' + tarball + '"\n',
        'tar xzvf ' + tarball + '\n',
        'untar_status=$? \n',
        'if [ $untar_status -ne 0 ]; then \n',
        ' echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n',
        ' echo "JOB_EXIT_STATUS = $untar_status" \n',
        ' echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n',
        # on OSG the private working directory has to be removed as well
        ' if [ $middleware == OSG ]; then \n',
        ' echo "Remove working directory: $WORKING_DIR"\n',
        ' cd $RUNTIME_AREA\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n',
        ' echo "JOB_EXIT_STATUS = 50999"\n',
        ' echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' fi\n',
        ' fi \n',
        ' \n',
        ' exit 1 \n',
        'else \n',
        ' echo "Successful untar" \n',
        'fi \n',
        '\n',
        'echo "Include ProdAgentApi in PYTHONPATH"\n',
        'if [ -z "$PYTHONPATH" ]; then\n',
        ' export PYTHONPATH=ProdAgentApi\n',
        'else\n',
        ' export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n',
        'fi\n',
        '\n',
    ]
    return "".join(script)
1002    
def modifySteeringCards(self, nj):
    """
    modify the card provided by the user,
    writing a new card into share dir

    This implementation has no body: it does nothing and returns None.
    """
    return None
1008    
def executableName(self):
    """
    Return the command used to start the job: "sh " when a user script
    (self.scriptExe) is set, self.executable otherwise.
    """
    if not self.scriptExe:
        return self.executable
    #CarlosDaniele
    return "sh "
1014 slacapra 1.1
def executableArgs(self):
    """
    Return the arguments of the job command: the user script followed by
    " $NJob" when self.scriptExe is set, " -p pset.cfg" otherwise.
    """
    if not self.scriptExe:
        return " -p pset.cfg"
    #CarlosDaniele
    return "%s $NJob" % self.scriptExe
1020 slacapra 1.1
def inputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL input sandbox.

    The list holds, in this order: the code tar-ball and the ML tar-ball
    (each only if the file exists), the job configuration file (only if
    self.pset is not None) and every entry of self.additional_inbox_files.
    """
    inp_box = []
    ## code
    if os.path.isfile(self.tgzNameWithPath):
        inp_box.append(self.tgzNameWithPath)
    # buildTar_ returns before assigning self.MLtgzfile when the working
    # area is the release top, so the attribute may be missing here.
    mlTgz = getattr(self, 'MLtgzfile', None)
    if mlTgz and os.path.isfile(mlTgz):
        inp_box.append(mlTgz)
    ## config
    if self.pset is not None:
        inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
    ## additional input files
    inp_box.extend(self.additional_inbox_files)
    return inp_box
1040    
def outputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL output sandbox.

    Each name of self.output_file and self.output_file_sandbox is tagged
    through self.numberFile_ with the job index plus one.
    """
    ## User Declared output files
    declared = self.output_file + self.output_file_sandbox
    return [self.numberFile_(name, str(nj + 1)) for name in declared]
1052    
def prepareSteeringCards(self):
    """
    Make initial modifications of the user's steering card file.

    This implementation performs no action and returns None.
    """
    return None
1058    
def wsRenameOutput(self, nj):
    """
    Returns part of a job script which renames the produced files.

    For every declared output file the script copies it to $RUNTIME_AREA
    under its job-numbered name; then it goes back to $RUNTIME_AREA,
    removes the OSG working directory and defines the shell variable
    'file_list' with the numbered names of self.output_file only.
    """
    script = ['\n', '# directory content\n', 'ls \n']

    for produced in (self.output_file + self.output_file_sandbox):
        numbered = self.numberFile_(produced, '$NJob')
        script.append('\n')
        script.append('# check output file\n')
        script.append('ls ' + produced + '\n')
        script.append('ls_result=$?\n')
        script.append('if [ $ls_result -ne 0 ] ; then\n')
        script.append(' echo "ERROR: Problem with output file"\n')
        if common.scheduler.boss_scheduler_name == 'condor_g':
            script.append(' if [ $middleware == OSG ]; then \n')
            script.append(' echo "prepare dummy output file"\n')
            script.append(' echo "Processing of job output failed" > $RUNTIME_AREA/' + numbered + '\n')
            script.append(' fi \n')
        script.append('else\n')
        script.append(' cp ' + produced + ' $RUNTIME_AREA/' + numbered + '\n')
        script.append('fi\n')

    script.extend([
        'cd $RUNTIME_AREA\n',
        'cd $RUNTIME_AREA\n',
        ### OLI_DANIELE
        'if [ $middleware == OSG ]; then\n',
        ' cd $RUNTIME_AREA\n',
        ' echo "Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n',
        ' echo "JOB_EXIT_STATUS = 60999"\n',
        ' echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' fi\n',
        'fi\n',
        '\n',
    ])

    ## Add to filelist only files to be possibly copied to SE
    to_copy = [self.numberFile_(name, '$NJob') for name in self.output_file]
    script.append('file_list="' + ' '.join(to_copy) + '"\n')

    return ''.join(script)
1113    
def numberFile_(self, file, txt):
    """
    append _'txt' before last extension of a file

    'file' is split on "."; when there is at least one dot the result is
    '<everything before the last dot>_<txt>.<last part>', otherwise it is
    '<file>_<txt>'.
    """
    # str methods replace the deprecated string.split function
    parts = file.split(".")
    if len(parts) > 1:
        # take away last extension, add "_txt", put the extension back
        return ".".join(parts[:-1]) + '_' + txt + "." + parts[-1]
    return file + '_' + txt
1131    
def getRequirements(self, nj=[]):
    """
    return job requirements to add to jdl files

    The expression asks for outbound connectivity and, when self.version
    is set, for the matching 'VO-cms-<version>' software tag.  The clauses
    are joined with ' && ', so no dangling operator is produced when
    self.version is empty.  'nj' is not used; its default is never
    modified.
    """
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' +
                       self.version +
                       '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')

    return ' && '.join(clauses)
1145 gutsche 1.3
def configFilename(self):
    """ return the config filename: the value of self.name() plus '.cfg' """
    base = self.name()
    return base + '.cfg'
1149    
1150     ### OLI_DANIELE
def wsSetupCMSOSGEnvironment_(self):
    """
    Returns part of a job script which prepares
    the execution environment and which is common for all CMS jobs.

    The script sources cmsset_default.sh, with self.version as argument,
    from $GRID3_APP_DIR or from $OSG_APP.  When neither file exists it
    reports error 10020, removes $WORKING_DIR (reporting 10017 if the
    directory is still there afterwards) and exits with status 1.
    """
    # Status-report lines repeated after every error code in the script.
    report = ''
    report += ' dumpStatus $RUNTIME_AREA/$repo\n'
    report += ' rm -f $RUNTIME_AREA/$repo \n'
    report += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    report += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'

    txt = '\n'
    txt += ' echo "### SETUP CMS OSG ENVIRONMENT ###"\n'
    txt += ' if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
    txt += ' # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
    txt += ' source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
    txt += ' elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
    txt += ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
    txt += ' source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
    txt += ' else\n'
    txt += ' echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10020"\n'
    txt += ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
    txt += report
    # no 'exit 1' here: the script must first reach the cleanup of the
    # working directory below, and exits right after it.
    txt += '\n'
    txt += ' echo "Remove working directory: $WORKING_DIR"\n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10017"\n'
    txt += ' echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
    txt += report
    txt += ' fi\n'
    txt += '\n'
    txt += ' exit 1\n'
    txt += ' fi\n'
    txt += '\n'
    txt += ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
    txt += ' echo " END SETUP CMS OSG ENVIRONMENT "\n'

    return txt
1194    
1195     ### OLI_DANIELE
1196     def wsSetupCMSLCGEnvironment_(self):
1197     """
1198     Returns part of a job script which is prepares
1199     the execution environment and which is common for all CMS jobs.
1200     """
1201     txt = ' \n'
1202     txt += ' echo " ### SETUP CMS LCG ENVIRONMENT ### "\n'
1203     txt += ' if [ ! $VO_CMS_SW_DIR ] ;then\n'
1204     txt += ' echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1205     txt += ' echo "JOB_EXIT_STATUS = 10031" \n'
1206     txt += ' echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1207     txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1208 gutsche 1.13 txt += ' rm -f $RUNTIME_AREA/$repo \n'
1209     txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1210     txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1211 gutsche 1.7 txt += ' exit 1\n'
1212 gutsche 1.3 txt += ' else\n'
1213     txt += ' echo "Sourcing environment... "\n'
1214     txt += ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1215     txt += ' echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1216     txt += ' echo "JOB_EXIT_STATUS = 10020"\n'
1217     txt += ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1218     txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1219 gutsche 1.13 txt += ' rm -f $RUNTIME_AREA/$repo \n'
1220     txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1221     txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1222 gutsche 1.7 txt += ' exit 1\n'
1223 gutsche 1.3 txt += ' fi\n'
1224     txt += ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1225     txt += ' source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1226     txt += ' result=$?\n'
1227     txt += ' if [ $result -ne 0 ]; then\n'
1228     txt += ' echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1229     txt += ' echo "JOB_EXIT_STATUS = 10032"\n'
1230     txt += ' echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1231     txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1232 gutsche 1.13 txt += ' rm -f $RUNTIME_AREA/$repo \n'
1233     txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1234     txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1235 gutsche 1.7 txt += ' exit 1\n'
1236 gutsche 1.3 txt += ' fi\n'
1237     txt += ' fi\n'
1238     txt += ' \n'
1239     txt += ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1240     txt += ' echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1241     return txt
1242 gutsche 1.5
def setParam_(self, param, value):
    """ store 'value' under the key 'param' in self._params """
    params = self._params
    params[param] = value
1245    
def getParams(self):
    """ return self._params itself (not a copy) """
    params = self._params
    return params
1248 gutsche 1.8
def setTaskid_(self):
    """ copy the 'taskId' entry of self.cfg_params into self._taskId """
    task_id = self.cfg_params['taskId']
    self._taskId = task_id
1251    
def getTaskid(self):
    """ return the value stored in self._taskId """
    task_id = self._taskId
    return task_id
1254 gutsche 1.35
1255     #######################################################################
def uniquelist(self, old):
    """
    remove duplicates from a list

    The elements of 'old' are used as dictionary keys and the keys of that
    dictionary are returned, so the elements must be hashable.
    """
    distinct = dict.fromkeys(old, 0)
    return distinct.keys()