ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.91
Committed: Tue Jun 19 15:20:53 2007 UTC (17 years, 10 months ago) by slacapra
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_1_5_3_pre1
Changes since 1.90: +4 -4 lines
Log Message:
temporarly comment arch requiremtns for 153_pre1

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5     import common
6 slacapra 1.90 # import PsetManipulator
7 slacapra 1.86 # import DataDiscovery
8     # import DataDiscovery_DBS2
9     # import DataLocation
10 slacapra 1.1 import Scram
11    
12 slacapra 1.70 import os, string, re, shutil, glob
13 slacapra 1.1
14     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs):
    """
    Configure a CMSSW job type from the user's CRAB configuration.

    Reads the CMSSW/USER/EDG cards from cfg_params, validates them,
    performs data discovery/location when a dataset is given, builds
    the user tarball, splits the task into jobs and finally writes the
    modified ParameterSet.

    ARGUMENTS: cfg_params - dictionary-like object with the crab.cfg cards
               ncjobs     - number of jobs to create, or 'all'
    RAISES:    CrabException on missing/invalid configuration or when the
               ParameterSet cannot be manipulated
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug(3,'CMSSW::__init__')

    # Marco.
    self._params = {}
    self.cfg_params = cfg_params

    # maximum allowed size (MB) of the input sandbox tarball
    try:
        self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
    except KeyError:
        self.MaxTarBallSize = 9.5

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    log = common.logger

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''          # script use case
    self.datasetPath = ''   # script use case

    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()
    common.taskDB.setDict('codeVersion',self.version)
    self.setParam_('application', self.version)

    ### collect Data cards

    ## get DBS mode
    try:
        self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
    except KeyError:
        self.use_dbs_1 = 0

    try:
        tmp = cfg_params['CMSSW.datasetpath']
        log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
        if string.lower(tmp)=='none':
            # no input dataset (e.g. generation job)
            self.datasetPath = None
            self.selectNoInput = 1
        else:
            self.datasetPath = tmp
            self.selectNoInput = 0
    except KeyError:
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)

    # ML monitoring
    # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
    if not self.datasetPath:
        self.setParam_('dataset', 'None')
        self.setParam_('owner', 'None')
    else:
        datasetpath_split = self.datasetPath.split("/")
        if self.use_dbs_1 == 1 :
            self.setParam_('dataset', datasetpath_split[1])
            self.setParam_('owner', datasetpath_split[-1])
        else:
            self.setParam_('dataset', datasetpath_split[1])
            self.setParam_('owner', datasetpath_split[2])
    self.setTaskid_()
    self.setParam_('taskId', self.cfg_params['taskId'])

    self.dataTiers = []

    ## now the application
    try:
        self.executable = cfg_params['CMSSW.executable']
        self.setParam_('exe', self.executable)
        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
        msg = "Default executable cmsRun overridden. Switch to " + self.executable
        log.debug(3,msg)
    except KeyError:
        self.executable = 'cmsRun'
        self.setParam_('exe', self.executable)
        msg = "User executable not defined. Use cmsRun"
        log.debug(3,msg)
        pass

    try:
        self.pset = cfg_params['CMSSW.pset']
        log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
        if self.pset.lower() != 'none' :
            if (not os.path.exists(self.pset)):
                raise CrabException("User defined PSet file "+self.pset+" does not exist")
        else:
            # 'none' means: no ParameterSet, a user script is run instead
            self.pset = None
    except KeyError:
        raise CrabException("PSet file missing. Cannot run cmsRun ")

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    try:
        self.output_file = []
        tmp = cfg_params['CMSSW.output_file']
        if tmp != '':
            tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
            log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
            for tmp in tmpOutFiles:
                tmp=string.strip(tmp)
                self.output_file.append(tmp)
                pass
        else:
            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available")
            pass
        pass
    except KeyError:
        log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available")
        pass

    # script_exe file as additional file in inputSandbox
    try:
        self.scriptExe = cfg_params['USER.script_exe']
        if self.scriptExe != '':
            if not os.path.isfile(self.scriptExe):
                msg ="ERROR. file "+self.scriptExe+" not found"
                raise CrabException(msg)
            self.additional_inbox_files.append(string.strip(self.scriptExe))
    except KeyError:
        self.scriptExe = ''

    #CarlosDaniele
    # with neither dataset nor pset the only thing left to run is a script
    if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
        msg ="Error. script_exe not defined"
        raise CrabException(msg)

    ## additional input files
    try:
        tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
        for tmp in tmpAddFiles:
            tmp = string.strip(tmp)
            if not tmp:
                # skip empty entries (empty card, trailing or doubled comma)
                # instead of failing with IndexError on tmp[0] below
                continue
            dirname = ''
            if not tmp[0]=="/": dirname = "."
            files = []
            if string.find(tmp,"*")>-1:
                # wildcard: expand it and require at least one match
                files = glob.glob(os.path.join(dirname, tmp))
                if len(files)==0:
                    raise CrabException("No additional input file found with this pattern: "+tmp)
            else:
                files.append(tmp)
            for file in files:
                if not os.path.exists(file):
                    raise CrabException("Additional input file not found: "+file)
                pass
                # copy the file into the task 'share' area and ship the copy
                fname = string.split(file, '/')[-1]
                storedFile = common.work_space.pathForTgz()+'share/'+fname
                shutil.copyfile(file, storedFile)
                self.additional_inbox_files.append(string.strip(storedFile))
            pass
        pass
        common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
    except KeyError:
        pass

    # files per job
    try:
        if (cfg_params['CMSSW.files_per_jobs']):
            raise CrabException("files_per_jobs no longer supported. Quitting.")
    except KeyError:
        pass

    ## Events per job
    try:
        self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
        self.selectEventsPerJob = 1
    except KeyError:
        self.eventsPerJob = -1
        self.selectEventsPerJob = 0

    ## number of jobs
    try:
        self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
        self.selectNumberOfJobs = 1
    except KeyError:
        self.theNumberOfJobs = 0
        self.selectNumberOfJobs = 0

    try:
        self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
        self.selectTotalNumberEvents = 1
    except KeyError:
        self.total_number_of_events = 0
        self.selectTotalNumberEvents = 0

    if self.pset != None: #CarlosDaniele
        if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
            msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
            raise CrabException(msg)
    else:
        if (self.selectNumberOfJobs == 0):
            msg = 'Must specify number_of_jobs.'
            raise CrabException(msg)

    ## source seed for pythia
    try:
        self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
    except KeyError:
        self.sourceSeed = None
        common.logger.debug(5,"No seed given")

    try:
        self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
    except KeyError:
        self.sourceSeedVtx = None
        common.logger.debug(5,"No vertex seed given")

    try:
        self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
    except KeyError:
        self.sourceSeedG4 = None
        common.logger.debug(5,"No g4 sim hits seed given")

    try:
        self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
    except KeyError:
        self.sourceSeedMix = None
        common.logger.debug(5,"No mix seed given")

    try:
        self.firstRun = int(cfg_params['CMSSW.first_run'])
    except KeyError:
        self.firstRun = None
        common.logger.debug(5,"No first run given")
    if self.pset != None: #CarlosDaniele
        # imported lazily: only needed when a ParameterSet is used
        import PsetManipulator
        PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset

    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination=[]  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end

    self.tgzNameWithPath = self.getTarBall(self.executable)

    ## Select Splitting
    if self.selectNoInput:
        if self.pset == None: #CarlosDaniele
            self.jobSplittingForScript()
        else:
            self.jobSplittingNoInput()
    else:
        self.jobSplittingByBlocks(blockSites)

    # modify Pset
    if self.pset != None: #CarlosDaniele
        try:
            if (self.datasetPath): # standard job
                # allow to process a fraction of events in a file
                PsetEdit.inputModule("INPUT")
                PsetEdit.maxEvent("INPUTMAXEVENTS")
                PsetEdit.skipEvent("INPUTSKIPEVENTS")
            else:  # pythia like job
                PsetEdit.maxEvent(self.eventsPerJob)
                if (self.firstRun):
                    PsetEdit.pythiaFirstRun("INPUTFIRSTRUN")  #First Run
                # NOTE(review): the vertex/G4/mix seeds are taken to be nested
                # under the pythia seed (nesting reconstructed from a listing
                # without indentation) -- confirm against the repository.
                if (self.sourceSeed) :
                    PsetEdit.pythiaSeed("INPUT")
                    if (self.sourceSeedVtx) :
                        PsetEdit.vtxSeed("INPUTVTX")
                    # PsetEdit is a local object, not an attribute of self:
                    # 'self.PsetEdit' raised AttributeError, which the bare
                    # except below turned into a generic failure.
                    if (self.sourceSeedG4) :
                        PsetEdit.g4Seed("INPUTG4")
                    if (self.sourceSeedMix) :
                        PsetEdit.mixSeed("INPUTMIX")
            # add FrameworkJobReport to parameter-set
            PsetEdit.addCrabFJR(self.fjrFileName)
            PsetEdit.psetWriter(self.configFilename())
        except:
            msg='Error while manipuliating ParameterSet: exiting...'
            raise CrabException(msg)
306 gutsche 1.3
307 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
308    
309 slacapra 1.86 import DataDiscovery
310     import DataDiscovery_DBS2
311     import DataLocation
312 gutsche 1.3 common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
313    
314     datasetPath=self.datasetPath
315    
316 slacapra 1.1 ## Contact the DBS
317 slacapra 1.41 common.logger.message("Contacting DBS...")
318 slacapra 1.1 try:
319 gutsche 1.66
320 slacapra 1.86 if self.use_dbs_1 == 1 :
321     self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
322     else :
323 corvo 1.85 self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
324 slacapra 1.1 self.pubdata.fetchDBSInfo()
325    
326 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
327 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
328     raise CrabException(msg)
329 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
330 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
331     raise CrabException(msg)
332 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
333 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
334 slacapra 1.1 raise CrabException(msg)
335 gutsche 1.67 except DataDiscovery_DBS2.NotExistingDatasetError_DBS2, ex :
336     msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
337     raise CrabException(msg)
338     except DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2, ex :
339     msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
340     raise CrabException(msg)
341     except DataDiscovery_DBS2.DataDiscoveryError_DBS2, ex:
342     msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
343     raise CrabException(msg)
344 slacapra 1.1
345     ## get list of all required data in the form of dbs paths (dbs path = /dataset/datatier/owner)
346 gutsche 1.3 common.logger.message("Required data are :"+self.datasetPath)
347    
348 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
349 mkirn 1.37 self.eventsbyblock=self.pubdata.getEventsPerBlock()
350     self.eventsbyfile=self.pubdata.getEventsPerFile()
351 gutsche 1.3
352 slacapra 1.1 ## get max number of events
353     self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py
354 gutsche 1.44 common.logger.message("The number of available events is %s\n"%self.maxEvents)
355 slacapra 1.1
356 slacapra 1.41 common.logger.message("Contacting DLS...")
357 slacapra 1.1 ## Contact the DLS and build a list of sites hosting the fileblocks
358     try:
359 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
360 gutsche 1.6 dataloc.fetchDLSInfo()
361 slacapra 1.41 except DataLocation.DataLocationError , ex:
362 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
363     raise CrabException(msg)
364    
365    
366 gutsche 1.35 sites = dataloc.getSites()
367     allSites = []
368     listSites = sites.values()
369 slacapra 1.63 for listSite in listSites:
370     for oneSite in listSite:
371 gutsche 1.35 allSites.append(oneSite)
372     allSites = self.uniquelist(allSites)
373 gutsche 1.3
374 gutsche 1.35 common.logger.message("Sites ("+str(len(allSites))+") hosting part/all of dataset: "+str(allSites))
375     common.logger.debug(6, "List of Sites: "+str(allSites))
376     return sites
377 gutsche 1.3
def jobSplittingByBlocks(self, blockSites):
    """
    Perform job splitting. Jobs run over an integer number of files
    and no more than one block.
    ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
    REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberofJobs,
              self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
              self.maxEvents, self.filesbyblock, self.eventsbyblock, self.eventsbyfile,
              self.ncjobs
    SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
          self.total_number_of_jobs - Total # of jobs (also stored in self.ncjobs)
          self.list_of_args - per job: [escaped file list, max events, skip events]
    """

    # ---- Handle the possible job splitting configurations ---- #
    if (self.selectTotalNumberEvents):
        totalEventsRequested = self.total_number_of_events
    if (self.selectEventsPerJob):
        eventsPerJobRequested = self.eventsPerJob
        if (self.selectNumberOfJobs):
            totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob

    # If user requested all the events in the dataset
    if (totalEventsRequested == -1):
        eventsRemaining=self.maxEvents
    # If user requested more events than are in the dataset
    elif (totalEventsRequested > self.maxEvents):
        eventsRemaining = self.maxEvents
        # report the total actually requested: self.total_number_of_events is 0
        # when the user specified events_per_job and number_of_jobs instead
        common.logger.message("Requested "+str(totalEventsRequested)+ " events, but only "+str(self.maxEvents)+" events are available.")
    # If user requested less events than are in the dataset
    else:
        eventsRemaining = totalEventsRequested

    # If user requested more events per job than are in the dataset
    if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
        eventsPerJobRequested = self.maxEvents

    # For user info at end
    totalEventCount = 0

    if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
        eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)

    if (self.selectNumberOfJobs):
        common.logger.message("May not create the exact number_of_jobs requested.")

    # 'all' means no limit on the number of jobs to create
    if ( self.ncjobs == 'all' ) :
        totalNumberOfJobs = 999999999
    else :
        totalNumberOfJobs = self.ncjobs


    blocks = blockSites.keys()
    blockCount = 0
    # Backup variable in case self.maxEvents counted events in a non-included block
    numBlocksInDataset = len(blocks)

    jobCount = 0
    list_of_lists = []

    # ---- Iterate over the blocks in the dataset until ---- #
    # ---- we've met the requested total # of events    ---- #
    while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
        block = blocks[blockCount]
        blockCount += 1

        # blocks without event information are skipped
        if self.eventsbyblock.has_key(block) :
            numEventsInBlock = self.eventsbyblock[block]
            common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))

            files = self.filesbyblock[block]
            numFilesInBlock = len(files)
            if (numFilesInBlock <= 0):
                continue
            fileCount = 0

            # ---- New block => New job ---- #
            parString = "\\{"
            # counter for number of events in files currently worked on
            filesEventCount = 0
            # flag if next while loop should touch new file
            newFile = 1
            # job event counter
            jobSkipEventCount = 0

            # ---- Iterate over the files in the block until we've met the requested ---- #
            # ---- total # of events or we've gone over all the files in this block  ---- #
            while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
                file = files[fileCount]
                if newFile :
                    try:
                        numEventsInFile = self.eventsbyfile[file]
                        common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
                        # increase filesEventCount
                        filesEventCount += numEventsInFile
                        # Add file to current job
                        parString += '\\\"' + file + '\\\"\,'
                        newFile = 0
                    except KeyError:
                        common.logger.message("File "+str(file)+" has unknown number of events: skipping")


                # if less events in file remain than eventsPerJobRequested
                if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
                    # if last file in block
                    if ( fileCount == numFilesInBlock-1 ) :
                        # end job using last file, use remaining events in block
                        # close job and touch new file
                        # ([:-2] drops the trailing '\,' separator)
                        fullString = parString[:-2]
                        fullString += '\\}'
                        list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
                        self.jobDestination.append(blockSites[block])
                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                        # reset counter
                        jobCount = jobCount + 1
                        totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
                        eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
                        jobSkipEventCount = 0
                        # reset file
                        parString = "\\{"
                        filesEventCount = 0
                        newFile = 1
                        fileCount += 1
                    else :
                        # go to next file
                        newFile = 1
                        fileCount += 1
                # if events in file equal to eventsPerJobRequested
                elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
                    # close job and touch new file
                    fullString = parString[:-2]
                    fullString += '\\}'
                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    # reset counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    jobSkipEventCount = 0
                    # reset file
                    parString = "\\{"
                    filesEventCount = 0
                    newFile = 1
                    fileCount += 1

                # if more events in file remain than eventsPerJobRequested
                else :
                    # close job but don't touch new file
                    fullString = parString[:-2]
                    fullString += '\\}'
                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    # increase counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    # calculate skip events for last file
                    # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
                    jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
                    # remove all but the last file
                    filesEventCount = self.eventsbyfile[file]
                    parString = "\\{"
                    parString += '\\\"' + file + '\\\"\,'
                    pass # END if
                pass # END while (iterate over files in the block)
        pass # END while (iterate over blocks in the dataset)
    self.ncjobs = self.total_number_of_jobs = jobCount
    if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
        common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
    common.logger.message("\n"+str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")

    self.list_of_args = list_of_lists
    return
555    
def jobSplittingNoInput(self):
    """
    Perform job splitting based on number of event per job.

    REQUIRES: self.selectEventsPerJob, self.selectTotalNumberEvents,
              self.selectNumberOfJobs, self.eventsPerJob,
              self.theNumberOfJobs, self.total_number_of_events,
              self.firstRun and the self.sourceSeed* values
    SETS:     self.total_number_of_jobs, self.list_of_args and, depending
              on the cards given, self.total_number_of_events or
              self.eventsPerJob; appends one entry per job to
              self.jobDestination
    RAISES:   CrabException if the total number of events is negative
    """
    log = common.logger
    log.debug(5,'Splitting per events')
    log.message('Required '+str(self.eventsPerJob)+' events per job ')
    log.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
    log.message('Required '+str(self.total_number_of_events)+' events in total ')

    if self.total_number_of_events < 0:
        raise CrabException('Cannot split jobs per Events with "-1" as total number of events')

    # derive the missing quantity from the two supplied by the user
    if self.selectEventsPerJob:
        if self.selectTotalNumberEvents:
            self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
        elif self.selectNumberOfJobs:
            self.total_number_of_jobs = self.theNumberOfJobs
            self.total_number_of_events = int(self.theNumberOfJobs*self.eventsPerJob)
    elif self.selectNumberOfJobs:
        self.total_number_of_jobs = self.theNumberOfJobs
        self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)

    log.debug(5,'N jobs '+str(self.total_number_of_jobs))

    # is there any remainder?
    achievable = int(self.total_number_of_jobs)*self.eventsPerJob
    check = int(self.total_number_of_events) - achievable

    log.debug(5,'Check '+str(check))

    log.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
    if check > 0:
        log.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(achievable))

    # argument is seed number.$i
    self.list_of_args = []
    for i in range(self.total_number_of_jobs):
        suffix = str(i)
        ## Since there is no input, any site is good
        self.jobDestination.append([""]) #must be empty to write correctly the xml
        # first argument: pythia first run (if any) followed by the job index
        if self.firstRun:
            args = [str(self.firstRun)+suffix]
        else:
            args = [suffix]
        # NOTE(review): vtx/G4/mix seeds are taken to be nested under the
        # pythia seed (nesting reconstructed from a listing without
        # indentation) -- confirm against the repository.
        if self.sourceSeed:
            args.append(str(self.sourceSeed)+suffix)
            ## + vtx, G4 and Mix random seeds, in this order
            for extraSeed in (self.sourceSeedVtx, self.sourceSeedG4, self.sourceSeedMix):
                if extraSeed:
                    args.append(str(extraSeed)+suffix)
        self.list_of_args.append(args)

    return
625    
626 spiga 1.42
def jobSplittingForScript(self):#CarlosDaniele
    """
    Perform job splitting based on number of job.

    Creates self.theNumberOfJobs jobs; each job gets its index as the
    only argument and an empty destination.
    SETS: self.total_number_of_jobs, self.list_of_args; appends one entry
          per job to self.jobDestination
    """
    log = common.logger
    log.debug(5,'Splitting per job')
    log.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')

    nJobs = self.theNumberOfJobs
    self.total_number_of_jobs = nJobs

    log.debug(5,'N jobs '+str(nJobs))

    log.message(str(nJobs)+' jobs can be created')

    # argument is seed number.$i (no random seed for scripts)
    self.list_of_args = [[str(i)] for i in range(nJobs)]
    ## Since there is no input, any site is good
    self.jobDestination.extend([[""] for i in range(nJobs)])
    return
649    
def split(self, jobParams):
    """
    Store arguments and destination of every job in the job DB.

    ARGUMENT: jobParams - list; one slot per job is appended and the
              first self.total_number_of_jobs slots are set to the job
              argument lists
    REQUIRES: self.total_number_of_jobs, self.list_of_args,
              self.jobDestination
    """
    common.jobDB.load()
    #### Fabio
    nJobs = self.total_number_of_jobs
    argsPerJob = self.list_of_args

    # create the empty structure
    jobParams.extend([""] * nJobs)

    for idx in range(nJobs):
        jobParams[idx] = argsPerJob[idx]
        common.jobDB.setArguments(idx, jobParams[idx])
        destination = self.jobDestination[idx]
        common.logger.debug(5,"Job "+str(idx)+" Destination: "+str(destination))
        common.jobDB.setDestination(idx, self.jobDestination[idx])

    common.jobDB.save()
    return
670    
def getJobTypeArguments(self, nj, sched):
    """
    Return the arguments stored in the job DB for job nj as one string,
    each argument followed by a blank. sched is not used here.
    """
    pieces = [str(arg)+" " for arg in common.jobDB.arguments(nj)]
    return ''.join(pieces)
676 gutsche 1.3
def numberOfJobs(self):
    """
    Return self.total_number_of_jobs, the number of jobs set by the
    job splitting.
    """
    # Fabio
    nJobs = self.total_number_of_jobs
    return nJobs
680    
def getTarBall(self, exe):
    """
    Return the TarBall with lib and exe.

    The path is stored in self.tgzNameWithPath. An already existing file
    is reused; otherwise self.buildTar_(exe) is called first.
    """
    #
    # Marco. Let's start to use relative path for Boss XML files
    #
    tarballPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
    self.tgzNameWithPath = tarballPath

    if not os.path.exists(tarballPath):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return string.strip(self.tgzNameWithPath)

    # it exists already: just return it
    return self.tgzNameWithPath
698    
def buildTar_(self, executable):
    """
    Build the two gzipped tarballs shipped with the job.

    The first archive is written to self.tgzNameWithPath (which must
    already be set) and contains, when they exist:
      - the user executable, if it does not live under the release top;
      - the 'lib' and 'module' directories of the user SCRAM area;
      - every directory named 'data' found below the user SCRAM area;
      - the 'ProdAgentApi' directory found under $CRABDIR.
    The second archive, self.MLtgzfile, collects a fixed list of helper
    scripts taken from $CRABDIR/python.

    Nothing is built (and self.MLtgzfile is not set) when the release top
    is empty or equal to the user SCRAM area.

    executable -- name of the user executable, resolved with
                  self.scram.findFile_().

    Raises CrabException when the executable cannot be found, when either
    archive cannot be written, or when the first archive is larger than
    self.MaxTarBallSize megabytes.
    """
    import sys
    import tarfile

    def discard(path):
        # Best effort: drop a half-written archive so that a later
        # existence check does not mistake it for a complete one.
        try:
            if os.path.exists(path):
                os.remove(path)
        except OSError:
            pass

    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        return

    try:  # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            if self.executable != '':
                exeWithPath = self.scram.findFile_(executable)
                if not exeWithPath:
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship it; it is always
                    # stored under its bare file name, wherever it lives
                    common.logger.debug(5, "Exe "+exeWithPath+" to be tarred")
                    tar.add(exeWithPath, os.path.basename(executable))
                # otherwise the exe is from release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            libDir = 'lib'
            lib = swArea + '/' + libDir
            common.logger.debug(5, "lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib, libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module, moduleDir)

            ## Now check if any data dir(s) is present
            swAreaLen = len(swArea)
            for root, dirs, files in os.walk(swArea):
                if "data" in dirs:
                    common.logger.debug(5, "data "+root+"/data"+" to be tarred")
                    tar.add(root+"/data", root[swAreaLen:]+"/data")
                    # tar.add() already recursed into this directory; do
                    # not walk it again or nested 'data' dirs are stored twice
                    dirs.remove("data")

            ## Add ProdAgent dir to tar
            paDir = 'ProdAgentApi'
            pa = os.environ['CRABDIR'] + '/' + paDir
            if os.path.isdir(pa):
                tar.add(pa, paDir)

            common.logger.debug(5, "Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            # release the file handle on success and on failure alike
            tar.close()
    except CrabException:
        # keep the specific message (e.g. executable not found)
        discard(self.tgzNameWithPath)
        raise
    except Exception:
        reason = str(sys.exc_info()[1])
        discard(self.tgzNameWithPath)
        raise CrabException('Could not create tar-ball: ' + reason)

    ## check for tarball size
    tarballinfo = os.stat(self.tgzNameWithPath)
    if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
        raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')

    ## create tar-ball with ML stuff
    self.MLtgzfile = common.work_space.pathForTgz()+'share/MLfiles.tgz'
    try:
        tar = tarfile.open(self.MLtgzfile, "w:gz")
        try:
            path = os.environ['CRABDIR'] + '/python/'
            for fname in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
                tar.add(path+fname, fname)
            common.logger.debug(5, "Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
        finally:
            tar.close()
    except Exception:
        reason = str(sys.exc_info()[1])
        discard(self.MLtgzfile)
        raise CrabException('Could not create ML files tar-ball: ' + reason)

    return
787    
# wsSetupEnvironment(nj): build and return the shell-script text that sets up
# the CMSSW environment for job number 'nj'. In order, the generated text:
#   1. runs the LCG or OSG site setup (text from wsSetupCMSLCGEnvironment_ /
#      wsSetupCMSOSGEnvironment_) depending on $middleware; on OSG it first
#      creates a temporary $WORKING_DIR and changes into it;
#   2. exports SCRAM_ARCH, creates a project area for self.version with the
#      scram command and evals its runtime environment;
#   3. reports JobExitCode 50113 and exits when $nargs is less than 2;
#   4. when self.pset is not None, substitutes the job arguments into the
#      job's config file with sed and renames it to pset.cfg;
#   5. copies every file of self.additional_inbox_files from $RUNTIME_AREA
#      into the current directory and makes it executable.
# NOTE(review): $middleware, $nargs, $args, $RUNTIME_AREA, $repo and
# dumpStatus are not defined in this text; presumably the enclosing wrapper
# script provides them -- confirm.
788     def wsSetupEnvironment(self, nj):
789     """
790     Returns part of a job script which prepares
791     the execution environment for the job 'nj'.
792     """
793     # Prepare JobType-independent part
794 gutsche 1.3 txt = ''
795    
796     ## OLI_Daniele at this level middleware already known
797    
798     txt += 'if [ $middleware == LCG ]; then \n'
799     txt += self.wsSetupCMSLCGEnvironment_()
800     txt += 'elif [ $middleware == OSG ]; then\n'
801 gutsche 1.43 txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
802     txt += ' echo "Created working directory: $WORKING_DIR"\n'
803 gutsche 1.3 txt += ' if [ ! -d $WORKING_DIR ] ;then\n'
804 gutsche 1.7 txt += ' echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
805 slacapra 1.90 txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
806     txt += ' echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
807     txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
808 gutsche 1.13 txt += ' rm -f $RUNTIME_AREA/$repo \n'
809     txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
810     txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
811 gutsche 1.3 txt += ' exit 1\n'
812     txt += ' fi\n'
813     txt += '\n'
814     txt += ' echo "Change to working directory: $WORKING_DIR"\n'
815     txt += ' cd $WORKING_DIR\n'
816     txt += self.wsSetupCMSOSGEnvironment_()
817     txt += 'fi\n'
818 slacapra 1.1
819     # Prepare JobType-specific part
820     scram = self.scram.commandName()
821     txt += '\n\n'
822     txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
823 slacapra 1.86 txt += 'echo "Setting SCRAM_ARCH='+self.executable_arch+'"\n'
824     txt += 'export SCRAM_ARCH='+self.executable_arch+'\n'
825 slacapra 1.1 txt += scram+' project CMSSW '+self.version+'\n'
826     txt += 'status=$?\n'
827     txt += 'if [ $status != 0 ] ; then\n'
828 gutsche 1.7 txt += ' echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
829 gutsche 1.3 txt += ' echo "JOB_EXIT_STATUS = 10034"\n'
830 gutsche 1.7 txt += ' echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
831 slacapra 1.1 txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
832 gutsche 1.13 txt += ' rm -f $RUNTIME_AREA/$repo \n'
833     txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
834     txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
835 gutsche 1.3 ## OLI_Daniele
836     txt += ' if [ $middleware == OSG ]; then \n'
837     txt += ' echo "Remove working directory: $WORKING_DIR"\n'
838     txt += ' cd $RUNTIME_AREA\n'
839     txt += ' /bin/rm -rf $WORKING_DIR\n'
840     txt += ' if [ -d $WORKING_DIR ] ;then\n'
# NOTE(review): the message below names the literal release CMSSW_0_6_1 while
# the matching 10034 message above uses self.version -- looks like a leftover.
841 slacapra 1.90 txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
842     txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
843     txt += ' echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
844     txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
845 gutsche 1.13 txt += ' rm -f $RUNTIME_AREA/$repo \n'
846     txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
847     txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
848 gutsche 1.3 txt += ' fi\n'
849     txt += ' fi \n'
850     txt += ' exit 1 \n'
851 slacapra 1.1 txt += 'fi \n'
852     txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
853     txt += 'cd '+self.version+'\n'
854     ### needed grep for bug in scramv1 ###
855 corvo 1.58 txt += scram+' runtime -sh\n'
856 slacapra 1.1 txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
857 corvo 1.58 txt += 'echo $PATH\n'
858 slacapra 1.1
859     # Handle the arguments:
860     txt += "\n"
861 gutsche 1.7 txt += "## number of arguments (first argument always jobnumber)\n"
862 slacapra 1.1 txt += "\n"
863 mkirn 1.32 # txt += "narg=$#\n"
864     txt += "if [ $nargs -lt 2 ]\n"
865 slacapra 1.1 txt += "then\n"
866 mkirn 1.33 txt += " echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
867 gutsche 1.3 txt += ' echo "JOB_EXIT_STATUS = 50113"\n'
868 gutsche 1.7 txt += ' echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
869 slacapra 1.1 txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
870 gutsche 1.13 txt += ' rm -f $RUNTIME_AREA/$repo \n'
871     txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
872     txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
873 gutsche 1.3 ## OLI_Daniele
874     txt += ' if [ $middleware == OSG ]; then \n'
875     txt += ' echo "Remove working directory: $WORKING_DIR"\n'
876     txt += ' cd $RUNTIME_AREA\n'
877     txt += ' /bin/rm -rf $WORKING_DIR\n'
878     txt += ' if [ -d $WORKING_DIR ] ;then\n'
879 slacapra 1.90 txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
880     txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
881     txt += ' echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
882     txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
883 gutsche 1.13 txt += ' rm -f $RUNTIME_AREA/$repo \n'
884     txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
885     txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
886 gutsche 1.3 txt += ' fi\n'
887     txt += ' fi \n'
888 slacapra 1.1 txt += " exit 1\n"
889     txt += "fi\n"
890     txt += "\n"
891    
892     # Prepare job-specific part
893     job = common.job_list[nj]
894 spiga 1.42 if self.pset != None: #CarlosDaniele
895     pset = os.path.basename(job.configFilename())
896     txt += '\n'
# With a dataset the job arguments 1..3 are the input files, the maximum number
# of events and the number of events to skip; each one replaces a placeholder
# in the config file through sed.
897     if (self.datasetPath): # standard job
898     #txt += 'InputFiles=$2\n'
899     txt += 'InputFiles=${args[1]}\n'
900     txt += 'MaxEvents=${args[2]}\n'
901     txt += 'SkipEvents=${args[3]}\n'
902     txt += 'echo "Inputfiles:<$InputFiles>"\n'
903 slacapra 1.90 txt += 'sed "s#{\'INPUT\'}#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
904 spiga 1.42 txt += 'echo "MaxEvents:<$MaxEvents>"\n'
905 slacapra 1.90 txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
906 spiga 1.42 txt += 'echo "SkipEvents:<$SkipEvents>"\n'
907 slacapra 1.90 txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
908 spiga 1.42 else: # pythia like job
# seedIndex is the position in $args of the next value to read; it advances
# once for every optional value (first run, seeds) that is enabled.
909 slacapra 1.90 seedIndex=1
910     if (self.firstRun):
911     txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
912 spiga 1.57 txt += 'echo "FirstRun: <$FirstRun>"\n'
913 slacapra 1.90 txt += 'sed "s#\<INPUTFIRSTRUN\>#$FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
914     seedIndex=seedIndex+1
915    
916 spiga 1.57 if (self.sourceSeed):
917 slacapra 1.90 txt += 'Seed=${args['+str(seedIndex)+']}\n'
918     txt += 'sed "s#\<INPUT\>#$Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
919     seedIndex=seedIndex+1
920     ## the following seeds are not always present
921 spiga 1.42 if (self.sourceSeedVtx):
922 slacapra 1.90 txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
923 spiga 1.42 txt += 'echo "VtxSeed: <$VtxSeed>"\n'
924 slacapra 1.90 txt += 'sed "s#\<INPUTVTX\>#$VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
925     seedIndex += 1
926     if (self.sourceSeedG4):
927     txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
928     txt += 'echo "G4Seed: <$G4Seed>"\n'
929     txt += 'sed "s#\<INPUTG4\>#$G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
930     seedIndex += 1
931     if (self.sourceSeedMix):
932     txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
933     txt += 'echo "MixSeed: <$mixSeed>"\n'
934     txt += 'sed "s#\<INPUTMIX\>#$mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
935     seedIndex += 1
936     pass
937     pass
938     txt += 'mv -f '+pset+' pset.cfg\n'
939 slacapra 1.1
# Only the last path component of each additional file is used on the worker
# node; the copy is skipped when the file is absent from $RUNTIME_AREA.
940     if len(self.additional_inbox_files) > 0:
941     for file in self.additional_inbox_files:
942 mkirn 1.31 relFile = file.split("/")[-1]
943     txt += 'if [ -e $RUNTIME_AREA/'+relFile+' ] ; then\n'
944     txt += ' cp $RUNTIME_AREA/'+relFile+' .\n'
945     txt += ' chmod +x '+relFile+'\n'
946 slacapra 1.1 txt += 'fi\n'
947     pass
948    
949 spiga 1.42 if self.pset != None: #CarlosDaniele
950     txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
951    
952     txt += '\n'
953     txt += 'echo "***** cat pset.cfg *********"\n'
954     txt += 'cat pset.cfg\n'
955     txt += 'echo "****** end pset.cfg ********"\n'
956     txt += '\n'
957 gutsche 1.3 return txt
958    
def wsBuildExe(self, nj=0):
    """
    Return the job-script fragment that unpacks the user tarball.

    When self.tgzNameWithPath is not an existing file an empty string is
    returned. Otherwise the fragment untars the archive (looked up by its
    base name in $RUNTIME_AREA), reports a failure and exits if tar
    fails, and finally puts ProdAgentApi on PYTHONPATH.

    nj is accepted for interface compatibility and is not used.
    """
    if not os.path.isfile(self.tgzNameWithPath):
        return ""

    archive = os.path.basename(self.tgzNameWithPath)
    script = [
        'echo "tar xzvf $RUNTIME_AREA/' + archive + '"\n',
        'tar xzvf $RUNTIME_AREA/' + archive + '\n',
        'untar_status=$? \n',
        'if [ $untar_status -ne 0 ]; then \n',
        ' echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n',
        ' echo "JOB_EXIT_STATUS = $untar_status" \n',
        ' echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n',
        # on OSG the temporary working directory has to be removed as well
        ' if [ $middleware == OSG ]; then \n',
        ' echo "Remove working directory: $WORKING_DIR"\n',
        ' cd $RUNTIME_AREA\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n',
        ' echo "JOB_EXIT_STATUS = 50999"\n',
        ' echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' fi\n',
        ' fi \n',
        ' \n',
        ' exit 1 \n',
        'else \n',
        ' echo "Successful untar" \n',
        'fi \n',
        '\n',
        'echo "Include ProdAgentApi in PYTHONPATH"\n',
        'if [ -z "$PYTHONPATH" ]; then\n',
        ' export PYTHONPATH=ProdAgentApi\n',
        'else\n',
        ' export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n',
        'fi\n',
        '\n',
    ]
    return "".join(script)
1006    
def modifySteeringCards(self, nj):
    """
    hook meant to modify the card provided by the user and write a
    new card into the share dir; this implementation performs no
    action and returns None
    """
    return None
1012    
def executableName(self):
    """
    return the command used to start the job: "sh " when a user
    script (self.scriptExe) is set, self.executable otherwise
    """
    if not self.scriptExe:
        return self.executable
    return "sh "
1018 slacapra 1.1
def executableArgs(self):
    """
    return the arguments passed to the command of executableName():
    the user script followed by " $NJob" when self.scriptExe is set,
    " -p pset.cfg" otherwise
    """
    if not self.scriptExe:
        return " -p pset.cfg"
    return self.scriptExe + " $NJob"
1024 slacapra 1.1
def inputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL input sandbox:
    the two tarballs (only those that exist on disk), the job config
    file when self.pset is not None, then the additional input files.
    """
    ## code: keep only the tarballs that are really there
    candidates = (self.tgzNameWithPath, self.MLtgzfile)
    sandbox = [tarball for tarball in candidates if os.path.isfile(tarball)]
    ## config
    if self.pset is not None:
        sandbox.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
    ## additional input files
    sandbox.extend(self.additional_inbox_files)
    return sandbox
1044    
def outputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL output sandbox:
    every user declared output file, numbered with nj + 1.
    """
    ## User Declared output files
    suffix = str(nj + 1)
    declared = self.output_file + self.output_file_sandbox
    return [self.numberFile_(name, suffix) for name in declared]
1056    
def prepareSteeringCards(self):
    """
    hook for the initial modifications of the user's steering card
    file; nothing is done here and None is returned
    """
    return None
1062    
def wsRenameOutput(self, nj):
    """
    Returns part of a job script which renames the produced files.

    For every declared output file the fragment checks that the file
    exists and copies it to $RUNTIME_AREA under its numbered name
    (numberFile_ with '$NJob'); a missing file sets exit_status=60302.
    The fragment then returns to $RUNTIME_AREA, removes the OSG working
    directory and defines file_list with the numbered names of
    self.output_file only. nj is not used.
    """
    pieces = ['\n', '# directory content\n', 'ls \n']

    for produced in (self.output_file + self.output_file_sandbox):
        numbered = self.numberFile_(produced, '$NJob')
        pieces.extend([
            '\n',
            '# check output file\n',
            'ls ' + produced + '\n',
            'ls_result=$?\n',
            'if [ $ls_result -ne 0 ] ; then\n',
            ' exit_status=60302\n',
            ' echo "ERROR: Problem with output file"\n',
        ])
        if common.scheduler.boss_scheduler_name == 'condor_g':
            pieces.extend([
                ' if [ $middleware == OSG ]; then \n',
                ' echo "prepare dummy output file"\n',
                ' echo "Processing of job output failed" > $RUNTIME_AREA/' + numbered + '\n',
                ' fi \n',
            ])
        pieces.extend([
            'else\n',
            ' cp ' + produced + ' $RUNTIME_AREA/' + numbered + '\n',
            'fi\n',
        ])

    pieces.extend([
        'cd $RUNTIME_AREA\n',
        'cd $RUNTIME_AREA\n',
        'if [ $middleware == OSG ]; then\n',
        ' cd $RUNTIME_AREA\n',
        ' echo "Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n',
        ' echo "JOB_EXIT_STATUS = 60999"\n',
        ' echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' fi\n',
        'fi\n',
        '\n',
    ])

    ## Add to filelist only files to be possibly copied to SE
    numbered_names = [self.numberFile_(name, '$NJob') for name in self.output_file]
    pieces.append('file_list="' + ' '.join(numbered_names) + '"\n')

    return ''.join(pieces)
1121    
def numberFile_(self, file, txt):
    """
    append _'txt' before last extension of a file

    file -- file name, optionally preceded by a '/'-separated directory
            part
    txt  -- text to insert (for instance a job number)

    Only the last path component is searched for the extension, so a
    dot in the directory part is never taken for an extension separator.
    A name without any dot simply gets '_txt' appended.
    """
    # split off the directory part (empty when there is no '/')
    cut = file.rfind('/') + 1
    folder, base = file[:cut], file[cut:]

    dot = base.rfind('.')
    if dot < 0:
        # no extension at all
        return folder + base + '_' + txt
    # base[dot:] still starts with the '.', so it is re-attached as is
    return folder + base[:dot] + '_' + txt + base[dot:]
1139    
def getRequirements(self, nj=None):
    """
    return job requirements to add to jdl files

    The result is the ' && ' conjunction of:
      - a Member(...) clause asking for the software tag
        VO-cms-<self.version>, only when self.version is not empty;
      - the outbound connectivity clause, always.

    nj is accepted for interface compatibility and is not used.
    """
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' +
                       self.version +
                       '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')
    # The architecture requirement (VO-cms-<self.executable_arch>) is
    # deliberately not emitted for now.

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')

    # joining avoids a dangling leading ' && ' when there is no version
    return ' && '.join(clauses)
1157 gutsche 1.3
def configFilename(self):
    """ return the config filename: the job type name plus '.cfg' """
    extension = '.cfg'
    return self.name() + extension
1161    
1162     ### OLI_DANIELE
def wsSetupCMSOSGEnvironment_(self):
    """
    Returns part of a job script which prepares the execution
    environment and which is common for all CMS jobs on OSG.

    The fragment sources cmsset_default.sh from $GRID3_APP_DIR/cmssoft
    or, failing that, from $OSG_APP/cmssoft/cms, after exporting
    SCRAM_ARCH=self.executable_arch; self.version is passed as argument
    to the sourced script. When neither file exists the fragment reports
    JobExitCode 10020, removes $WORKING_DIR (reporting 10017 if that
    fails) and exits with status 1.
    """
    arch = self.executable_arch
    release = self.version

    # lines shared by every failure report
    report_tail = [
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
    ]

    script = [
        '\n',
        ' echo "### SETUP CMS OSG ENVIRONMENT ###"\n',
        ' if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n',
        ' # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' source $GRID3_APP_DIR/cmssoft/cmsset_default.sh ' + release + '\n',
        ' elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh ' + release + '\n',
        ' else\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
    ]
    script.extend(report_tail)
    # No 'exit' here: the working directory must be removed first, the
    # single 'exit 1' of this branch comes after the cleanup below.
    script.extend([
        '\n',
        ' echo "Remove working directory: $WORKING_DIR"\n',
        ' cd $RUNTIME_AREA\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10017"\n',
        ' echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n',
    ])
    script.extend(report_tail)
    script.extend([
        ' fi\n',
        '\n',
        ' exit 1\n',
        ' fi\n',
        '\n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
        ' echo " END SETUP CMS OSG ENVIRONMENT "\n',
    ])

    return ''.join(script)
1208    
1209     ### OLI_DANIELE
def wsSetupCMSLCGEnvironment_(self):
    """
    Returns part of a job script which prepares the execution
    environment and which is common for all CMS jobs on LCG.

    The fragment checks $VO_CMS_SW_DIR, then sources
    $VO_CMS_SW_DIR/cmsset_default.sh; each failure reports its own
    JobExitCode (10031, 10020, 10032) and exits with status 1.
    """
    # lines shared by every failure report
    report_tail = (
        ' dumpStatus $RUNTIME_AREA/$repo\n'
        ' rm -f $RUNTIME_AREA/$repo \n'
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
        ' exit 1\n'
    )

    script = [
        ' \n',
        ' echo " ### SETUP CMS LCG ENVIRONMENT ### "\n',
        # software area unknown
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n',
        ' echo "JOB_EXIT_STATUS = 10031" \n',
        ' echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n',
        report_tail,
        ' else\n',
        ' echo "Sourcing environment... "\n',
        # setup script missing or empty
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        report_tail,
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        # setup script failed
        ' if [ $result -ne 0 ]; then\n',
        ' echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' echo "JOB_EXIT_STATUS = 10032"\n',
        ' echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n',
        report_tail,
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
        ' echo "### END SETUP CMS LCG ENVIRONMENT ###"\n',
    ]
    return ''.join(script)
1256 gutsche 1.5
def setParam_(self, param, value):
    """ store 'value' under the key 'param' in the parameter dictionary """
    self._params.update({param: value})
1259    
def getParams(self):
    """ return the parameter dictionary itself (not a copy) """
    params = self._params
    return params
1262 gutsche 1.8
def setTaskid_(self):
    """ copy the 'taskId' entry of cfg_params into self._taskId """
    task_id = self.cfg_params['taskId']
    self._taskId = task_id
1265    
def getTaskid(self):
    """ return the task id stored in self._taskId """
    task_id = self._taskId
    return task_id
1268 gutsche 1.35
1269     #######################################################################
def uniquelist(self, old):
    """
    remove duplicates from a list

    the elements are used as dictionary keys, so they must be
    hashable; the keys of that dictionary are returned
    """
    return dict.fromkeys(old).keys()