ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.121
Committed: Wed Oct 3 14:52:13 2007 UTC (17 years, 6 months ago) by mcinquil
Content type: text/x-python
Branch: MAIN
Changes since 1.120: +84 -0 lines
Log Message:
Added limit on output sandbox for glite -105 MB

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5 fanzago 1.115 from BlackWhiteListParser import BlackWhiteListParser
6 slacapra 1.1 import common
7     import Scram
8    
9 slacapra 1.105 import os, string, glob
10 slacapra 1.1
11     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs):
    """
    Set up a CMSSW job type from the user configuration.

    Reads the CMSSW / USER / EDG options from cfg_params, validates them,
    runs data discovery when a dataset is given, prepares the user
    tarball, splits the task into jobs and writes the edited ParameterSet.

    ARGUMENTS: cfg_params - mapping of 'SECTION.key' option names to values
               ncjobs     - number of jobs requested to be created
                            (the block splitting also accepts 'all')
    RAISES:    CrabException for an unsupported version/architecture pair,
               missing or inconsistent options, missing input files, or a
               failure while editing the ParameterSet.
               ValueError (from int()/float()) for non-numeric option values.
    """
    # function-scope import: only needed to report the cause of a
    # ParameterSet editing failure in a way valid on every Python version
    import sys

    JobType.__init__(self, 'CMSSW')
    common.logger.debug(3,'CMSSW::__init__')

    self._params = {}
    self.cfg_params = cfg_params

    # init BlackWhiteListParser
    self.blackWhiteListParser = BlackWhiteListParser(cfg_params)

    try:
        self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
    except KeyError:
        self.MaxTarBallSize = 9.5

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    log = common.logger

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.additional_tgz_name = 'additional.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''          # script use case
    self.datasetPath = ''   # script use case

    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()

    #
    # Try to block creation in case of arch/version mismatch
    #
    version_fields = self.version.split("_")
    if int(version_fields[1]) == 1:
        minor = int(version_fields[2])
        on_slc4 = self.executable_arch.find('slc4') == 0
        on_slc3 = self.executable_arch.find('slc3') == 0
        # 1_x releases below 1_5 are rejected on slc4, 1_5 and above on slc3
        if (minor < 5 and on_slc4) or (minor >= 5 and on_slc3):
            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
            raise CrabException(msg)

    common.taskDB.setDict('codeVersion',self.version)
    self.setParam_('application', self.version)

    ### collect Data cards

    ## get DBS mode
    try:
        self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
    except KeyError:
        self.use_dbs_1 = 0

    try:
        dataset_option = cfg_params['CMSSW.datasetpath']
    except KeyError:
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)
    log.debug(6, "CMSSW::CMSSW(): datasetPath = "+dataset_option)
    if dataset_option.lower() == 'none':
        # no input dataset: generation-like or script-only task
        self.datasetPath = None
        self.selectNoInput = 1
    else:
        self.datasetPath = dataset_option
        self.selectNoInput = 0

    # ML monitoring
    # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
    if not self.datasetPath:
        self.setParam_('dataset', 'None')
        self.setParam_('owner', 'None')
    else:
        try:
            datasetpath_split = self.datasetPath.split("/")
            # standard style
            self.setParam_('datasetFull', self.datasetPath)
            # the owner is the last path component in DBS-1 mode,
            # the third component otherwise
            if self.use_dbs_1 == 1:
                owner_index = -1
            else:
                owner_index = 2
            self.setParam_('dataset', datasetpath_split[1])
            self.setParam_('owner', datasetpath_split[owner_index])
        except Exception:
            # best effort: fall back to the raw path for both fields
            self.setParam_('dataset', self.datasetPath)
            self.setParam_('owner', self.datasetPath)

    self.setTaskid_()
    self.setParam_('taskId', self.cfg_params['taskId'])

    self.dataTiers = []

    ## now the application
    try:
        self.executable = cfg_params['CMSSW.executable']
        self.setParam_('exe', self.executable)
        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
        msg = "Default executable cmsRun overridden. Switch to " + self.executable
        log.debug(3,msg)
    except KeyError:
        self.executable = 'cmsRun'
        self.setParam_('exe', self.executable)
        msg = "User executable not defined. Use cmsRun"
        log.debug(3,msg)

    try:
        self.pset = cfg_params['CMSSW.pset']
    except KeyError:
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
    if self.pset.lower() != 'none':
        if not os.path.exists(self.pset):
            raise CrabException("User defined PSet file "+self.pset+" does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    try:
        output_option = cfg_params['CMSSW.output_file']
    except KeyError:
        output_option = ''
    if output_option != '':
        output_names = output_option.split(',')
        log.debug(7, 'cmssw::cmssw(): output files '+str(output_names))
        for output_name in output_names:
            self.output_file.append(output_name.strip())
    else:
        log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")

    # script_exe file as additional file in inputSandbox
    try:
        self.scriptExe = cfg_params['USER.script_exe']
    except KeyError:
        self.scriptExe = ''
    if self.scriptExe != '':
        if not os.path.isfile(self.scriptExe):
            msg ="ERROR. file "+self.scriptExe+" not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    # with neither a dataset nor a PSet, a user script is the only
    # thing left that could be run
    if self.datasetPath is None and self.pset is None and self.scriptExe == '':
        msg ="Error. script_exe not defined"
        raise CrabException(msg)

    ## additional input files
    try:
        additional_option = cfg_params['USER.additional_input_files']
    except KeyError:
        additional_option = None
    if additional_option is not None:
        for entry in additional_option.split(','):
            entry = entry.strip()
            if not entry:
                # empty item (e.g. trailing comma): nothing to ship
                continue
            dirname = ''
            if not entry[0] == "/":
                dirname = "."
            matches = []
            if entry.find("*") > -1:
                matches = glob.glob(os.path.join(dirname, entry))
                if len(matches) == 0:
                    raise CrabException("No additional input file found with this pattern: "+entry)
            else:
                matches.append(entry)
            for match in matches:
                if not os.path.exists(match):
                    raise CrabException("Additional input file not found: "+match)
                self.additional_inbox_files.append(match.strip())
        common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))

    # files per job
    try:
        if cfg_params['CMSSW.files_per_jobs']:
            raise CrabException("files_per_jobs no longer supported. Quitting.")
    except KeyError:
        pass

    def int_option(key, default):
        """Return (int value, 1) when key is configured, else (default, 0)."""
        try:
            return int(cfg_params[key]), 1
        except KeyError:
            return default, 0

    ## Events per job
    self.eventsPerJob, self.selectEventsPerJob = int_option('CMSSW.events_per_job', -1)

    ## number of jobs
    self.theNumberOfJobs, self.selectNumberOfJobs = int_option('CMSSW.number_of_jobs', 0)

    ## total number of events
    self.total_number_of_events, self.selectTotalNumberEvents = int_option('CMSSW.total_number_of_events', 0)

    if self.pset is not None:
        if (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2:
            msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
            raise CrabException(msg)
    else:
        if self.selectNumberOfJobs == 0:
            msg = 'Must specify number_of_jobs.'
            raise CrabException(msg)

    ## source seed for pythia
    self.sourceSeed, found = int_option('CMSSW.pythia_seed', None)
    if not found:
        common.logger.debug(5,"No seed given")

    self.sourceSeedVtx, found = int_option('CMSSW.vtx_seed', None)
    if not found:
        common.logger.debug(5,"No vertex seed given")

    self.sourceSeedG4, found = int_option('CMSSW.g4_seed', None)
    if not found:
        common.logger.debug(5,"No g4 sim hits seed given")

    self.sourceSeedMix, found = int_option('CMSSW.mix_seed', None)
    if not found:
        common.logger.debug(5,"No mix seed given")

    self.firstRun, found = int_option('CMSSW.first_run', None)
    if not found:
        common.logger.debug(5,"No first run given")

    if self.pset is not None:
        ver = self.version.split("_")
        # compare (major, minor) as a pair so that e.g. 2_0 is also
        # recognised as being at least 1_5
        if (int(ver[1]), int(ver[2])) >= (1, 5):
            import PsetManipulator150 as pp
        else:
            import PsetManipulator as pp
        PsetEdit = pp.PsetManipulator(self.pset)

    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents=0        # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths={}        # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination=[]  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end

    self.tgzNameWithPath = self.getTarBall(self.executable)

    ## Select Splitting
    if self.selectNoInput:
        if self.pset is None:
            self.jobSplittingForScript()
        else:
            self.jobSplittingNoInput()
    else:
        self.jobSplittingByBlocks(blockSites)

    # modify Pset
    if self.pset is not None:
        try:
            if self.datasetPath: # standard job
                # allow to process a fraction of events in a file
                PsetEdit.inputModule("INPUT")
                PsetEdit.maxEvent("INPUTMAXEVENTS")
                PsetEdit.skipEvent("INPUTSKIPEVENTS")
            else: # pythia like job
                PsetEdit.maxEvent(self.eventsPerJob)
                if self.firstRun:
                    PsetEdit.pythiaFirstRun("INPUTFIRSTRUN") #First Run
                # NOTE(review): the listing this was recovered from has no
                # indentation; the vtx/g4/mix seeds are assumed to be nested
                # under the pythia seed, mirroring jobSplittingNoInput - confirm
                if self.sourceSeed:
                    PsetEdit.pythiaSeed("INPUT")
                    if self.sourceSeedVtx:
                        PsetEdit.vtxSeed("INPUTVTX")
                    if self.sourceSeedG4:
                        PsetEdit.g4Seed("INPUTG4")
                    if self.sourceSeedMix:
                        PsetEdit.mixSeed("INPUTMIX")
            # add FrameworkJobReport to parameter-set
            PsetEdit.addCrabFJR(self.fjrFileName)
            PsetEdit.psetWriter(self.configFilename())
        except Exception:
            # keep the historical message but do not lose the real cause
            cause = sys.exc_info()[1]
            msg = 'Error while manipuliating ParameterSet: exiting...'
            raise CrabException(msg + ' (' + str(cause) + ')')
331 gutsche 1.3
def DataDiscoveryAndLocation(self, cfg_params):
    """
    Query the data discovery and data location services for self.datasetPath.

    ARGUMENT: cfg_params - user configuration, handed on to the services
    SETS:     self.pubdata       - the data discovery object used
              self.filesbyblock  - result of pubdata.getFiles()
              self.eventsbyblock - result of pubdata.getEventsPerBlock()
              self.eventsbyfile  - result of pubdata.getEventsPerFile()
              self.maxEvents     - result of pubdata.getMaxEvents()
    RETURNS:  the value of DataLocation.getSites() for the discovered blocks
    RAISES:   CrabException when discovery or location fails
    """
    # sys.exc_info() is used to reach the active exception because the
    # "except X, name" / "except X as name" spellings are each tied to a
    # range of Python versions
    import sys

    import DataDiscovery
    import DataDiscovery_DBS2
    import DataLocation
    common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")

    datasetPath = self.datasetPath

    ## Contact the DBS
    common.logger.message("Contacting Data Discovery Services ...")
    try:
        if self.use_dbs_1 == 1:
            self.pubdata = DataDiscovery.DataDiscovery(datasetPath, cfg_params)
        else:
            self.pubdata = DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
        self.pubdata.fetchDBSInfo()
    except (DataDiscovery.NotExistingDatasetError,
            DataDiscovery.NoDataTierinProvenanceError,
            DataDiscovery.DataDiscoveryError,
            DataDiscovery_DBS2.NotExistingDatasetError_DBS2,
            DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2,
            DataDiscovery_DBS2.DataDiscoveryError_DBS2):
        # every discovery failure is reported to the user the same way
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
        raise CrabException(msg)

    self.filesbyblock = self.pubdata.getFiles()
    self.eventsbyblock = self.pubdata.getEventsPerBlock()
    self.eventsbyfile = self.pubdata.getEventsPerFile()

    ## get max number of events
    self.maxEvents = self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py

    ## Contact the DLS and build a list of sites hosting the fileblocks
    try:
        dataloc = DataLocation.DataLocation(list(self.filesbyblock.keys()), cfg_params)
        dataloc.fetchDLSInfo()
    except DataLocation.DataLocationError:
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
        raise CrabException(msg)

    sites = dataloc.getSites()

    # screen output
    common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")

    return sites
398 gutsche 1.3
def jobSplittingByBlocks(self, blockSites):
    """
    Perform job splitting. Jobs run over an integer number of files
    and no more than one block.
    ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
    REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
              self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
              self.maxEvents, self.filesbyblock, self.eventsbyblock, self.eventsbyfile,
              self.ncjobs
    SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
          self.total_number_of_jobs - Total # of jobs (also stored in self.ncjobs)
          self.list_of_args - per job: [file list string, max events, skip events]
    RAISES: CrabException if events_per_job or number_of_jobs was given
            as a non-positive value where it is needed for the splitting
    """

    # ---- Handle the possible job splitting configurations ---- #
    if self.selectTotalNumberEvents:
        totalEventsRequested = self.total_number_of_events
    if self.selectEventsPerJob:
        if self.eventsPerJob < 1:
            # a job size of zero (or less) would never consume any event
            raise CrabException('events_per_job must be a positive integer.')
        eventsPerJobRequested = self.eventsPerJob
        if self.selectNumberOfJobs:
            totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob

    # If user requested all the events in the dataset
    if totalEventsRequested == -1:
        eventsRemaining = self.maxEvents
    # If user requested more events than are in the dataset
    elif totalEventsRequested > self.maxEvents:
        eventsRemaining = self.maxEvents
        # report the effective request: total_number_of_events is not set
        # when the user gave number_of_jobs together with events_per_job
        common.logger.message("Requested "+str(totalEventsRequested)+ " events, but only "+str(self.maxEvents)+" events are available.")
    # If user requested less events than are in the dataset
    else:
        eventsRemaining = totalEventsRequested

    # If user requested more events per job than are in the dataset
    if self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents:
        eventsPerJobRequested = self.maxEvents

    # For user info at end
    totalEventCount = 0

    if self.selectTotalNumberEvents and self.selectNumberOfJobs:
        if self.theNumberOfJobs < 1:
            raise CrabException('number_of_jobs must be a positive integer.')
        # never go below one event per job: with zero the file loop below
        # would keep closing empty jobs without making progress
        eventsPerJobRequested = max(1, eventsRemaining // self.theNumberOfJobs)

    if self.selectNumberOfJobs:
        common.logger.message("May not create the exact number_of_jobs requested.")

    if self.ncjobs == 'all':
        totalNumberOfJobs = 999999999
    else:
        totalNumberOfJobs = self.ncjobs

    blocks = list(blockSites.keys())
    blockCount = 0
    # Backup variable in case self.maxEvents counted events in a non-included block
    numBlocksInDataset = len(blocks)

    jobCount = 0
    list_of_lists = []

    # jobs (numbered from 1) created for each block
    jobsOfBlock = {}

    # ---- Iterate over the blocks in the dataset until ---- #
    # ---- we've met the requested total # of events    ---- #
    while (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs):
        block = blocks[blockCount]
        blockCount += 1
        if block not in jobsOfBlock:
            jobsOfBlock[block] = []

        # NOTE(review): the listing this was recovered from has no
        # indentation; the whole per-block processing is assumed to be
        # guarded by the presence of the block in eventsbyblock - confirm
        if block not in self.eventsbyblock:
            continue

        numEventsInBlock = self.eventsbyblock[block]
        common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))

        files = self.filesbyblock[block]
        numFilesInBlock = len(files)
        if numFilesInBlock <= 0:
            continue
        fileCount = 0

        # ---- New block => New job ---- #
        parString = "\\{"
        # counter for number of events in files currently worked on
        filesEventCount = 0
        # flag if next while loop should touch new file
        newFile = 1
        # job event counter
        jobSkipEventCount = 0

        # ---- Iterate over the files in the block until we've met the requested ---- #
        # ---- total # of events or we've gone over all the files in this block  ---- #
        while (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs):
            fileName = files[fileCount]
            if newFile:
                try:
                    numEventsInFile = self.eventsbyfile[fileName]
                    common.logger.debug(6, "File "+str(fileName)+" has "+str(numEventsInFile)+" events")
                    # increase filesEventCount
                    filesEventCount += numEventsInFile
                    # Add file to current job
                    parString += '\\"' + fileName + '\\"\\,'
                    newFile = 0
                except KeyError:
                    common.logger.message("File "+str(fileName)+" has unknown number of events: skipping")

            # events of the collected files not yet assigned to a job
            eventsAvailable = filesEventCount - jobSkipEventCount
            lastFileInBlock = (fileCount == numFilesInBlock-1)

            if eventsAvailable < eventsPerJobRequested and not lastFileInBlock:
                # not enough events yet: add the next file to the same job
                newFile = 1
                fileCount += 1
                continue

            if eventsAvailable < eventsPerJobRequested:
                # last file in block: the job takes whatever is left (-1)
                jobEvents = eventsAvailable
                maxEventsArg = -1
                jobNote = " events (last file in block)."
            else:
                jobEvents = eventsPerJobRequested
                maxEventsArg = eventsPerJobRequested
                jobNote = " events."

            # close the job; [:-2] drops the separator after the last file
            fullString = parString[:-2]
            fullString += '\\}'
            list_of_lists.append([fullString,str(maxEventsArg),str(jobSkipEventCount)])
            common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(jobEvents)+jobNote)
            self.jobDestination.append(blockSites[block])
            common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
            # fill jobs of block dictionary
            jobsOfBlock[block].append(jobCount+1)
            jobCount = jobCount + 1
            totalEventCount = totalEventCount + jobEvents
            eventsRemaining = eventsRemaining - jobEvents

            if eventsAvailable > eventsPerJobRequested:
                # events are left over in the current file: the next job
                # starts again from it and skips what was already used
                eventsInCurrentFile = self.eventsbyfile[fileName]
                jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - eventsInCurrentFile)
                # remove all but the last file
                filesEventCount = eventsInCurrentFile
                parString = "\\{"
                parString += '\\"' + fileName + '\\"\\,'
            else:
                # everything collected was consumed: start over with a new file
                jobSkipEventCount = 0
                parString = "\\{"
                filesEventCount = 0
                newFile = 1
                fileCount += 1

    self.ncjobs = self.total_number_of_jobs = jobCount
    if eventsRemaining > 0 and jobCount < totalNumberOfJobs:
        common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
    common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")

    # screen output
    screenOutput = "List of jobs and available destination sites:\n\n"

    blockCounter = 0
    for block in blocks:
        if block in jobsOfBlock:
            blockCounter += 1
            jobRanges = spanRanges(jobsOfBlock[block])
            notBlackListed = self.blackWhiteListParser.checkBlackList(blockSites[block],block)
            allowedSites = self.blackWhiteListParser.checkWhiteList(notBlackListed,block)
            screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,jobRanges,','.join(allowedSites))

    common.logger.message(screenOutput)

    self.list_of_args = list_of_lists
    return
596    
def jobSplittingNoInput(self):
    """
    Perform job splitting based on number of event per job

    REQUIRES: self.selectEventsPerJob, self.selectTotalNumberEvents,
              self.selectNumberOfJobs, self.eventsPerJob,
              self.theNumberOfJobs, self.total_number_of_events,
              self.firstRun and the self.sourceSeed* values
    SETS: self.total_number_of_jobs, and whichever of self.eventsPerJob /
          self.total_number_of_events was not given by the user
          self.jobDestination - one [""] entry appended per job
          self.list_of_args - per job: job index (prefixed with the first
                              run if given), then one value per seed
    RAISES: CrabException for a negative total number of events, or a
            non-positive events_per_job / number_of_jobs used as divisor
    """
    common.logger.debug(5,'Splitting per events')
    common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
    common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
    common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')

    if self.total_number_of_events < 0:
        msg='Cannot split jobs per Events with "-1" as total number of events'
        raise CrabException(msg)

    if self.selectEventsPerJob:
        if self.selectTotalNumberEvents:
            if self.eventsPerJob < 1:
                # would otherwise surface as a bare ZeroDivisionError
                raise CrabException('events_per_job must be a positive integer.')
            self.total_number_of_jobs = self.total_number_of_events // self.eventsPerJob
        elif self.selectNumberOfJobs:
            self.total_number_of_jobs = self.theNumberOfJobs
            self.total_number_of_events = int(self.theNumberOfJobs*self.eventsPerJob)

    elif self.selectNumberOfJobs:
        if self.theNumberOfJobs < 1:
            # would otherwise surface as a bare ZeroDivisionError
            raise CrabException('number_of_jobs must be a positive integer.')
        self.total_number_of_jobs = self.theNumberOfJobs
        self.eventsPerJob = self.total_number_of_events // self.total_number_of_jobs

    common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs))

    # is there any remainder?
    check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)

    common.logger.debug(5,'Check '+str(check))

    common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
    if check > 0:
        common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))

    # argument is seed number.$i
    self.list_of_args = []
    for i in range(self.total_number_of_jobs):
        ## Since there is no input, any site is good
        self.jobDestination.append([""]) #must be empty to write correctly the xml
        args = []
        if self.firstRun:
            ## pythia first run
            args.append(str(self.firstRun)+str(i))
        else:
            ## no first run
            args.append(str(i))
        # NOTE(review): the listing this was recovered from has no
        # indentation; the vtx/g4/mix seeds are assumed to be nested under
        # the pythia seed (two closing "pass" lines precede the final
        # append in the original) - confirm
        if self.sourceSeed:
            args.append(str(self.sourceSeed)+str(i))
            if self.sourceSeedVtx:
                ## + vtx random seed
                args.append(str(self.sourceSeedVtx)+str(i))
            if self.sourceSeedG4:
                ## + G4 random seed
                args.append(str(self.sourceSeedG4)+str(i))
            if self.sourceSeedMix:
                ## + Mix random seed
                args.append(str(self.sourceSeedMix)+str(i))
        self.list_of_args.append(args)

    return
666    
667 spiga 1.42
def jobSplittingForScript(self):#CarlosDaniele
    """
    Split a script-only task into self.theNumberOfJobs jobs.

    SETS: self.total_number_of_jobs - copy of self.theNumberOfJobs
          self.list_of_args - one single-element list [job index] per job
          self.jobDestination - one [""] entry appended per job
    """
    logger = common.logger
    logger.debug(5,'Splitting per job')
    logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')

    self.total_number_of_jobs = self.theNumberOfJobs
    jobs_text = str(self.total_number_of_jobs)

    logger.debug(5,'N jobs '+jobs_text)
    logger.message(jobs_text+' jobs can be created')

    # the only argument of a job is its own index; there is no input
    # and no random seed, so the destination is left open
    job_indices = range(self.total_number_of_jobs)
    self.list_of_args = [[str(index)] for index in job_indices]
    self.jobDestination.extend([[""] for index in job_indices])
    return
690    
def split(self, jobParams):
    """
    Store the arguments and destination of every job in the job database.

    ARGUMENT: jobParams - list that is grown by one slot per job; its
              first self.total_number_of_jobs entries are then set to the
              matching entries of self.list_of_args
    REQUIRES: self.total_number_of_jobs, self.list_of_args,
              self.jobDestination
    """
    database = common.jobDB
    database.load()

    job_total = self.total_number_of_jobs
    per_job_args = self.list_of_args

    # create the empty structure
    jobParams.extend([""] * job_total)

    for index in range(job_total):
        jobParams[index] = per_job_args[index]
        database.setArguments(index, jobParams[index])
        destination = self.jobDestination[index]
        common.logger.debug(5,"Job "+str(index)+" Destination: "+str(destination))
        database.setDestination(index, self.jobDestination[index])

    database.save()
    return
711    
def getJobTypeArguments(self, nj, sched):
    """
    Return the arguments stored in the job database for job nj as one
    string, each argument followed by a single blank. sched is not used.
    """
    pieces = [str(argument) + " " for argument in common.jobDB.arguments(nj)]
    return ''.join(pieces)
717 gutsche 1.3
def numberOfJobs(self):
    """Return self.total_number_of_jobs as set by the job splitting."""
    job_total = self.total_number_of_jobs
    return job_total
721    
def getTarBall(self, exe):
    """
    Return the path of the tarball holding the user libraries and
    executable, asking buildTar_ to create it when no file exists at
    that path yet.

    ARGUMENT: exe - executable name handed on to buildTar_
    SETS: self.tgzNameWithPath
    """
    # relative path below the work space 'share' area, used in the
    # Boss XML files
    share_area = common.work_space.pathForTgz()+'share/'
    self.tgzNameWithPath = share_area+self.tgz_name

    if not os.path.exists(self.tgzNameWithPath):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return self.tgzNameWithPath.strip()

    # it already exists: just return it
    return self.tgzNameWithPath
739    
def buildTar_(self, executable):
    """
    Build the input tar-balls for the job.

    Two archives are written:
      * self.tgzNameWithPath: the private executable (if any), the
        'lib' and 'module' dirs and every 'data' dir of the user SCRAM
        area, plus the ProdAgentApi and ProdCommon dirs found under
        $CRABDIR;
      * self.MLtgzfile: a fixed list of python files from
        $CRABDIR/python.

    Nothing is done (and self.MLtgzfile is not set) when the release top
    is empty or equal to the user area.

    Raises CrabException if the executable cannot be found, if either
    archive cannot be created, or if the first archive is larger than
    self.MaxTarBallSize MB.
    """
    import sys
    import tarfile

    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        return

    try:  # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            if self.executable != '':
                exeWithPath = self.scram.findFile_(executable)
                if not exeWithPath:
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship
                    common.logger.debug(5, "Exe "+exeWithPath+" to be tarred")
                    path = swArea+'/'
                    # distinguish case when script is in user project area
                    # or given by full path somewhere else
                    if exeWithPath.find(path) >= 0:
                        exe = exeWithPath.replace(path, '')
                        tar.add(path+exe, os.path.basename(executable))
                    else:
                        tar.add(exeWithPath, os.path.basename(executable))
                # otherwise the exe is from release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            libDir = 'lib'
            lib = swArea+'/' + libDir
            common.logger.debug(5, "lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib, libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module, moduleDir)

            ## Now check if any data dir(s) is present
            swAreaLen = len(swArea)
            for root, dirs, files in os.walk(swArea):
                if "data" in dirs:
                    common.logger.debug(5, "data "+root+"/data"+" to be tarred")
                    # archive name is the path relative to the user area
                    tar.add(root+"/data", root[swAreaLen:]+"/data")

            ## Add ProdAgent dir to tar
            paDir = 'ProdAgentApi'
            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
            if os.path.isdir(pa):
                tar.add(pa, paDir)

            ## Add PRODCOMMON dir to tar
            prodcommonDir = 'ProdCommon'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
            if os.path.isdir(prodcommonPath):
                tar.add(prodcommonPath, prodcommonDir)

            common.logger.debug(5, "Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            # always release the file handle, also on failure
            tar.close()
    except CrabException:
        # keep the specific message (e.g. executable not found)
        raise
    except Exception:
        raise CrabException('Could not create tar-ball: ' + str(sys.exc_info()[1]))

    ## check for tarball size
    tarballinfo = os.stat(self.tgzNameWithPath)
    if tarballinfo.st_size > self.MaxTarBallSize*1024*1024:
        raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')

    ## create tar-ball with ML stuff
    self.MLtgzfile = common.work_space.pathForTgz()+'share/MLfiles.tgz'
    try:
        tar = tarfile.open(self.MLtgzfile, "w:gz")
        try:
            path = os.environ['CRABDIR'] + '/python/'
            for name in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
                tar.add(path+name, name)
            common.logger.debug(5, "Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
        finally:
            tar.close()
    except CrabException:
        raise
    except Exception:
        raise CrabException('Could not create ML files tar-ball: ' + str(sys.exc_info()[1]))

    return
836    
def additionalInputFileTgz(self):
    """
    Put all additional files into a tar ball and return its name

    Every entry of self.additional_inbox_files is stored under the last
    '/'-separated component of its path.  The archive is closed even if
    adding a file fails.
    """
    import tarfile
    tarName = common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
    tar = tarfile.open(tarName, "w:gz")
    try:
        for path in self.additional_inbox_files:
            # str.split replaces the deprecated string.split function
            tar.add(path, path.split('/')[-1])
        common.logger.debug(5, "Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
    finally:
        tar.close()
    return tarName
849    
def wsSetupEnvironment(self, nj):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    The returned shell text, in order:
      * sets up the LCG or OSG environment depending on $middleware;
      * creates the CMSSW project area for self.version and evaluates
        the scram runtime environment;
      * checks the number of wrapper arguments;
      * exports the dataset related shell variables;
      * when self.pset is not None, copies the job config file, replaces
        its placeholders from the wrapper arguments and renames it to
        pset.cfg;
      * unpacks the additional input tar-ball when additional input
        files are declared.
    """

    def status_lines(code):
        # Lines shared by every failure branch: report the exit code
        # and re-seed the $repo file with the monitoring ids.
        return (' echo "JOB_EXIT_STATUS = ' + code + '"\n'
                + ' echo "JobExitCode=' + code + '" | tee -a $RUNTIME_AREA/$repo\n'
                + ' dumpStatus $RUNTIME_AREA/$repo\n'
                + ' rm -f $RUNTIME_AREA/$repo \n'
                + ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
                + ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n')

    def osg_cleanup(failure_echo, code):
        # On OSG remove the working directory before exiting; if the
        # removal fails, report it with the given message and code.
        return (' if [ $middleware == OSG ]; then \n'
                + ' echo "Remove working directory: $WORKING_DIR"\n'
                + ' cd $RUNTIME_AREA\n'
                + ' /bin/rm -rf $WORKING_DIR\n'
                + ' if [ -d $WORKING_DIR ] ;then\n'
                + failure_echo
                + status_lines(code)
                + ' fi\n'
                + ' fi \n')

    arch = self.executable_arch
    version = self.version

    # Prepare JobType-independent part
    txt = ''

    ## OLI_Daniele at this level middleware already known
    txt += 'if [ $middleware == LCG ]; then \n'
    txt += ' echo "### First set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n'
    txt += ' export SCRAM_ARCH='+arch+'\n'
    txt += ' export BUILD_ARCH='+arch+'\n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' echo "Created working directory: $WORKING_DIR"\n'
    txt += ' if [ ! -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += status_lines('10016')
    txt += ' exit 1\n'
    txt += ' fi\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    txt += ' echo "### Set SCRAM ARCH to ' + arch + ' ###"\n'
    txt += ' export SCRAM_ARCH='+arch+'\n'
    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
    txt += scram+' project CMSSW '+version+'\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+version+' not found on `hostname`" \n'
    txt += status_lines('10034')
    # The message used to carry a hard-coded release name; report the
    # release that was actually requested.
    txt += osg_cleanup(' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW '+version+' not found on `hostname`"\n', '10018')
    txt += ' exit 1 \n'
    txt += 'fi \n'
    txt += 'echo "CMSSW_VERSION = '+version+'"\n'
    txt += 'cd '+version+'\n'
    txt += 'SOFTWARE_DIR=`pwd`\n'
    txt += 'echo SOFTWARE_DIR=$SOFTWARE_DIR \n'
    ### needed grep for bug in scramv1 ###
    txt += scram+' runtime -sh\n'
    txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    txt += 'echo $PATH\n'

    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber)\n"
    txt += "\n"
    txt += "if [ $nargs -lt 2 ]\n"
    txt += "then\n"
    txt += " echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
    txt += status_lines('50113')
    txt += osg_cleanup(' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n', '50114')
    txt += " exit 1\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    if self.datasetPath:
        txt += '\n'
        txt += 'DatasetPath='+self.datasetPath+'\n'

        # elements 1 and 2 of the '/'-separated dataset path
        datasetpath_split = self.datasetPath.split("/")

        txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
        txt += 'DataTier='+datasetpath_split[2]+'\n'
        txt += 'ApplicationFamily=cmsRun\n'
    else:
        txt += 'DatasetPath=MCDataTier\n'
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'

    if self.pset is not None:
        pset = os.path.basename(job.configFilename())

        def sed(expression):
            # in-place edit of the config file through a temporary file
            return 'sed "' + expression + '" ' + pset + ' > tmp && mv -f tmp ' + pset + '\n'

        txt += '\n'
        txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
        if self.datasetPath:  # standard job
            txt += 'InputFiles=${args[1]}\n'
            txt += 'MaxEvents=${args[2]}\n'
            txt += 'SkipEvents=${args[3]}\n'
            txt += 'echo "Inputfiles:<$InputFiles>"\n'
            txt += sed('s#{\'INPUT\'}#$InputFiles#')
            txt += 'echo "MaxEvents:<$MaxEvents>"\n'
            txt += sed('s#INPUTMAXEVENTS#$MaxEvents#')
            txt += 'echo "SkipEvents:<$SkipEvents>"\n'
            txt += sed('s#INPUTSKIPEVENTS#$SkipEvents#')
        else:  # pythia like job
            # index into the wrapper's args array of the next value
            seedIndex = 1
            if self.firstRun:
                txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
                txt += 'echo "FirstRun: <$FirstRun>"\n'
                txt += sed('s#\\<INPUTFIRSTRUN\\>#$FirstRun#')
                seedIndex = seedIndex+1

            if self.sourceSeed:
                txt += 'Seed=${args['+str(seedIndex)+']}\n'
                txt += sed('s#\\<INPUT\\>#$Seed#')
                seedIndex = seedIndex+1
                ## the following seeds are not always present
                # NOTE(review): nesting of the three optional seeds under
                # sourceSeed was reconstructed from a flattened listing
                # -- confirm against the repository copy.
                if self.sourceSeedVtx:
                    txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "VtxSeed: <$VtxSeed>"\n'
                    txt += sed('s#\\<INPUTVTX\\>#$VtxSeed#')
                    seedIndex += 1
                if self.sourceSeedG4:
                    txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "G4Seed: <$G4Seed>"\n'
                    txt += sed('s#\\<INPUTG4\\>#$G4Seed#')
                    seedIndex += 1
                if self.sourceSeedMix:
                    txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "MixSeed: <$mixSeed>"\n'
                    txt += sed('s#\\<INPUTMIX\\>#$mixSeed#')
                    seedIndex += 1
        txt += 'mv -f '+pset+' pset.cfg\n'

    if len(self.additional_inbox_files) > 0:
        txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
        txt += ' tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
        txt += 'fi\n'

    if self.pset is not None:
        # NOTE(review): the pset.cfg dump and hash are assumed to belong
        # to this branch (they need pset.cfg) -- confirm against the
        # repository copy.
        txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'

        txt += '\n'
        txt += 'echo "***** cat pset.cfg *********"\n'
        txt += 'cat pset.cfg\n'
        txt += 'echo "****** end pset.cfg ********"\n'
        txt += '\n'
        txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
        txt += 'echo "PSETHASH = $PSETHASH" \n'
        txt += '\n'
    return txt
1051    
def wsBuildExe(self, nj=0):
    """
    Put in the script the commands to build an executable
    or a library.

    Returns an empty string when no file exists at
    self.tgzNameWithPath.  Otherwise returns shell text which unpacks
    that tar-ball from $RUNTIME_AREA, aborts the job if untarring fails
    and adds ProdAgentApi and ProdCommon to PYTHONPATH.
    """
    if not os.path.isfile(self.tgzNameWithPath):
        return ""

    tarball = os.path.basename(self.tgzNameWithPath)
    lines = [
        'echo "tar xzvf $RUNTIME_AREA/' + tarball + '"\n',
        'tar xzvf $RUNTIME_AREA/' + tarball + '\n',
        'untar_status=$? \n',
        'if [ $untar_status -ne 0 ]; then \n',
        ' echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n',
        ' echo "JOB_EXIT_STATUS = $untar_status" \n',
        ' echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n',
        # on OSG the working directory is removed before giving up
        ' if [ $middleware == OSG ]; then \n',
        ' echo "Remove working directory: $WORKING_DIR"\n',
        ' cd $RUNTIME_AREA\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n',
        ' echo "JOB_EXIT_STATUS = 50999"\n',
        ' echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' fi\n',
        ' fi \n',
        ' \n',
        ' exit 1 \n',
        'else \n',
        ' echo "Successful untar" \n',
        'fi \n',
        '\n',
        # make the shipped python packages importable
        'echo "Include ProdAgentApi and PRODCOMMON in PYTHONPATH"\n',
        'if [ -z "$PYTHONPATH" ]; then\n',
        ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n',
        'else\n',
        ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n',
        'echo "PYTHONPATH=$PYTHONPATH"\n',
        'fi\n',
        '\n',
    ]
    return "".join(lines)
1106    
def modifySteeringCards(self, nj):
    """
    Placeholder for modifying the card provided by the user and
    writing a new card into the share dir.

    This implementation does nothing and returns None.
    """
    return None
1112    
def executableName(self):
    """
    Return the command used to start the job.

    When self.scriptExe is set the result is "sh " (the script itself is
    supplied by executableArgs), otherwise it is self.executable.
    """
    if not self.scriptExe:
        return self.executable
    return "sh "
1118 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string passed to the executable.

    With a user script (self.scriptExe set) this is the script followed
    by " $NJob".  Otherwise it is " -p pset.cfg", with " -e" prepended
    for releases CMSSW_1_5_X and later.

    Raises CrabException when the major/minor release numbers cannot be
    parsed from the version string returned by scram.
    """
    if self.scriptExe:
        return self.scriptExe + " $NJob"

    # version string is expected to look like CMSSW_<major>_<minor>_...
    version_array = self.scram.getSWVersion().split('_')
    try:
        major = int(version_array[1])
        minor = int(version_array[2])
    except (IndexError, ValueError):
        msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
        raise CrabException(msg)

    # if >= CMSSW_1_5_X, add -e.  Compare (major, minor) as a pair so
    # that e.g. 2_0 also counts as newer than 1_5.
    if (major, minor) >= (1, 5):
        return " -e -p pset.cfg"
    return " -p pset.cfg"
1137 slacapra 1.1
def inputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL input sandbox.

    The list holds, in this order: the code tar-ball and the ML
    tar-ball (each only if the file exists), the job config file (only
    if self.pset is not None) and the tar-ball returned by
    additionalInputFileTgz().
    """
    sandbox = []

    ## code
    code_tgz = self.tgzNameWithPath
    if os.path.isfile(code_tgz):
        sandbox.append(code_tgz)
    ml_tgz = self.MLtgzfile
    if os.path.isfile(ml_tgz):
        sandbox.append(ml_tgz)

    ## config
    if self.pset is not None:
        config_path = common.work_space.pathForTgz() + 'job/' + self.configFilename()
        sandbox.append(config_path)

    ## additional input files
    sandbox.append(self.additionalInputFileTgz())
    return sandbox
1157    
def outputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL output sandbox.

    Each user declared output file (self.output_file followed by
    self.output_file_sandbox) is numbered with nj + 1 through
    numberFile_.
    """
    declared = self.output_file + self.output_file_sandbox
    return [self.numberFile_(name, str(nj + 1)) for name in declared]
1169    
def prepareSteeringCards(self):
    """
    Hook for making initial modifications of the user's steering card
    file.  This implementation does nothing and returns None.
    """
    return None
1175    
def wsRenameOutput(self, nj):
    """
    Returns part of a job script which renames the produced files.

    For every declared output file (self.output_file followed by
    self.output_file_sandbox) the script moves the file to
    $RUNTIME_AREA and copies it there under its numbered name, or sets
    exit_status=60302 when the file is missing.  Finally the shell
    variable file_list is set to the numbered names of
    self.output_file and the script changes to $RUNTIME_AREA.
    """
    txt = '\n'
    txt += '# directory content\n'
    txt += 'ls \n'

    for fileWithSuffix in (self.output_file+self.output_file_sandbox):
        output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
        # keep the un-numbered file in $RUNTIME_AREA as well as the
        # numbered copy
        txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA\n'
        txt += ' cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
        txt += 'else\n'
        txt += ' exit_status=60302\n'
        txt += ' echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
        if fileWithSuffix in self.output_file:
            txt += ' echo "JOB_EXIT_STATUS = $exit_status"\n'
            # the exit is emitted commented out on purpose: it was
            # interrupting the execution of the wrapper
            txt += ' # exit $exit_status\n'
        if common.scheduler.boss_scheduler_name == 'condor_g':
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' fi \n'
        txt += 'fi\n'

    file_list = [self.numberFile_(name, '$NJob') for name in self.output_file]

    # str.join replaces the deprecated string.join function
    txt += 'file_list="'+' '.join(file_list)+'"\n'
    txt += 'cd $RUNTIME_AREA\n'

    return txt
1219    
def numberFile_(self, file, txt):
    """
    append _'txt' before last extension of a file

    'out.root' with txt '3' gives 'out_3.root'; a name without any '.'
    simply gets '_' + txt appended.
    """
    # str.split replaces the deprecated string.split function
    parts = file.split(".")
    if len(parts) > 1:
        # everything before the last '.' is the name, the rest the extension
        return ".".join(parts[:-1]) + '_' + txt + "." + parts[-1]
    return parts[0] + '_' + txt
1237    
def getRequirements(self, nj=None):
    """
    return job requirements to add to jdl files

    The result is the ' && ' conjunction of:
      * a Member() clause for the software version, if self.version is set;
      * a Member() clause for the architecture, if self.executable_arch
        is set;
      * the outbound IP requirement, which is always present.
    nj is accepted for interface compatibility and is not used.
    """
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version +
                       '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch +
                       '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')
    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')

    # joining avoids a dangling leading ' && ' when the version is empty
    return ' && '.join(clauses)
1257 gutsche 1.3
def configFilename(self):
    """Return self.name() with the '.cfg' extension appended."""
    base = self.name()
    return base + '.cfg'
1261    
1262     ### OLI_DANIELE
def wsSetupCMSOSGEnvironment_(self):
    """
    Returns part of a job script which prepares
    the execution environment and which is common for all CMS jobs.

    The generated shell sources cmsset_default.sh from $GRID3_APP_DIR
    or, failing that, from $OSG_APP, with SCRAM_ARCH set to
    self.executable_arch.  If neither file exists the script reports
    error 10020, removes $WORKING_DIR (reporting 10017 if that fails)
    and exits with status 1.
    """
    arch = self.executable_arch
    version = self.version

    txt = '\n'
    txt += ' echo "### SETUP CMS OSG ENVIRONMENT ###"\n'
    txt += ' if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
    txt += ' # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
    txt += ' export SCRAM_ARCH='+arch+'\n'
    txt += ' source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+version+'\n'
    txt += ' elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
    txt += ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
    txt += ' export SCRAM_ARCH='+arch+'\n'
    txt += ' source $OSG_APP/cmssoft/cms/cmsset_default.sh '+version+'\n'
    txt += ' else\n'
    txt += ' echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10020"\n'
    txt += ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    # No 'exit 1' here: exiting at this point made the working
    # directory cleanup below unreachable.  The script exits once the
    # cleanup has been attempted.
    txt += '\n'
    txt += ' echo "Remove working directory: $WORKING_DIR"\n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10017"\n'
    txt += ' echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += '\n'
    txt += ' exit 1\n'
    txt += ' fi\n'
    txt += '\n'
    txt += ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
    txt += ' echo " END SETUP CMS OSG ENVIRONMENT "\n'

    return txt
1308    
1309     ### OLI_DANIELE
def wsSetupCMSLCGEnvironment_(self):
    """
    Returns part of a job script which prepares
    the execution environment and which is common for all CMS jobs.

    The generated shell checks $VO_CMS_SW_DIR (error 10031), checks
    that cmsset_default.sh in it is a non-empty file (error 10020) and
    sources it (error 10032).  Every error path exits with status 1.
    The text does not depend on any attribute of the instance.
    """
    script = (
        ' \n',
        ' echo " ### SETUP CMS LCG ENVIRONMENT ### "\n',
        # software directory must be defined
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n',
        ' echo "JOB_EXIT_STATUS = 10031" \n',
        ' echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        # setup script must exist and be non-empty
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        # sourcing itself must succeed
        ' if [ $result -ne 0 ]; then\n',
        ' echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' echo "JOB_EXIT_STATUS = 10032"\n',
        ' echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
        ' echo "### END SETUP CMS LCG ENVIRONMENT ###"\n',
    )
    return ''.join(script)
1356 gutsche 1.5
1357 fanzago 1.93 ### FEDE FOR DBS OUTPUT PUBLICATION
def modifyReport(self, nj):
    """
    insert the part of the script that modifies the FrameworkJob Report

    The publication flag is read from cfg_params['USER.publish_data']
    (0 when the key is missing).  When it is 1 the generated shell runs
    ModifyJobReport.py on crab_fjr_$NJob.xml using the dataset name
    from cfg_params['USER.publish_data_name']; otherwise it only sets
    ProcessedDataset and FOR_LFN to fixed placeholder values.
    """
    try:
        publish_data = int(self.cfg_params['USER.publish_data'])
    except KeyError:
        publish_data = 0

    lines = [
        'echo "Modify Job Report" \n',
        'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n',
        # make sure SE and SE_PATH are defined, possibly empty
        'if [ -z "$SE" ]; then\n',
        ' SE="" \n',
        'fi \n',
        'if [ -z "$SE_PATH" ]; then\n',
        ' SE_PATH="" \n',
        'fi \n',
        'echo "SE = $SE"\n',
        'echo "SE_PATH = $SE_PATH"\n',
    ]

    if publish_data != 1:
        lines.extend([
            'ProcessedDataset=no_data_to_publish \n',
            'FOR_LFN=/local/ \n',
            'echo "ProcessedDataset = $ProcessedDataset"\n',
            'echo "FOR_LFN = $FOR_LFN" \n',
        ])
        return ''.join(lines)

    processedDataset = self.cfg_params['USER.publish_data_name']
    # the same command line is first echoed and then executed
    command = '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH'
    lines.extend([
        'ProcessedDataset='+processedDataset+'\n',
        'if [ "$SE_PATH" == "" ]; then\n',
        ' FOR_LFN=/copy_problems/ \n',
        'else \n',
        # the LFN is whatever follows 'store' in SE_PATH ('store' is hardcoded)
        ' tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n',
        ' FOR_LFN=/store$tmp \n',
        'fi \n',
        'echo "ProcessedDataset = $ProcessedDataset"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
        'echo "' + command + '"\n',
        command + '\n',
        'modifyReport_result=$?\n',
        'echo modifyReport_result = $modifyReport_result\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' exit_status=1\n',
        ' echo "ERROR: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n',
        'fi\n',
    ])
    return ''.join(lines)
1419 fanzago 1.99
def cleanEnv(self):
    """
    Return the shell fragment that cleans up the worker node on OSG.

    The emitted script only acts when $middleware equals OSG: it moves to
    $RUNTIME_AREA and removes $WORKING_DIR.  If the directory is still
    there afterwards, it reports exit code 60999 into $RUNTIME_AREA/$repo,
    calls dumpStatus on that file, then recreates the file with the
    MonitorJobID and MonitorID entries.
    """
    ### OLI_DANIELE
    # One entry per emitted shell line; the trailing empty entry yields the
    # blank line that closes the fragment.
    shell_lines = [
        'if [ $middleware == OSG ]; then',
        ' cd $RUNTIME_AREA',
        ' echo "Remove working directory: $WORKING_DIR"',
        ' /bin/rm -rf $WORKING_DIR',
        ' if [ -d $WORKING_DIR ] ;then',
        ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"',
        ' echo "JOB_EXIT_STATUS = 60999"',
        ' echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo',
        ' dumpStatus $RUNTIME_AREA/$repo',
        ' rm -f $RUNTIME_AREA/$repo ',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo ',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo',
        ' fi',
        'fi',
        '',
    ]
    return '\n'.join(shell_lines) + '\n'
1439 fanzago 1.93
def setParam_(self, param, value):
    """
    Record value under the key param in this job type's parameter
    dictionary, replacing any value already stored for that key.
    """
    params = self._params
    params[param] = value
1442    
def getParams(self):
    """
    Return the parameter dictionary itself (not a copy), as filled by
    setParam_.
    """
    params = self._params
    return params
1445 gutsche 1.8
def setTaskid_(self):
    """
    Copy the 'taskId' entry of the configuration parameters into the
    instance, where getTaskid reads it back.
    """
    task_id = self.cfg_params['taskId']
    self._taskId = task_id
1448    
def getTaskid(self):
    """
    Return the task identifier stored on the instance by setTaskid_.
    """
    task_id = self._taskId
    return task_id
1451 gutsche 1.35
def uniquelist(self, old):
    """
    Return the distinct elements of old as a new list.

    Elements are kept in the order of their first occurrence, so the
    result is reproducible from run to run.  A real list is always
    returned (never a dictionary view), whatever the interpreter version.
    The elements must be hashable; old itself is left untouched.
    """
    seen = {}
    unique = []
    for e in old:
        # dict membership keeps the duplicate test O(1) per element
        if e not in seen:
            seen[e] = 0
            unique.append(e)
    return unique
1460 mcinquil 1.121
1461    
def checkOut(self, limit):
    """
    Return the shell fragment that enforces the output sandbox size limit.

    The files considered are self.output_file plus
    self.output_file_sandbox, each renamed through
    self.numberFile_(name, '$NJob'); any name containing 'crab_fjr' is
    left out of the check.

    limit is written verbatim into the script and compared with the sum
    of the size column printed by `ls -gG` for the files that exist
    (presumably bytes -- confirm against the caller).

    When the sum exceeds the limit, the emitted script walks the files in
    order, finds the first one whose cumulative size goes over the limit,
    deletes that file and every file listed after it, and sets
    exit_status to 70000.
    """
    # Job-numbered names of every output file except the job report.
    numbered = [self.numberFile_(fileOut, '$NJob')
                for fileOut in (self.output_file + self.output_file_sandbox)
                if fileOut.find('crab_fjr') == -1]
    # Space-separated (with a leading space) so it can be dropped straight
    # into the shell `for ... in` lists below.
    allOutFiles = ''.join([' ' + name for name in numbered])

    txt = 'echo "*****************************************"\n'
    txt += 'echo "** Starting output sandbox limit check **"\n'
    txt += 'echo "*****************************************"\n'
    txt += 'echo "OUTPUT files: '+allOutFiles+'";\n'
    txt += 'ls -gGhrta;\n'

    # Pass 1: total size of the output files that actually exist.
    txt += 'sum=0;\n'
    txt += 'for file in '+allOutFiles+' ; do\n'
    txt += ' if [ -e $file ]; then\n'
    txt += ' tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
    txt += ' sum=`expr $sum + $tt`\n'
    txt += ' else\n'
    txt += ' echo "WARNING: output file $file not found!"\n'
    txt += ' fi\n'
    txt += 'done\n'
    txt += 'echo "Total Output dimension: $sum";\n'
    txt += 'limit='+str(limit)+';\n'
    txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
    txt += 'if [ $limit -lt $sum ]; then\n'
    # NOTE(review): "to big" is a typo for "too big"; the message is left
    # untouched because it is runtime output that log parsers may match.
    txt += ' echo "WARNING: output files have to big size - something will be lost;"\n'
    txt += ' echo " checking the output file sizes..."\n'

    # Pass 2: find the first file whose cumulative size crosses the limit.
    txt += ' tot=0;\n'
    txt += ' for file2 in '+allOutFiles+' ; do\n'
    # Missing files were only warned about in pass 1; skip them here too so
    # `ls`/`expr` never run on an empty size.
    txt += ' if [ ! -e $file2 ]; then continue; fi\n'
    txt += ' tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
    txt += ' tot=`expr $tot + $tt`;\n'
    txt += ' if [ $limit -lt $tot ]; then\n'
    txt += ' tot=`expr $tot - $tt`;\n'
    # Must be the loop variable of THIS loop: $file is a leftover of pass 1
    # and always holds the last file of the list.
    txt += ' fileLast=$file2;\n'
    txt += ' break;\n'
    txt += ' fi\n'
    txt += ' done\n'
    txt += ' echo "Dimension calculated: $tot"; echo "First file to exclude: $fileLast";\n'

    # Pass 3: remove the overflowing file and everything listed after it.
    txt += ' flag=0;\n'
    txt += ' for filess in '+allOutFiles+' ; do\n'
    txt += ' if [ $fileLast = $filess ]; then\n'
    txt += ' flag=1;\n'
    txt += ' fi\n'
    txt += ' if [ $flag -eq 1 ]; then\n'
    txt += ' rm -f $filess;\n'
    txt += ' fi\n'
    txt += ' done\n'
    txt += ' ls -agGhrt;\n'
    txt += ' echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
    txt += ' echo "JOB_EXIT_STATUS = 70000";\n'
    txt += ' exit_status=70000;\n'
    txt += 'else\n'
    txt += ' echo "Total Output dimension $sum is fine.";\n'
    txt += 'fi\n'
    txt += 'echo "*****************************************"\n'
    txt += 'echo "*** Ending output sandbox limit check ***"\n'
    txt += 'echo "*****************************************"\n'
    return txt