ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.126
Committed: Wed Oct 10 15:04:09 2007 UTC (17 years, 6 months ago) by mcinquil
Content type: text/x-python
Branch: MAIN
Changes since 1.125: +15 -17 lines
Log Message:
Removing not proper print out

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5 fanzago 1.115 from BlackWhiteListParser import BlackWhiteListParser
6 slacapra 1.1 import common
7     import Scram
8    
9 slacapra 1.105 import os, string, glob
10 slacapra 1.1
11     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs):
    """
    Set up a CMSSW job type from the user configuration.

    Reads the CMSSW.*, USER.* and EDG.* keys of cfg_params, validates
    them, runs data discovery when a dataset path is given, prepares the
    user tar ball, splits the task into jobs and finally writes the
    modified parameter set.

    ARGUMENTS: cfg_params - mapping holding the user configuration
               ncjobs     - number of jobs requested to be created
                            (compared against 'all' in jobSplittingByBlocks)
    RAISES:    CrabException for an unsupported version/architecture pair,
               for missing or inconsistent configuration keys, for files
               that do not exist and for failures while editing the
               parameter set.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug(3,'CMSSW::__init__')

    self._params = {}
    self.cfg_params = cfg_params

    # init BlackWhiteListParser
    self.blackWhiteListParser = BlackWhiteListParser(cfg_params)

    try:
        self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
    except KeyError:
        self.MaxTarBallSize = 9.5

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    log = common.logger

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.additional_tgz_name = 'additional.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''
    self.datasetPath = ''

    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()

    #
    # Try to block creation in case of arch/version mismatch
    #
    versionFields = self.version.split("_")
    cmsswMajor = int(versionFields[1])
    if cmsswMajor == 1:
        # the minor field is only parsed when it is actually needed
        cmsswMinor = int(versionFields[2])
        olderThan15 = cmsswMinor < 5
        if olderThan15 and self.executable_arch.find('slc4') == 0:
            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
            raise CrabException(msg)
        if (not olderThan15) and self.executable_arch.find('slc3') == 0:
            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
            raise CrabException(msg)

    common.taskDB.setDict('codeVersion',self.version)
    self.setParam_('application', self.version)

    ### collect Data cards

    ## get DBS mode
    try:
        self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
    except KeyError:
        self.use_dbs_1 = 0

    try:
        tmp = cfg_params['CMSSW.datasetpath']
        log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
        if tmp.lower()=='none':
            self.datasetPath = None
            self.selectNoInput = 1
        else:
            self.datasetPath = tmp
            self.selectNoInput = 0
    except KeyError:
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)

    # ML monitoring
    # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
    if not self.datasetPath:
        self.setParam_('dataset', 'None')
        self.setParam_('owner', 'None')
    else:
        try:
            datasetpath_split = self.datasetPath.split("/")
            # standard style
            self.setParam_('datasetFull', self.datasetPath)
            if self.use_dbs_1 == 1 :
                self.setParam_('dataset', datasetpath_split[1])
                self.setParam_('owner', datasetpath_split[-1])
            else:
                self.setParam_('dataset', datasetpath_split[1])
                self.setParam_('owner', datasetpath_split[2])
        except IndexError:
            # path has fewer '/'-separated fields than expected:
            # fall back to the full path for both parameters
            self.setParam_('dataset', self.datasetPath)
            self.setParam_('owner', self.datasetPath)

    self.setTaskid_()
    self.setParam_('taskId', self.cfg_params['taskId'])

    self.dataTiers = []

    ## now the application
    try:
        self.executable = cfg_params['CMSSW.executable']
        self.setParam_('exe', self.executable)
        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
        msg = "Default executable cmsRun overridden. Switch to " + self.executable
        log.debug(3,msg)
    except KeyError:
        self.executable = 'cmsRun'
        self.setParam_('exe', self.executable)
        msg = "User executable not defined. Use cmsRun"
        log.debug(3,msg)

    try:
        self.pset = cfg_params['CMSSW.pset']
        log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
        if self.pset.lower() != 'none' :
            if (not os.path.exists(self.pset)):
                raise CrabException("User defined PSet file "+self.pset+" does not exist")
        else:
            # 'none' means: no parameter set, a user script is run instead
            self.pset = None
    except KeyError:
        raise CrabException("PSet file missing. Cannot run cmsRun ")

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    try:
        self.output_file = []
        tmp = cfg_params['CMSSW.output_file']
        if tmp != '':
            tmpOutFiles = cfg_params['CMSSW.output_file'].split(',')
            log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
            for tmp in tmpOutFiles:
                self.output_file.append(tmp.strip())
        else:
            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
    except KeyError:
        log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")

    # script_exe file as additional file in inputSandbox
    try:
        self.scriptExe = cfg_params['USER.script_exe']
        if self.scriptExe != '':
            if not os.path.isfile(self.scriptExe):
                msg ="ERROR. file "+self.scriptExe+" not found"
                raise CrabException(msg)
            self.additional_inbox_files.append(self.scriptExe.strip())
    except KeyError:
        self.scriptExe = ''

    # without dataset and without pset there must be a script to run
    if self.datasetPath is None and self.pset is None and self.scriptExe == '' :
        msg ="Error. script_exe not defined"
        raise CrabException(msg)

    ## additional input files
    try:
        tmpAddFiles = cfg_params['USER.additional_input_files'].split(',')
        for tmp in tmpAddFiles:
            tmp = tmp.strip()
            if not tmp:
                # empty value or stray comma: nothing to add
                # (indexing tmp[0] below would raise IndexError)
                continue
            dirname = ''
            if not tmp[0]=="/": dirname = "."
            files = []
            if tmp.find("*")>-1:
                files = glob.glob(os.path.join(dirname, tmp))
                if len(files)==0:
                    raise CrabException("No additional input file found with this pattern: "+tmp)
            else:
                files.append(tmp)
            for fileName in files:
                if not os.path.exists(fileName):
                    raise CrabException("Additional input file not found: "+fileName)
                self.additional_inbox_files.append(fileName.strip())
        common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
    except KeyError:
        pass

    # files per job
    try:
        if (cfg_params['CMSSW.files_per_jobs']):
            raise CrabException("files_per_jobs no longer supported. Quitting.")
    except KeyError:
        pass

    ## Events per job
    try:
        self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
        self.selectEventsPerJob = 1
    except KeyError:
        self.eventsPerJob = -1
        self.selectEventsPerJob = 0

    ## number of jobs
    try:
        self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
        self.selectNumberOfJobs = 1
    except KeyError:
        self.theNumberOfJobs = 0
        self.selectNumberOfJobs = 0

    try:
        self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
        self.selectTotalNumberEvents = 1
    except KeyError:
        self.total_number_of_events = 0
        self.selectTotalNumberEvents = 0

    if self.pset is not None:
        if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
            msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
            raise CrabException(msg)
    else:
        if (self.selectNumberOfJobs == 0):
            msg = 'Must specify number_of_jobs.'
            raise CrabException(msg)

    ## source seed for pythia
    try:
        self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
    except KeyError:
        self.sourceSeed = None
        common.logger.debug(5,"No seed given")

    try:
        self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
    except KeyError:
        self.sourceSeedVtx = None
        common.logger.debug(5,"No vertex seed given")

    try:
        self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
    except KeyError:
        self.sourceSeedG4 = None
        common.logger.debug(5,"No g4 sim hits seed given")

    try:
        self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
    except KeyError:
        self.sourceSeedMix = None
        common.logger.debug(5,"No mix seed given")

    try:
        self.firstRun = int(cfg_params['CMSSW.first_run'])
    except KeyError:
        self.firstRun = None
        common.logger.debug(5,"No first run given")

    if self.pset is not None:
        # Releases from 1_5 onwards use the "150" manipulator. Major and
        # minor are compared as a pair so that e.g. 2_0 is not mistaken
        # for a pre-1_5 release because its minor number is below 5.
        if cmsswMajor > 1 or (cmsswMajor == 1 and int(versionFields[2]) >= 5):
            import PsetManipulator150 as pp
        else:
            import PsetManipulator as pp
        PsetEdit = pp.PsetManipulator(self.pset)

    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents=0 # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths={} # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination=[] # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end

    self.tgzNameWithPath = self.getTarBall(self.executable)

    ## Select Splitting
    if self.selectNoInput:
        if self.pset is None:
            self.jobSplittingForScript()
        else:
            self.jobSplittingNoInput()
    else:
        self.jobSplittingByBlocks(blockSites)

    # modify Pset
    if self.pset is not None:
        try:
            if (self.datasetPath): # standard job
                # allow to process a fraction of events in a file
                PsetEdit.inputModule("INPUT")
                PsetEdit.maxEvent("INPUTMAXEVENTS")
                PsetEdit.skipEvent("INPUTSKIPEVENTS")
            else: # pythia like job
                PsetEdit.maxEvent(self.eventsPerJob)
                if (self.firstRun):
                    PsetEdit.pythiaFirstRun("INPUTFIRSTRUN") #First Run
                # NOTE(review): the additional seeds are taken to be nested
                # under the pythia seed check - confirm against the
                # repository copy of this file.
                if (self.sourceSeed) :
                    PsetEdit.pythiaSeed("INPUT")
                    if (self.sourceSeedVtx) :
                        PsetEdit.vtxSeed("INPUTVTX")
                    if (self.sourceSeedG4) :
                        PsetEdit.g4Seed("INPUTG4")
                    if (self.sourceSeedMix) :
                        PsetEdit.mixSeed("INPUTMIX")
            # add FrameworkJobReport to parameter-set
            PsetEdit.addCrabFJR(self.fjrFileName)
            PsetEdit.psetWriter(self.configFilename())
        except:
            # deliberately broad: any failure while editing the parameter
            # set is reported to the user as a CrabException
            msg='Error while manipuliating ParameterSet: exiting...'
            raise CrabException(msg)
331 gutsche 1.3
def DataDiscoveryAndLocation(self, cfg_params):
    """
    Query the data discovery and data location services for the dataset.

    ARGUMENT: cfg_params - user configuration, handed to the discovery
              and location objects
    SETS:     self.pubdata       - discovery object (DBS1 or DBS2 flavour,
                                   chosen by self.use_dbs_1)
              self.filesbyblock  - result of pubdata.getFiles()
              self.eventsbyblock - result of pubdata.getEventsPerBlock()
              self.eventsbyfile  - result of pubdata.getEventsPerFile()
              self.maxEvents     - result of pubdata.getMaxEvents()
    RETURNS:  the value of dataloc.getSites()
    RAISES:   CrabException when discovery or location fails
    """
    import sys
    import DataDiscovery
    import DataDiscovery_DBS2
    import DataLocation
    common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")

    datasetPath=self.datasetPath

    # every one of these failures is reported to the user the same way
    discoveryErrors = (
        DataDiscovery.NotExistingDatasetError,
        DataDiscovery.NoDataTierinProvenanceError,
        DataDiscovery.DataDiscoveryError,
        DataDiscovery_DBS2.NotExistingDatasetError_DBS2,
        DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2,
        DataDiscovery_DBS2.DataDiscoveryError_DBS2,
    )

    ## Contact the DBS
    common.logger.message("Contacting Data Discovery Services ...")
    try:
        if self.use_dbs_1 == 1 :
            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
        else :
            self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
        self.pubdata.fetchDBSInfo()
    except discoveryErrors:
        # sys.exc_info() gives the active exception without relying on
        # version specific "except X, ex" / "except X as ex" syntax
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
        raise CrabException(msg)

    self.filesbyblock=self.pubdata.getFiles()
    self.eventsbyblock=self.pubdata.getEventsPerBlock()
    self.eventsbyfile=self.pubdata.getEventsPerFile()

    ## get max number of events
    self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py

    ## Contact the DLS and build a list of sites hosting the fileblocks
    try:
        dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
        dataloc.fetchDLSInfo()
    except DataLocation.DataLocationError:
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
        raise CrabException(msg)

    sites = dataloc.getSites()

    # screen output
    common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")

    return sites
398 gutsche 1.3
def jobSplittingByBlocks(self, blockSites):
    """
    Perform job splitting. Jobs run over an integer number of files
    and no more than one block.
    ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
    REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
              self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
              self.maxEvents, self.filesbyblock, self.eventsbyblock, self.eventsbyfile,
              self.ncjobs
    SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
          self.total_number_of_jobs - Total # of jobs (also stored in self.ncjobs)
          self.list_of_args - one [file list string, max events, skip events] entry per job
    """

    # ---- Handle the possible job splitting configurations ---- #
    if (self.selectTotalNumberEvents):
        totalEventsRequested = self.total_number_of_events
    if (self.selectEventsPerJob):
        eventsPerJobRequested = self.eventsPerJob
        if (self.selectNumberOfJobs):
            totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob

    # If user requested all the events in the dataset
    if (totalEventsRequested == -1):
        eventsRemaining=self.maxEvents
    # If user requested more events than are in the dataset
    elif (totalEventsRequested > self.maxEvents):
        eventsRemaining = self.maxEvents
        # report the effective request: it may have been derived from
        # number_of_jobs * events_per_job rather than given directly
        common.logger.message("Requested "+str(totalEventsRequested)+ " events, but only "+str(self.maxEvents)+" events are available.")
    # If user requested less events than are in the dataset
    else:
        eventsRemaining = totalEventsRequested

    # If user requested more events per job than are in the dataset
    if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
        eventsPerJobRequested = self.maxEvents

    # For user info at end
    totalEventCount = 0

    if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
        eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)

    if (self.selectNumberOfJobs):
        common.logger.message("May not create the exact number_of_jobs requested.")

    if ( self.ncjobs == 'all' ) :
        totalNumberOfJobs = 999999999
    else :
        totalNumberOfJobs = self.ncjobs

    blocks = list(blockSites.keys())
    blockCount = 0
    # Backup variable in case self.maxEvents counted events in a non-included block
    numBlocksInDataset = len(blocks)

    jobCount = 0
    list_of_lists = []

    # dictionary tracking which jobs belong to which block
    jobsOfBlock = {}

    # ---- Iterate over the blocks in the dataset until ---- #
    # ---- we've met the requested total # of events    ---- #
    while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
        block = blocks[blockCount]
        blockCount += 1
        if block not in jobsOfBlock:
            jobsOfBlock[block] = []

        if block in self.eventsbyblock:
            numEventsInBlock = self.eventsbyblock[block]
            common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))

            files = self.filesbyblock[block]
            numFilesInBlock = len(files)
            if (numFilesInBlock <= 0):
                continue
            fileCount = 0

            # ---- New block => New job ---- #
            parString = "\\{"
            # counter for number of events in files currently worked on
            filesEventCount = 0
            # flag if next while loop should touch new file
            newFile = 1
            # job event counter
            jobSkipEventCount = 0

            # ---- Iterate over the files in the block until we've met the requested ---- #
            # ---- total # of events or we've gone over all the files in this block  ---- #
            while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
                fileName = files[fileCount]
                if newFile :
                    try:
                        numEventsInFile = self.eventsbyfile[fileName]
                        common.logger.debug(6, "File "+str(fileName)+" has "+str(numEventsInFile)+" events")
                        # increase filesEventCount
                        filesEventCount += numEventsInFile
                        # Add file to current job
                        parString += '\\\"' + fileName + '\\\"\,'
                        newFile = 0
                    except KeyError:
                        common.logger.message("File "+str(fileName)+" has unknown number of events: skipping")

                # if less events in file remain than eventsPerJobRequested
                if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
                    # if last file in block
                    if ( fileCount == numFilesInBlock-1 ) :
                        # end job using last file, use remaining events in block
                        # close job and touch new file
                        # ([:-2] drops the trailing '\,' separator)
                        fullString = parString[:-2]
                        fullString += '\\}'
                        list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
                        self.jobDestination.append(blockSites[block])
                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                        # fill jobs of block dictionary
                        jobsOfBlock[block].append(jobCount+1)
                        # reset counter
                        jobCount = jobCount + 1
                        totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
                        eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
                        jobSkipEventCount = 0
                        # reset file
                        parString = "\\{"
                        filesEventCount = 0
                        newFile = 1
                        fileCount += 1
                    else :
                        # go to next file
                        newFile = 1
                        fileCount += 1
                # if events in file equal to eventsPerJobRequested
                elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
                    # close job and touch new file
                    fullString = parString[:-2]
                    fullString += '\\}'
                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    jobsOfBlock[block].append(jobCount+1)
                    # reset counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    jobSkipEventCount = 0
                    # reset file
                    parString = "\\{"
                    filesEventCount = 0
                    newFile = 1
                    fileCount += 1

                # if more events in file remain than eventsPerJobRequested
                else :
                    # close job but don't touch new file
                    fullString = parString[:-2]
                    fullString += '\\}'
                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    jobsOfBlock[block].append(jobCount+1)
                    # increase counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    # calculate skip events for last file
                    # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
                    jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[fileName])
                    # remove all but the last file
                    filesEventCount = self.eventsbyfile[fileName]
                    parString = "\\{"
                    parString += '\\\"' + fileName + '\\\"\,'
                pass # END if
            pass # END while (iterate over files in the block)
    pass # END while (iterate over blocks in the dataset)
    self.ncjobs = self.total_number_of_jobs = jobCount
    if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
        common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
    common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")

    # screen output
    screenOutput = "List of jobs and available destination sites:\n\n"

    # keep trace of block with no sites to print a warning at the end
    noSiteBlock = []
    bloskNoSite = []

    blockCounter = 0
    for block in blocks:
        if block in jobsOfBlock:
            blockCounter += 1
            # sites left for this block after black list and white list
            allowedSites = self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)
            screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(allowedSites))
            if len(allowedSites) == 0:
                noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
                bloskNoSite.append( blockCounter )

    common.logger.message(screenOutput)
    # warn when ANY listed block ended up without sites; the decision is
    # taken from the collected list, not from the last loop variable
    if len(noSiteBlock) > 0:
        msg = 'WARNING: No sites are hosting any part of data for block:\n '
        virgola = ""
        if len(bloskNoSite) > 1:
            virgola = ","
        for blockNumber in bloskNoSite:
            msg += ' ' + str(blockNumber) + virgola
        msg += '\n Related jobs:\n '
        virgola = ""
        if len(noSiteBlock) > 1:
            virgola = ","
        for range_jobs in noSiteBlock:
            msg += str(range_jobs) + virgola
        msg += '\n will not be submitted and this block of data can not be analyzed!\n'
        common.logger.message(msg)

    self.list_of_args = list_of_lists
    return
618    
def jobSplittingNoInput(self):
    """
    Perform job splitting based on number of event per job

    REQUIRES: self.selectEventsPerJob, self.selectTotalNumberEvents,
              self.selectNumberOfJobs, self.eventsPerJob,
              self.theNumberOfJobs, self.total_number_of_events,
              self.firstRun and the self.sourceSeed* values
    SETS:     self.total_number_of_jobs, self.list_of_args and, depending
              on the configuration, self.total_number_of_events or
              self.eventsPerJob; appends one entry per job to
              self.jobDestination
    RAISES:   CrabException when the total number of events is negative
              or when the split would divide by zero
    """
    common.logger.debug(5,'Splitting per events')
    common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
    common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
    common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')

    if (self.total_number_of_events < 0):
        msg='Cannot split jobs per Events with "-1" as total number of events'
        raise CrabException(msg)

    if (self.selectEventsPerJob):
        if (self.selectTotalNumberEvents):
            if self.eventsPerJob == 0:
                # would otherwise surface as a bare ZeroDivisionError
                msg='Cannot split jobs per Events with "0" as events per job'
                raise CrabException(msg)
            self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
        elif(self.selectNumberOfJobs) :
            self.total_number_of_jobs =self.theNumberOfJobs
            self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)

    elif (self.selectNumberOfJobs) :
        if self.theNumberOfJobs == 0:
            # would otherwise surface as a bare ZeroDivisionError
            msg='Cannot split jobs per Events with "0" as number of jobs'
            raise CrabException(msg)
        self.total_number_of_jobs = self.theNumberOfJobs
        self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)

    common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs))

    # is there any remainder?
    check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)

    common.logger.debug(5,'Check '+str(check))

    common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
    if check > 0:
        common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))

    # each argument is a configured value with the job index appended
    self.list_of_args = []
    for i in range(self.total_number_of_jobs):
        ## Since there is no input, any site is good
        self.jobDestination.append([""]) #must be empty to write correctly the xml
        args=[]
        if (self.firstRun):
            ## pythia first run
            args.append(str(self.firstRun)+str(i))
        else:
            ## no first run
            args.append(str(i))
        # NOTE(review): the additional seeds are taken to be nested under
        # the pythia seed check - confirm against the repository copy.
        if (self.sourceSeed):
            args.append(str(self.sourceSeed)+str(i))
            if (self.sourceSeedVtx):
                ## + vtx random seed
                args.append(str(self.sourceSeedVtx)+str(i))
            if (self.sourceSeedG4):
                ## + G4 random seed
                args.append(str(self.sourceSeedG4)+str(i))
            if (self.sourceSeedMix):
                ## + Mix random seed
                args.append(str(self.sourceSeedMix)+str(i))
        self.list_of_args.append(args)

    return
688    
689 spiga 1.42
def jobSplittingForScript(self):#CarlosDaniele
    """
    Perform job splitting based on number of job

    Creates self.theNumberOfJobs jobs. Every job gets its own index as
    its only argument and an empty destination entry.
    """
    log = common.logger
    log.debug(5,'Splitting per job')
    log.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')

    njobs = self.theNumberOfJobs
    self.total_number_of_jobs = njobs

    log.debug(5,'N jobs '+str(njobs))
    log.message(str(njobs)+' jobs can be created')

    # one single-element argument list per job: the job index as a string
    self.list_of_args = [[str(index)] for index in range(njobs)]
    # no input data, so each job gets an empty destination entry
    self.jobDestination.extend([[""] for index in range(njobs)])
    return
712    
def split(self, jobParams):
    """
    Store the arguments and the destination of every job in the job DB.

    jobParams is extended by one placeholder per job; its first
    self.total_number_of_jobs entries are then replaced by the matching
    entries of self.list_of_args.
    """
    jobDB = common.jobDB
    jobDB.load()

    njobs = self.total_number_of_jobs
    arglist = self.list_of_args

    # create the empty structure
    jobParams.extend([""] * njobs)

    job = 0
    while job < njobs:
        jobArgs = arglist[job]
        jobParams[job] = jobArgs
        jobDB.setArguments(job, jobArgs)
        destination = self.jobDestination[job]
        common.logger.debug(5,"Job "+str(job)+" Destination: "+str(destination))
        jobDB.setDestination(job, self.jobDestination[job])
        job += 1

    jobDB.save()
    return
733    
def getJobTypeArguments(self, nj, sched):
    """
    Return the arguments stored in the job database for job 'nj' as a
    single string; every argument is followed by one blank.
    """
    return ''.join([str(arg) + " " for arg in common.jobDB.arguments(nj)])
739 gutsche 1.3
def numberOfJobs(self):
    """
    Return the value of self.total_number_of_jobs.
    """
    total = self.total_number_of_jobs
    return total
743    
def getTarBall(self, exe):
    """
    Return the path of the tar-ball holding the user libraries and
    executable, building it first when the file does not exist yet.

    The path is composed relative to common.work_space.pathForTgz()
    and is also stored in self.tgzNameWithPath.
    """
    tarball = common.work_space.pathForTgz() + 'share/' + self.tgz_name
    self.tgzNameWithPath = tarball

    if not os.path.exists(tarball):
        # Nothing on disk yet: pack the user binaries now.
        self.buildTar_(exe)
        return self.tgzNameWithPath.strip()

    # An existing tar-ball is reused as it is.
    return tarball
761    
def buildTar_(self, executable):
    """
    Create the tar-balls that are shipped with the jobs.

    Two gzipped archives are written:

    * self.tgzNameWithPath: the user executable (only when its path does
      not contain the release top), the 'lib' and 'module' directories
      of the user SCRAM area, every 'data' directory found below that
      area, and the 'ProdAgentApi' and 'ProdCommon' directories found
      under $CRABDIR.
    * self.MLtgzfile: a fixed list of helper scripts taken from
      $CRABDIR/python.

    Nothing is built, and self.MLtgzfile is not assigned, when the
    release top is empty or equal to the user area.

    executable -- name of the user executable, resolved through
                  self.scram.findFile_.

    Raises CrabException when the executable cannot be found, when an
    archive cannot be written (the underlying error is appended to the
    message), or when the first archive is larger than
    self.MaxTarBallSize MB.
    """
    import sys
    import tarfile

    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        return

    try:
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            # NOTE(review): the guard tests self.executable while the lookup
            # uses the 'executable' argument, exactly as before - confirm
            # that callers always pass the same value.
            if self.executable != '':
                exeWithPath = self.scram.findFile_(executable)
                if not exeWithPath:
                    raise CrabException('User executable ' + executable + ' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship it
                    common.logger.debug(5, "Exe " + exeWithPath + " to be tarred")
                    areaPrefix = swArea + '/'
                    # distinguish case when script is in user project area
                    # or given by full path somewhere else
                    if exeWithPath.find(areaPrefix) >= 0:
                        relativeExe = exeWithPath.replace(areaPrefix, '')
                        tar.add(areaPrefix + relativeExe, os.path.basename(executable))
                    else:
                        tar.add(exeWithPath, os.path.basename(executable))
                # otherwise the exe is from the release: found on the WN

            ## Now get the libraries: only those in local working area
            libDir = 'lib'
            lib = swArea + '/' + libDir
            common.logger.debug(5, "lib " + lib + " to be tarred")
            if os.path.exists(lib):
                tar.add(lib, libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module, moduleDir)

            ## Now check if any data dir(s) is present
            swAreaLen = len(swArea)
            for root, dirs, files in os.walk(swArea):
                if "data" in dirs:
                    common.logger.debug(5, "data " + root + "/data" + " to be tarred")
                    tar.add(root + "/data", root[swAreaLen:] + "/data")

            ## Add ProdAgent dir to tar
            paDir = 'ProdAgentApi'
            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
            if os.path.isdir(pa):
                tar.add(pa, paDir)

            ## Add ProdCommon dir to tar (needed for DBS publication)
            prodcommonDir = 'ProdCommon'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
            if os.path.isdir(prodcommonPath):
                tar.add(prodcommonPath, prodcommonDir)

            common.logger.debug(5, "Files added to " + self.tgzNameWithPath + " : " + str(tar.getnames()))
        finally:
            # Always release the file handle, also when packing failed.
            tar.close()
    except CrabException:
        # Keep specific messages such as "User executable ... not found".
        raise
    except Exception:
        raise CrabException('Could not create tar-ball: ' + str(sys.exc_info()[1]))

    ## check for tarball size
    tarballinfo = os.stat(self.tgzNameWithPath)
    if tarballinfo.st_size > self.MaxTarBallSize*1024*1024:
        raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')

    ## create tar-ball with ML stuff
    self.MLtgzfile = common.work_space.pathForTgz() + 'share/MLfiles.tgz'
    try:
        tar = tarfile.open(self.MLtgzfile, "w:gz")
        try:
            scriptDir = os.environ['CRABDIR'] + '/python/'
            for helper in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
                tar.add(scriptDir + helper, helper)
            common.logger.debug(5, "Files added to " + self.MLtgzfile + " : " + str(tar.getnames()))
        finally:
            tar.close()
    except CrabException:
        raise
    except Exception:
        raise CrabException('Could not create ML files tar-ball: ' + str(sys.exc_info()[1]))

    return
858    
def additionalInputFileTgz(self):
    """
    Put all additional files into a tar ball and return its name.

    Every entry of self.additional_inbox_files is stored under the last
    component of its '/'-separated path. The archive is (re)written on
    each call, and its handle is closed even when adding a file fails.
    """
    import tarfile
    tarName = common.work_space.pathForTgz() + 'share/' + self.additional_tgz_name
    tar = tarfile.open(tarName, "w:gz")
    try:
        for extraFile in self.additional_inbox_files:
            tar.add(extraFile, extraFile.split('/')[-1])
        common.logger.debug(5, "Files added to " + self.additional_tgz_name + " : " + str(tar.getnames()))
    finally:
        # Do not leak the file handle when tar.add() raises.
        tar.close()
    return tarName
871    
def wsSetupEnvironment(self, nj):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    The returned shell text, in order:
      * sets up the CMS environment for the LCG or the OSG middleware;
      * creates the CMSSW project area for self.version and evaluates
        the scram runtime environment inside it;
      * checks that the wrapper received at least two arguments;
      * defines the dataset related shell variables;
      * when self.pset is not None, copies the job configuration file
        and substitutes the per-job values taken from the 'args' shell
        array, then renames it to pset.cfg;
      * unpacks the additional input tar-ball when additional input
        files were declared.

    The emitted text references shell names it does not define itself
    ($middleware, $RUNTIME_AREA, $repo, $nargs, ${args[...]}, dumpStatus).

    NOTE(review): the nesting of the Python branches was reconstructed
    from a listing without indentation - confirm against the repository
    copy before merging.
    """
    # Status reporting lines repeated after every error message.
    report_tail = (
        ' dumpStatus $RUNTIME_AREA/$repo\n'
        ' rm -f $RUNTIME_AREA/$repo \n'
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    )
    # Opening of the "remove the OSG working directory" sequence.
    osg_cleanup_head = (
        ' if [ $middleware == OSG ]; then \n'
        ' echo "Remove working directory: $WORKING_DIR"\n'
        ' cd $RUNTIME_AREA\n'
        ' /bin/rm -rf $WORKING_DIR\n'
        ' if [ -d $WORKING_DIR ] ;then\n'
    )

    # Prepare JobType-independent part
    txt = ''

    ## at this level the middleware is already known
    txt += 'if [ $middleware == LCG ]; then \n'
    txt += ' echo "### First set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
    txt += ' export SCRAM_ARCH=' + self.executable_arch + '\n'
    txt += ' export BUILD_ARCH=' + self.executable_arch + '\n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' echo "Created working directory: $WORKING_DIR"\n'
    txt += ' if [ ! -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
    txt += ' echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
    txt += report_tail
    txt += ' exit 1\n'
    txt += ' fi\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    txt += ' echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
    txt += ' export SCRAM_ARCH=' + self.executable_arch + '\n'
    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
    txt += scram + ' project CMSSW ' + self.version + '\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "SET_EXE_ENV 10034 ==>ERROR CMSSW ' + self.version + ' not found on `hostname`" \n'
    txt += ' echo "JOB_EXIT_STATUS = 10034"\n'
    txt += ' echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
    txt += report_tail
    txt += osg_cleanup_head
    # The message reports the release actually requested; it used to
    # name a fixed release regardless of self.version.
    txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW ' + self.version + ' not found on `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
    txt += ' echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
    txt += report_tail
    txt += ' fi\n'
    txt += ' fi \n'
    txt += ' exit 1 \n'
    txt += 'fi \n'
    txt += 'echo "CMSSW_VERSION = ' + self.version + '"\n'
    txt += 'cd ' + self.version + '\n'
    # SOFTWARE_DIR is the project area created above
    txt += 'SOFTWARE_DIR=`pwd`\n'
    txt += 'echo SOFTWARE_DIR=$SOFTWARE_DIR \n'
    ### needed grep for bug in scramv1 ###
    txt += scram + ' runtime -sh\n'
    txt += 'eval `' + scram + ' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    txt += 'echo $PATH\n'

    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber)\n"
    txt += "\n"
    txt += "if [ $nargs -lt 2 ]\n"
    txt += "then\n"
    txt += " echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
    txt += ' echo "JOB_EXIT_STATUS = 50113"\n'
    txt += ' echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
    txt += report_tail
    txt += osg_cleanup_head
    txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
    txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
    txt += ' echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
    txt += report_tail
    txt += ' fi\n'
    txt += ' fi \n'
    txt += " exit 1\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]

    ### shell variables used for the DBS output publication
    if self.datasetPath:
        txt += '\n'
        txt += 'DatasetPath=' + self.datasetPath + '\n'

        datasetpath_split = self.datasetPath.split("/")

        txt += 'PrimaryDataset=' + datasetpath_split[1] + '\n'
        txt += 'DataTier=' + datasetpath_split[2] + '\n'
        txt += 'ApplicationFamily=cmsRun\n'
    else:
        txt += 'DatasetPath=MCDataTier\n'
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'

    if self.pset is not None:
        pset = os.path.basename(job.configFilename())

        def sed_in_place(pattern, shell_var):
            # One substitution of 'pattern' by the value of $shell_var,
            # written back over the configuration file.
            return ('sed "s#' + pattern + '#$' + shell_var + '#" ' + pset
                    + ' > tmp && mv -f tmp ' + pset + '\n')

        txt += '\n'
        txt += 'cp $RUNTIME_AREA/' + pset + ' .\n'
        if self.datasetPath:  # standard job
            txt += 'InputFiles=${args[1]}\n'
            txt += 'MaxEvents=${args[2]}\n'
            txt += 'SkipEvents=${args[3]}\n'
            txt += 'echo "Inputfiles:<$InputFiles>"\n'
            txt += sed_in_place("{'INPUT'}", 'InputFiles')
            txt += 'echo "MaxEvents:<$MaxEvents>"\n'
            txt += sed_in_place('INPUTMAXEVENTS', 'MaxEvents')
            txt += 'echo "SkipEvents:<$SkipEvents>"\n'
            txt += sed_in_place('INPUTSKIPEVENTS', 'SkipEvents')
        else:  # pythia like job
            # Position of the next value inside the 'args' shell array.
            seedIndex = 1
            if self.firstRun:
                txt += 'FirstRun=${args[' + str(seedIndex) + ']}\n'
                txt += 'echo "FirstRun: <$FirstRun>"\n'
                txt += sed_in_place('\\<INPUTFIRSTRUN\\>', 'FirstRun')
                seedIndex += 1

            if self.sourceSeed:
                txt += 'Seed=${args[' + str(seedIndex) + ']}\n'
                txt += sed_in_place('\\<INPUT\\>', 'Seed')
                seedIndex += 1
                ## the following seeds are not always present
                if self.sourceSeedVtx:
                    txt += 'VtxSeed=${args[' + str(seedIndex) + ']}\n'
                    txt += 'echo "VtxSeed: <$VtxSeed>"\n'
                    txt += sed_in_place('\\<INPUTVTX\\>', 'VtxSeed')
                    seedIndex += 1
                if self.sourceSeedG4:
                    txt += 'G4Seed=${args[' + str(seedIndex) + ']}\n'
                    txt += 'echo "G4Seed: <$G4Seed>"\n'
                    txt += sed_in_place('\\<INPUTG4\\>', 'G4Seed')
                    seedIndex += 1
                if self.sourceSeedMix:
                    txt += 'mixSeed=${args[' + str(seedIndex) + ']}\n'
                    txt += 'echo "MixSeed: <$mixSeed>"\n'
                    txt += sed_in_place('\\<INPUTMIX\\>', 'mixSeed')
                    seedIndex += 1
        txt += 'mv -f ' + pset + ' pset.cfg\n'

    if len(self.additional_inbox_files) > 0:
        txt += 'if [ -e $RUNTIME_AREA/' + self.additional_tgz_name + ' ] ; then\n'
        txt += ' tar xzvf $RUNTIME_AREA/' + self.additional_tgz_name + '\n'
        txt += 'fi\n'

    if self.pset is not None:
        txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'

        txt += '\n'
        txt += 'echo "***** cat pset.cfg *********"\n'
        txt += 'cat pset.cfg\n'
        txt += 'echo "****** end pset.cfg ********"\n'
        txt += '\n'
        ### hash of the configuration, used for the DBS output publication
        txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
        txt += 'echo "PSETHASH = $PSETHASH" \n'
        txt += '\n'
    return txt
1073    
def wsBuildExe(self, nj=0):
    """
    Put in the script the commands to build an executable
    or a library.

    When the file self.tgzNameWithPath exists, the returned shell text
    unpacks the tar-ball of the same base name from $RUNTIME_AREA,
    aborts the job when tar fails, and prepends the ProdAgentApi and
    ProdCommon directories under $SOFTWARE_DIR to PYTHONPATH. The
    resulting PYTHONPATH is echoed after the if/else, so it is printed
    whichever branch set it. When the file does not exist an empty
    string is returned.

    nj -- not used by this method.
    """
    txt = ""

    if os.path.isfile(self.tgzNameWithPath):
        tarball = os.path.basename(self.tgzNameWithPath)
        txt += 'echo "tar xzvf $RUNTIME_AREA/' + tarball + '"\n'
        txt += 'tar xzvf $RUNTIME_AREA/' + tarball + '\n'
        txt += 'untar_status=$? \n'
        txt += 'if [ $untar_status -ne 0 ]; then \n'
        txt += ' echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
        txt += ' echo "JOB_EXIT_STATUS = $untar_status" \n'
        txt += ' echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' if [ $middleware == OSG ]; then \n'
        txt += ' echo "Remove working directory: $WORKING_DIR"\n'
        txt += ' cd $RUNTIME_AREA\n'
        txt += ' /bin/rm -rf $WORKING_DIR\n'
        txt += ' if [ -d $WORKING_DIR ] ;then\n'
        txt += ' echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
        txt += ' echo "JOB_EXIT_STATUS = 50999"\n'
        txt += ' echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
        txt += ' rm -f $RUNTIME_AREA/$repo \n'
        txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
        txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' fi\n'
        txt += ' fi \n'
        txt += ' \n'
        txt += ' exit 1 \n'
        txt += 'else \n'
        txt += ' echo "Successful untar" \n'
        txt += 'fi \n'
        txt += '\n'
        txt += 'echo "Include ProdAgentApi and PRODCOMMON in PYTHONPATH"\n'
        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
        txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
        txt += 'else\n'
        txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
        txt += 'fi\n'
        # Printed after 'fi' so that it is shown for both branches.
        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
        txt += '\n'

    return txt
1128    
def modifySteeringCards(self, nj):
    """
    Hook meant to modify the card provided by the user and write a new
    card into the share dir. This implementation performs no action.
    """
    return None
1134    
def executableName(self):
    """
    Return the command that starts the job: self.executable, or "sh "
    when a user script (self.scriptExe) is set.
    """
    if not self.scriptExe:
        return self.executable
    return "sh "
1140 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string passed to the executable.

    With a user script (self.scriptExe) the arguments are the script
    name followed by " $NJob". Otherwise they select pset.cfg, with the
    additional "-e" option for CMSSW releases 1_5 and later.

    Raises CrabException when the major and minor release numbers
    cannot be read from the string returned by
    self.scram.getSWVersion().
    """
    if self.scriptExe:
        return self.scriptExe + " $NJob"

    # Expected layout: CMSSW_<major>_<minor>_...
    version_array = self.scram.getSWVersion().split('_')
    try:
        major = int(version_array[1])
        minor = int(version_array[2])
    except (IndexError, ValueError):
        msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
        raise CrabException(msg)

    # Compare as a pair: testing major and minor independently would
    # reject e.g. 2_0 because its minor number is below 5.
    if (major, minor) >= (1, 5):
        return " -e -p pset.cfg"
    return " -p pset.cfg"
1159 slacapra 1.1
def inputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL input sandbox.

    The list holds, in this order: the code tar-ball and the ML
    tar-ball (each only when the file exists), the job configuration
    file (only when self.pset is not None) and the tar-ball returned by
    self.additionalInputFileTgz().
    """
    ## code: keep only the tar-balls that are really on disk
    sandbox = [tarball
               for tarball in (self.tgzNameWithPath, self.MLtgzfile)
               if os.path.isfile(tarball)]

    ## config
    if self.pset is not None:
        sandbox.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())

    ## additional input files
    sandbox.append(self.additionalInputFileTgz())
    return sandbox
1179    
def outputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL output sandbox.

    Every name in self.output_file and self.output_file_sandbox is
    numbered with nj + 1 through self.numberFile_.
    """
    declared = self.output_file + self.output_file_sandbox
    return [self.numberFile_(name, str(nj + 1)) for name in declared]
1191    
def prepareSteeringCards(self):
    """
    Hook for the initial modifications of the user's steering card
    file. This implementation performs no action.
    """
    return None
1197    
def wsRenameOutput(self, nj):
    """
    Returns part of a job script which renames the produced files.

    For every declared output file the emitted shell text moves the
    file to $RUNTIME_AREA and copies it to its numbered name, or
    records exit status 60302 when the file is missing. The text ends
    by defining 'file_list' with the numbered names of the files in
    self.output_file and by changing to $RUNTIME_AREA.
    """
    pieces = ['\n', '# directory content\n', 'ls \n']

    for produced in (self.output_file + self.output_file_sandbox):
        numbered = self.numberFile_(produced, '$NJob')
        pieces.append('\n')
        pieces.append('# check output file\n')
        pieces.append('if [ -e ./' + produced + ' ] ; then\n')
        pieces.append(' mv ' + produced + ' $RUNTIME_AREA\n')
        pieces.append(' cp $RUNTIME_AREA/' + produced + ' $RUNTIME_AREA/' + numbered + '\n')
        pieces.append('else\n')
        pieces.append(' exit_status=60302\n')
        pieces.append(' echo "ERROR: Problem with output file ' + produced + '"\n')
        # The status is only recorded, not used to exit, so that the
        # remaining part of the job script is still executed.
        if produced in self.output_file:
            pieces.append(' echo "JOB_EXIT_STATUS = $exit_status"\n')
            pieces.append(' output_exit_status=$exit_status\n')
            pieces.append(' # exit $exit_status\n')
        if common.scheduler.boss_scheduler_name == 'condor_g':
            pieces.append(' if [ $middleware == OSG ]; then \n')
            pieces.append(' echo "prepare dummy output file"\n')
            pieces.append(' echo "Processing of job output failed" > $RUNTIME_AREA/' + numbered + '\n')
            pieces.append(' fi \n')
        pieces.append('fi\n')

    numbered_outputs = [self.numberFile_(name, '$NJob') for name in self.output_file]
    pieces.append('file_list="' + ' '.join(numbered_outputs) + '"\n')
    pieces.append('cd $RUNTIME_AREA\n')
    return ''.join(pieces)
1241    
def numberFile_(self, file, txt):
    """
    Insert '_' + txt before the last extension of a file name.

    Only the last path component is inspected, so a dot inside a
    directory name is never taken for the start of the extension.
    A name without extension simply gets '_' + txt appended.

    file -- file name, optionally preceded by a '/'-separated path
    txt  -- text to insert (e.g. the job number or '$NJob')
    """
    base = os.path.basename(file)
    # Whatever precedes the last component is copied back unchanged.
    prefix = file[:len(file) - len(base)]

    dot = base.rfind(".")
    if dot < 0:
        # no extension at all
        return prefix + base + '_' + txt
    return prefix + base[:dot] + '_' + txt + base[dot:]
1259    
def getRequirements(self, nj=None):
    """
    return job requirements to add to jdl files

    The result joins with ' && ' one 'Member(...)' clause for
    self.version and one for self.executable_arch (each only when the
    attribute is set), followed by the outbound connectivity clause.
    Building the expression from a list keeps it well formed when
    self.version is empty.

    nj -- not used by this method.
    """
    clauses = []
    for tag in (self.version, self.executable_arch):
        if tag:
            clauses.append('Member("VO-cms-' + tag +
                           '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')

    return ' && '.join(clauses)
1279 gutsche 1.3
def configFilename(self):
    """ return the config filename: the job type name plus '.cfg' """
    base = self.name()
    return base + '.cfg'
1283    
1284     ### OLI_DANIELE
def wsSetupCMSOSGEnvironment_(self):
    """
    Returns part of a job script which prepares
    the execution environment and which is common for all CMS jobs
    (OSG flavour).

    The emitted shell text sources cmsset_default.sh from
    $GRID3_APP_DIR/cmssoft or, failing that, from $OSG_APP/cmssoft/cms,
    after exporting SCRAM_ARCH. When neither file exists it reports
    error 10020, removes $WORKING_DIR (reporting 10017 when that fails)
    and only then exits, so the cleanup is really executed.
    """
    # Status reporting lines repeated after every error message.
    report_tail = (
        ' dumpStatus $RUNTIME_AREA/$repo\n'
        ' rm -f $RUNTIME_AREA/$repo \n'
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    )

    txt = '\n'
    txt += ' echo "### SETUP CMS OSG ENVIRONMENT ###"\n'
    txt += ' if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
    txt += ' # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
    txt += ' export SCRAM_ARCH=' + self.executable_arch + '\n'
    txt += ' source $GRID3_APP_DIR/cmssoft/cmsset_default.sh ' + self.version + '\n'
    txt += ' elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
    txt += ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
    txt += ' export SCRAM_ARCH=' + self.executable_arch + '\n'
    txt += ' source $OSG_APP/cmssoft/cms/cmsset_default.sh ' + self.version + '\n'
    txt += ' else\n'
    txt += ' echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10020"\n'
    txt += ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
    txt += report_tail
    # No 'exit' here: the working directory must be removed first.
    txt += '\n'
    txt += ' echo "Remove working directory: $WORKING_DIR"\n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10017"\n'
    txt += ' echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
    txt += report_tail
    txt += ' fi\n'
    txt += '\n'
    txt += ' exit 1\n'
    txt += ' fi\n'
    txt += '\n'
    txt += ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
    txt += ' echo " END SETUP CMS OSG ENVIRONMENT "\n'

    return txt
1330    
1331     ### OLI_DANIELE
def wsSetupCMSLCGEnvironment_(self):
    """
    Returns part of a job script which prepares
    the execution environment and which is common for all CMS jobs
    (LCG flavour).

    The emitted shell text checks $VO_CMS_SW_DIR and sources
    $VO_CMS_SW_DIR/cmsset_default.sh, reporting error 10031, 10020 or
    10032 and exiting when a step fails.
    """
    # Status reporting lines repeated after every error message.
    report_tail = [
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
    ]

    script = [
        ' \n',
        ' echo " ### SETUP CMS LCG ENVIRONMENT ### "\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n',
        ' echo "JOB_EXIT_STATUS = 10031" \n',
        ' echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n',
    ]
    script += report_tail
    script += [
        ' exit 1\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
    ]
    script += report_tail
    script += [
        ' exit 1\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' echo "JOB_EXIT_STATUS = 10032"\n',
        ' echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n',
    ]
    script += report_tail
    script += [
        ' exit 1\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
        ' echo "### END SETUP CMS LCG ENVIRONMENT ###"\n',
    ]
    return ''.join(script)
1378 gutsche 1.5
1379 fanzago 1.93 ### FEDE FOR DBS OUTPUT PUBLICATION
def modifyReport(self, nj):
    """
    Return the shell fragment that rewrites the FrameworkJobReport for
    DBS output publication.

    ``USER.publish_data`` is read from ``self.cfg_params``; a missing key
    counts as 0.  Only when it equals 1 is the ModifyJobReport.py call
    emitted, and in that case ``USER.publish_data_name`` must be present
    (a KeyError propagates otherwise).  In every other case the fragment
    just echoes that no publication is required.

    ``nj`` is accepted for interface compatibility and is not used.
    """
    try:
        publish_flag = int(self.cfg_params['USER.publish_data'])
    except KeyError:
        publish_flag = 0

    # Nothing to modify unless publication was explicitly switched on.
    if publish_flag != 1:
        return 'echo "no data publication required"\n'

    dataset_name = self.cfg_params['USER.publish_data_name']

    # The very same command line is first echoed into the job log and
    # then executed, so it is spelled out only once here.
    modify_cmd = '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH'

    pieces = [
        'echo "Modify Job Report" \n',
        'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n',
        # Make sure SE and SE_PATH are at least defined (possibly empty).
        'if [ -z "$SE" ]; then\n',
        ' SE="" \n',
        'fi \n',
        'if [ -z "$SE_PATH" ]; then\n',
        ' SE_PATH="" \n',
        'fi \n',
        'echo "SE = $SE"\n',
        'echo "SE_PATH = $SE_PATH"\n',
        'ProcessedDataset=' + dataset_name + '\n',
        # LFN=/store/user/<user>/processedDataset_PSETHASH
        'if [ "$SE_PATH" == "" ]; then\n',
        ' FOR_LFN=/copy_problems/ \n',
        'else \n',
        ' tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n',
        # NOTE: the "store" path component is hard-coded here.
        ' FOR_LFN=/store$tmp \n',
        'fi \n',
        'echo "ProcessedDataset = $ProcessedDataset"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
        'echo "' + modify_cmd + '"\n',
        modify_cmd + '\n',
        # Keep the rewritten report only if the helper succeeded.
        'modifyReport_result=$?\n',
        'echo modifyReport_result = $modifyReport_result\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' exit_status=1\n',
        ' echo "ERROR: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n',
        'fi\n',
    ]
    return ''.join(pieces)
1447 fanzago 1.99
def cleanEnv(self):
    """
    Return the shell fragment that removes the job working directory
    when the middleware is OSG.

    If $WORKING_DIR still exists after the removal, the fragment reports
    exit code 60999 through the $RUNTIME_AREA/$repo file and dumpStatus.
    """
    script_lines = (
        'if [ $middleware == OSG ]; then\n',
        ' cd $RUNTIME_AREA\n',
        ' echo "Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n',
        ' echo "JOB_EXIT_STATUS = 60999"\n',
        ' echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' fi\n',
        'fi\n',
        '\n',
    )
    return ''.join(script_lines)
1467 fanzago 1.93
def setParam_(self, param, value):
    """Store ``value`` under the key ``param`` in the parameter map."""
    registry = self._params
    registry[param] = value
1470    
def getParams(self):
    """Return the parameter map itself (not a copy)."""
    registry = self._params
    return registry
1473 gutsche 1.8
def setTaskid_(self):
    """Copy the ``taskId`` entry of ``self.cfg_params`` into ``self._taskId``."""
    task_id = self.cfg_params['taskId']
    self._taskId = task_id
1476    
def getTaskid(self):
    """Return the task identifier previously stored in ``self._taskId``."""
    task_id = self._taskId
    return task_id
1479 gutsche 1.35
def uniquelist(self, old):
    """
    Remove duplicates from a list.

    Returns a new list holding every distinct element of ``old`` exactly
    once, in order of first appearance.  Elements must be hashable.

    The result used to be taken straight from ``dict.keys()``, whose
    order is arbitrary and which is not a list on newer interpreters;
    building the list explicitly makes the result deterministic.
    """
    seen = {}
    unique = []
    for e in old:
        if e not in seen:
            seen[e] = 0
            unique.append(e)
    return unique
1488 mcinquil 1.121
1489    
def checkOut(self, limit):
    """
    Return the shell fragment that checks the size of the output files
    against the output sandbox limit.

    ``limit`` is written verbatim into the script and compared with the
    sum of the file sizes taken from the third column of ``ls -gG``
    (presumably bytes -- confirm against the caller).

    The files checked are ``self.output_file`` plus
    ``self.output_file_sandbox``, each renamed through
    ``self.numberFile_(name, '$NJob')``; any name containing 'crab_fjr'
    is left out.

    When the limit is exceeded the generated script walks the files in
    order, finds the first one that makes the running total pass the
    limit, removes that file and every file after it, and sets
    ``exit_status=70000``.
    """
    # Space separated list (with a leading blank) of the numbered files.
    allOutFiles = ""
    for fileOut in (self.output_file + self.output_file_sandbox):
        if fileOut.find('crab_fjr') == -1:
            allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob')

    txt = 'echo "*****************************************"\n'
    txt += 'echo "** Starting output sandbox limit check **"\n'
    txt += 'echo "*****************************************"\n'
    txt += 'echo "OUTPUT files: ' + allOutFiles + '";\n'
    txt += 'ls -gGhrta;\n'
    txt += 'sum=0;\n'
    txt += 'for file in ' + allOutFiles + ' ; do\n'
    txt += ' if [ -e $file ]; then\n'
    txt += ' tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
    txt += ' sum=`expr $sum + $tt`\n'
    txt += ' else\n'
    txt += ' echo "WARNING: output file $file not found!"\n'
    txt += ' fi\n'
    txt += 'done\n'
    txt += 'echo "Total Output dimension: $sum";\n'
    txt += 'limit=' + str(limit) + ';\n'
    txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
    txt += 'if [ $limit -lt $sum ]; then\n'
    txt += ' echo "WARNING: output files have to big size - something will be lost;"\n'
    txt += ' echo " checking the output file sizes..."\n'
    txt += ' tot=0;\n'
    txt += ' for file2 in ' + allOutFiles + ' ; do\n'
    # Missing files were only warned about above: skip them here as well
    # so that ls/expr are never fed an empty size.
    txt += ' if [ -e $file2 ]; then\n'
    txt += ' tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
    txt += ' tot=`expr $tot + $tt`;\n'
    txt += ' if [ $limit -lt $tot ]; then\n'
    txt += ' tot=`expr $tot - $tt`;\n'
    # Must be the variable of THIS loop; $file is left over from the
    # size-summing loop and always names the last file of the list.
    txt += ' fileLast=$file2;\n'
    txt += ' break;\n'
    txt += ' fi\n'
    txt += ' fi\n'
    txt += ' done\n'
    txt += ' echo "Dimension calculated: $tot"; echo "First file to exclude: $fileLast";\n'
    # Drop $fileLast and everything listed after it.
    txt += ' flag=0;\n'
    txt += ' for filess in ' + allOutFiles + ' ; do\n'
    txt += ' if [ $fileLast = $filess ]; then\n'
    txt += ' flag=1;\n'
    txt += ' fi\n'
    txt += ' if [ $flag -eq 1 ]; then\n'
    txt += ' rm -f $filess;\n'
    txt += ' fi\n'
    txt += ' done\n'
    txt += ' ls -agGhrt;\n'
    txt += ' echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
    txt += ' echo "JOB_EXIT_STATUS = 70000";\n'
    txt += ' exit_status=70000;\n'
    txt += 'else\n'
    txt += ' echo "Total Output dimension $sum is fine.";\n'
    txt += 'fi\n'
    txt += 'echo "*****************************************"\n'
    txt += 'echo "*** Ending output sandbox limit check ***"\n'
    txt += 'echo "*****************************************"\n'
    return txt