ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.116.2.8
Committed: Wed Oct 17 13:25:00 2007 UTC (17 years, 6 months ago) by fanzago
Content type: text/x-python
Branch: CRAB_1_5_4_SLC3_start
CVS Tags: CRAB_1_5_4_SLC3, CRAB_1_5_4_SLC3_pre4, CRAB_1_5_4_SLC3_pre3
Branch point for: CRAB_1_5_4_SLC3_pre4_br
Changes since 1.116.2.7: +7 -3 lines
Log Message:
fixed printout for pythia job

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5 fanzago 1.115 from BlackWhiteListParser import BlackWhiteListParser
6 slacapra 1.1 import common
7     import Scram
8    
9 slacapra 1.105 import os, string, glob
10 slacapra 1.1
11     class Cmssw(JobType):
12 gutsche 1.38 def __init__(self, cfg_params, ncjobs):
13 slacapra 1.1 JobType.__init__(self, 'CMSSW')
14     common.logger.debug(3,'CMSSW::__init__')
15    
16 gutsche 1.3 self._params = {}
17     self.cfg_params = cfg_params
18 gutsche 1.38
19 fanzago 1.115 # init BlackWhiteListParser
20     self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
21    
22 gutsche 1.72 try:
23     self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24     except KeyError:
25 slacapra 1.86 self.MaxTarBallSize = 9.5
26 gutsche 1.72
27 gutsche 1.44 # number of jobs requested to be created, limit obj splitting
28 gutsche 1.38 self.ncjobs = ncjobs
29    
30 slacapra 1.1 log = common.logger
31    
32     self.scram = Scram.Scram(cfg_params)
33     self.additional_inbox_files = []
34     self.scriptExe = ''
35     self.executable = ''
36 slacapra 1.71 self.executable_arch = self.scram.getArch()
37 slacapra 1.1 self.tgz_name = 'default.tgz'
38 slacapra 1.97 self.additional_tgz_name = 'additional.tgz'
39 corvo 1.56 self.scriptName = 'CMSSW.sh'
40 spiga 1.42 self.pset = '' #scrip use case Da
41     self.datasetPath = '' #scrip use case Da
42 gutsche 1.3
43 gutsche 1.50 # set FJR file name
44     self.fjrFileName = 'crab_fjr.xml'
45    
46 slacapra 1.1 self.version = self.scram.getSWVersion()
47 spiga 1.114
48     #
49     # Try to block creation in case of arch/version mismatch
50     #
51    
52     a = string.split(self.version, "_")
53    
54     if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
55     msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
56     raise CrabException(msg)
57     if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58     msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
59     raise CrabException(msg)
60    
61 slacapra 1.55 common.taskDB.setDict('codeVersion',self.version)
62 gutsche 1.5 self.setParam_('application', self.version)
63 slacapra 1.47
64 slacapra 1.1 ### collect Data cards
65 gutsche 1.66
66     ## get DBS mode
67     try:
68 slacapra 1.86 self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
69 gutsche 1.66 except KeyError:
70 slacapra 1.86 self.use_dbs_1 = 0
71 gutsche 1.66
72 slacapra 1.1 try:
73 slacapra 1.9 tmp = cfg_params['CMSSW.datasetpath']
74     log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
75     if string.lower(tmp)=='none':
76     self.datasetPath = None
77 slacapra 1.21 self.selectNoInput = 1
78 slacapra 1.9 else:
79     self.datasetPath = tmp
80 slacapra 1.21 self.selectNoInput = 0
81 slacapra 1.1 except KeyError:
82 gutsche 1.3 msg = "Error: datasetpath not defined "
83 slacapra 1.1 raise CrabException(msg)
84 gutsche 1.5
85     # ML monitoring
86     # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
87 slacapra 1.9 if not self.datasetPath:
88     self.setParam_('dataset', 'None')
89     self.setParam_('owner', 'None')
90     else:
91 gutsche 1.92 try:
92     datasetpath_split = self.datasetPath.split("/")
93     # standard style
94 fanzago 1.116.2.1 self.setParam_('datasetFull', self.datasetPath)
95 gutsche 1.92 if self.use_dbs_1 == 1 :
96     self.setParam_('dataset', datasetpath_split[1])
97     self.setParam_('owner', datasetpath_split[-1])
98     else:
99     self.setParam_('dataset', datasetpath_split[1])
100     self.setParam_('owner', datasetpath_split[2])
101     except:
102     self.setParam_('dataset', self.datasetPath)
103     self.setParam_('owner', self.datasetPath)
104    
105 gutsche 1.8 self.setTaskid_()
106     self.setParam_('taskId', self.cfg_params['taskId'])
107 gutsche 1.5
108 slacapra 1.1 self.dataTiers = []
109    
110     ## now the application
111     try:
112     self.executable = cfg_params['CMSSW.executable']
113 gutsche 1.5 self.setParam_('exe', self.executable)
114 slacapra 1.1 log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
115     msg = "Default executable cmsRun overridden. Switch to " + self.executable
116     log.debug(3,msg)
117     except KeyError:
118     self.executable = 'cmsRun'
119 gutsche 1.5 self.setParam_('exe', self.executable)
120 slacapra 1.1 msg = "User executable not defined. Use cmsRun"
121     log.debug(3,msg)
122     pass
123    
124     try:
125     self.pset = cfg_params['CMSSW.pset']
126     log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
127 spiga 1.42 if self.pset.lower() != 'none' :
128     if (not os.path.exists(self.pset)):
129     raise CrabException("User defined PSet file "+self.pset+" does not exist")
130     else:
131     self.pset = None
132 slacapra 1.1 except KeyError:
133     raise CrabException("PSet file missing. Cannot run cmsRun ")
134    
135     # output files
136 slacapra 1.53 ## stuff which must be returned always via sandbox
137     self.output_file_sandbox = []
138    
139     # add fjr report by default via sandbox
140     self.output_file_sandbox.append(self.fjrFileName)
141    
142     # other output files to be returned via sandbox or copied to SE
143 slacapra 1.1 try:
144     self.output_file = []
145     tmp = cfg_params['CMSSW.output_file']
146     if tmp != '':
147     tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
148     log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
149     for tmp in tmpOutFiles:
150     tmp=string.strip(tmp)
151     self.output_file.append(tmp)
152     pass
153     else:
154 gutsche 1.92 log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
155 slacapra 1.1 pass
156     pass
157     except KeyError:
158 gutsche 1.92 log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
159 slacapra 1.1 pass
160    
161     # script_exe file as additional file in inputSandbox
162     try:
163 slacapra 1.10 self.scriptExe = cfg_params['USER.script_exe']
164     if self.scriptExe != '':
165     if not os.path.isfile(self.scriptExe):
166 slacapra 1.64 msg ="ERROR. file "+self.scriptExe+" not found"
167 slacapra 1.10 raise CrabException(msg)
168 spiga 1.42 self.additional_inbox_files.append(string.strip(self.scriptExe))
169 slacapra 1.1 except KeyError:
170 spiga 1.42 self.scriptExe = ''
171 slacapra 1.70
172 spiga 1.42 #CarlosDaniele
173     if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
174 slacapra 1.70 msg ="Error. script_exe not defined"
175 spiga 1.42 raise CrabException(msg)
176    
177 slacapra 1.1 ## additional input files
178     try:
179 slacapra 1.29 tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
180 slacapra 1.70 for tmp in tmpAddFiles:
181     tmp = string.strip(tmp)
182     dirname = ''
183     if not tmp[0]=="/": dirname = "."
184 corvo 1.85 files = []
185     if string.find(tmp,"*")>-1:
186     files = glob.glob(os.path.join(dirname, tmp))
187     if len(files)==0:
188     raise CrabException("No additional input file found with this pattern: "+tmp)
189     else:
190     files.append(tmp)
191 slacapra 1.70 for file in files:
192     if not os.path.exists(file):
193     raise CrabException("Additional input file not found: "+file)
194 slacapra 1.45 pass
195 slacapra 1.105 # fname = string.split(file, '/')[-1]
196     # storedFile = common.work_space.pathForTgz()+'share/'+fname
197     # shutil.copyfile(file, storedFile)
198     self.additional_inbox_files.append(string.strip(file))
199 slacapra 1.1 pass
200     pass
201 slacapra 1.70 common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
202 slacapra 1.1 except KeyError:
203     pass
204    
205 slacapra 1.9 # files per job
206 slacapra 1.1 try:
207 gutsche 1.35 if (cfg_params['CMSSW.files_per_jobs']):
208     raise CrabException("files_per_jobs no longer supported. Quitting.")
209 gutsche 1.3 except KeyError:
210 gutsche 1.35 pass
211 gutsche 1.3
212 slacapra 1.9 ## Events per job
213 gutsche 1.3 try:
214 slacapra 1.10 self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
215 slacapra 1.9 self.selectEventsPerJob = 1
216 gutsche 1.3 except KeyError:
217 slacapra 1.9 self.eventsPerJob = -1
218     self.selectEventsPerJob = 0
219    
220 slacapra 1.22 ## number of jobs
221     try:
222     self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
223     self.selectNumberOfJobs = 1
224     except KeyError:
225     self.theNumberOfJobs = 0
226     self.selectNumberOfJobs = 0
227 slacapra 1.10
228 gutsche 1.35 try:
229     self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
230     self.selectTotalNumberEvents = 1
231     except KeyError:
232     self.total_number_of_events = 0
233     self.selectTotalNumberEvents = 0
234    
235 spiga 1.42 if self.pset != None: #CarlosDaniele
236     if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
237     msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
238     raise CrabException(msg)
239     else:
240     if (self.selectNumberOfJobs == 0):
241     msg = 'Must specify number_of_jobs.'
242     raise CrabException(msg)
243 gutsche 1.35
244 slacapra 1.22 ## source seed for pythia
245     try:
246     self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
247     except KeyError:
248 slacapra 1.23 self.sourceSeed = None
249     common.logger.debug(5,"No seed given")
250 slacapra 1.22
251 slacapra 1.28 try:
252     self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
253     except KeyError:
254     self.sourceSeedVtx = None
255     common.logger.debug(5,"No vertex seed given")
256 slacapra 1.90
257     try:
258     self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
259     except KeyError:
260     self.sourceSeedG4 = None
261     common.logger.debug(5,"No g4 sim hits seed given")
262    
263     try:
264     self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
265     except KeyError:
266     self.sourceSeedMix = None
267     common.logger.debug(5,"No mix seed given")
268    
269 spiga 1.57 try:
270     self.firstRun = int(cfg_params['CMSSW.first_run'])
271     except KeyError:
272     self.firstRun = None
273     common.logger.debug(5,"No first run given")
274 spiga 1.42 if self.pset != None: #CarlosDaniele
275 slacapra 1.97 ver = string.split(self.version,"_")
276     if (int(ver[1])>=1 and int(ver[2])>=5):
277     import PsetManipulator150 as pp
278     else:
279     import PsetManipulator as pp
280     PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
281 gutsche 1.3
282 slacapra 1.1 #DBSDLS-start
283     ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
284     self.maxEvents=0 # max events available ( --> check the requested nb. of evts in Creator.py)
285     self.DBSPaths={} # all dbs paths requested ( --> input to the site local discovery script)
286 gutsche 1.35 self.jobDestination=[] # Site destination(s) for each job (list of lists)
287 slacapra 1.1 ## Perform the data location and discovery (based on DBS/DLS)
288 slacapra 1.9 ## SL: Don't if NONE is specified as input (pythia use case)
289 gutsche 1.35 blockSites = {}
290 slacapra 1.9 if self.datasetPath:
291 gutsche 1.35 blockSites = self.DataDiscoveryAndLocation(cfg_params)
292 slacapra 1.1 #DBSDLS-end
293    
294     self.tgzNameWithPath = self.getTarBall(self.executable)
295 slacapra 1.10
296 slacapra 1.9 ## Select Splitting
297 spiga 1.42 if self.selectNoInput:
298     if self.pset == None: #CarlosDaniele
299     self.jobSplittingForScript()
300     else:
301     self.jobSplittingNoInput()
302 gutsche 1.92 else:
303 corvo 1.56 self.jobSplittingByBlocks(blockSites)
304 gutsche 1.5
305 slacapra 1.22 # modify Pset
306 spiga 1.42 if self.pset != None: #CarlosDaniele
307 slacapra 1.86 try:
308     if (self.datasetPath): # standard job
309     # allow to processa a fraction of events in a file
310 slacapra 1.90 PsetEdit.inputModule("INPUT")
311     PsetEdit.maxEvent("INPUTMAXEVENTS")
312     PsetEdit.skipEvent("INPUTSKIPEVENTS")
313 slacapra 1.86 else: # pythia like job
314 slacapra 1.90 PsetEdit.maxEvent(self.eventsPerJob)
315 slacapra 1.86 if (self.firstRun):
316 slacapra 1.90 PsetEdit.pythiaFirstRun("INPUTFIRSTRUN") #First Run
317 slacapra 1.86 if (self.sourceSeed) :
318 slacapra 1.90 PsetEdit.pythiaSeed("INPUT")
319 slacapra 1.86 if (self.sourceSeedVtx) :
320 slacapra 1.90 PsetEdit.vtxSeed("INPUTVTX")
321     if (self.sourceSeedG4) :
322 fanzago 1.116.2.1 PsetEdit.g4Seed("INPUTG4")
323 slacapra 1.90 if (self.sourceSeedMix) :
324 fanzago 1.116.2.1 PsetEdit.mixSeed("INPUTMIX")
325 slacapra 1.86 # add FrameworkJobReport to parameter-set
326 slacapra 1.90 PsetEdit.addCrabFJR(self.fjrFileName)
327     PsetEdit.psetWriter(self.configFilename())
328 slacapra 1.86 except:
329     msg='Error while manipuliating ParameterSet: exiting...'
330     raise CrabException(msg)
331 gutsche 1.3
332 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
333    
334 slacapra 1.86 import DataDiscovery
335     import DataDiscovery_DBS2
336     import DataLocation
337 gutsche 1.3 common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
338    
339     datasetPath=self.datasetPath
340    
341 slacapra 1.1 ## Contact the DBS
342 gutsche 1.92 common.logger.message("Contacting Data Discovery Services ...")
343 slacapra 1.1 try:
344 gutsche 1.66
345 slacapra 1.86 if self.use_dbs_1 == 1 :
346     self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
347     else :
348 corvo 1.85 self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
349 slacapra 1.1 self.pubdata.fetchDBSInfo()
350    
351 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
352 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
353     raise CrabException(msg)
354 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
355 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
356     raise CrabException(msg)
357 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
358 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
359 slacapra 1.1 raise CrabException(msg)
360 gutsche 1.67 except DataDiscovery_DBS2.NotExistingDatasetError_DBS2, ex :
361     msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
362     raise CrabException(msg)
363     except DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2, ex :
364     msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
365     raise CrabException(msg)
366     except DataDiscovery_DBS2.DataDiscoveryError_DBS2, ex:
367     msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
368     raise CrabException(msg)
369 slacapra 1.1
370 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
371 mkirn 1.37 self.eventsbyblock=self.pubdata.getEventsPerBlock()
372     self.eventsbyfile=self.pubdata.getEventsPerFile()
373 gutsche 1.3
374 slacapra 1.1 ## get max number of events
375     self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py
376    
377     ## Contact the DLS and build a list of sites hosting the fileblocks
378     try:
379 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
380 gutsche 1.6 dataloc.fetchDLSInfo()
381 slacapra 1.41 except DataLocation.DataLocationError , ex:
382 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
383     raise CrabException(msg)
384    
385    
386 gutsche 1.35 sites = dataloc.getSites()
387     allSites = []
388     listSites = sites.values()
389 slacapra 1.63 for listSite in listSites:
390     for oneSite in listSite:
391 gutsche 1.35 allSites.append(oneSite)
392     allSites = self.uniquelist(allSites)
393 gutsche 1.3
394 gutsche 1.92 # screen output
395     common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
396    
397 gutsche 1.35 return sites
398 gutsche 1.3
def jobSplittingByBlocks(self, blockSites):
    """
    Perform job splitting. Jobs run over an integer number of files
    and no more than one block.
    ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
    REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
              self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
              self.maxEvents, self.filesbyblock, self.eventsbyblock, self.eventsbyfile,
              self.ncjobs, self.jobDestination, self.blackWhiteListParser
    SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
          self.total_number_of_jobs - Total # of jobs (also stored in self.ncjobs)
          self.list_of_args - one [file list, max events, skip events] entry per job
    RAISES: CrabException if the selected splitting parameters do not
            define both a total number of events and a number of events per job
    """
    splitMsg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'

    # ---- Handle the possible job splitting configurations ---- #
    totalEventsRequested = None
    eventsPerJobRequested = None
    if (self.selectTotalNumberEvents):
        totalEventsRequested = self.total_number_of_events
    if (self.selectEventsPerJob):
        eventsPerJobRequested = self.eventsPerJob
        if (self.selectNumberOfJobs):
            totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob

    if totalEventsRequested is None:
        # would otherwise fail below with an unbound local variable
        raise CrabException(splitMsg)

    # If user requested all the events in the dataset
    if (totalEventsRequested == -1):
        eventsRemaining = self.maxEvents
    # If user requested more events than are in the dataset
    elif (totalEventsRequested > self.maxEvents):
        eventsRemaining = self.maxEvents
        # report the effective request: self.total_number_of_events is 0
        # when the total comes from number_of_jobs * events_per_job
        common.logger.message("Requested "+str(totalEventsRequested)+ " events, but only "+str(self.maxEvents)+" events are available.")
    # If user requested less events than are in the dataset
    else:
        eventsRemaining = totalEventsRequested

    # If user requested more events per job than are in the dataset
    if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
        eventsPerJobRequested = self.maxEvents

    # For user info at end
    totalEventCount = 0

    if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
        if self.theNumberOfJobs < 1:
            raise CrabException('number_of_jobs must be greater than 0.')
        # At least one event per job: a value of 0 would close a job on
        # every iteration without ever consuming events.
        eventsPerJobRequested = max(1, eventsRemaining // self.theNumberOfJobs)

    if eventsPerJobRequested is None:
        raise CrabException(splitMsg)

    if (self.selectNumberOfJobs):
        common.logger.message("May not create the exact number_of_jobs requested.")

    if ( self.ncjobs == 'all' ) :
        totalNumberOfJobs = 999999999
    else :
        totalNumberOfJobs = self.ncjobs

    blocks = blockSites.keys()
    blockCount = 0
    # Backup variable in case self.maxEvents counted events in a non-included block
    numBlocksInDataset = len(blocks)

    jobCount = 0
    list_of_lists = []

    # maps each visited block to the list of (1-based) job numbers created for it
    jobsOfBlock = {}

    # ---- Iterate over the blocks in the dataset until ---- #
    # ---- we've met the requested total # of events    ---- #
    while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
        block = blocks[blockCount]
        blockCount += 1
        if block not in jobsOfBlock:
            jobsOfBlock[block] = []

        if block in self.eventsbyblock:
            numEventsInBlock = self.eventsbyblock[block]
            common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))

            files = self.filesbyblock[block]
            numFilesInBlock = len(files)
            if (numFilesInBlock <= 0):
                continue
            fileCount = 0

            # ---- New block => New job ---- #
            parString = "\\{"
            # counter for number of events in files currently worked on
            filesEventCount = 0
            # flag if next while loop should touch new file
            newFile = 1
            # job event counter
            jobSkipEventCount = 0

            # ---- Iterate over the files in the block until we've met the requested ---- #
            # ---- total # of events or we've gone over all the files in this block  ---- #
            while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
                fname = files[fileCount]
                if newFile :
                    try:
                        numEventsInFile = self.eventsbyfile[fname]
                        common.logger.debug(6, "File "+str(fname)+" has "+str(numEventsInFile)+" events")
                        # increase filesEventCount
                        filesEventCount += numEventsInFile
                        # Add file to current job
                        parString += '\\"' + fname + '\\"\\,'
                        newFile = 0
                    except KeyError:
                        common.logger.message("File "+str(fname)+" has unknown number of events: skipping")

                # if less events in file remain than eventsPerJobRequested
                if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
                    # if last file in block
                    if ( fileCount == numFilesInBlock-1 ) :
                        if parString == "\\{":
                            # no usable file was collected for this job
                            # (last file skipped): nothing to run
                            fileCount += 1
                            continue
                        # end job using last file, use remaining events in block
                        # close job and touch new file
                        fullString = parString[:-2]
                        fullString += '\\}'
                        list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
                        self.jobDestination.append(blockSites[block])
                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                        # fill jobs of block dictionary
                        jobsOfBlock[block].append(jobCount+1)
                        # reset counter
                        jobCount = jobCount + 1
                        totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
                        eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
                        jobSkipEventCount = 0
                        # reset file
                        parString = "\\{"
                        filesEventCount = 0
                        newFile = 1
                        fileCount += 1
                    else :
                        # go to next file
                        newFile = 1
                        fileCount += 1
                # if events in file equal to eventsPerJobRequested
                elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
                    # close job and touch new file
                    fullString = parString[:-2]
                    fullString += '\\}'
                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    jobsOfBlock[block].append(jobCount+1)
                    # reset counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    jobSkipEventCount = 0
                    # reset file
                    parString = "\\{"
                    filesEventCount = 0
                    newFile = 1
                    fileCount += 1

                # if more events in file remain than eventsPerJobRequested
                else :
                    # close job but don't touch new file
                    fullString = parString[:-2]
                    fullString += '\\}'
                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    jobsOfBlock[block].append(jobCount+1)
                    # increase counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    # calculate skip events for last file
                    # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
                    jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[fname])
                    # remove all but the last file
                    filesEventCount = self.eventsbyfile[fname]
                    parString = "\\{"
                    parString += '\\"' + fname + '\\"\\,'

    self.ncjobs = self.total_number_of_jobs = jobCount
    if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
        common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
    common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")

    # screen output
    screenOutput = "List of jobs and available destination sites:\n\n"

    # keep track of blocks with no sites to print a warning at the end
    jobRangesNoSite = []
    blockNumbersNoSite = []

    blockCounter = 0
    for block in blocks:
        if block in jobsOfBlock:
            blockCounter += 1
            # sites left after the black list and then the white list
            # filter; evaluated once and used for both text and warning
            allowedSites = self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)
            jobRange = spanRanges(jobsOfBlock[block])
            screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,jobRange,','.join(allowedSites))
            if len(allowedSites) == 0:
                jobRangesNoSite.append(jobRange)
                blockNumbersNoSite.append(blockCounter)

    common.logger.message(screenOutput)
    if len(jobRangesNoSite) > 0 and len(blockNumbersNoSite) > 0:
        msg = 'WARNING: No sites are hosting any part of data for block:\n                '
        # join() separates the entries without leaving a trailing comma
        msg += ' ' + ', '.join([str(number) for number in blockNumbersNoSite])
        msg += '\n               Related jobs:\n                 '
        msg += ','.join([str(jobRange) for jobRange in jobRangesNoSite])
        msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
        common.logger.message(msg)

    self.list_of_args = list_of_lists
    return
618    
def jobSplittingNoInput(self):
    """
    Perform job splitting based on number of event per job
    (no input dataset).

    REQUIRES: self.selectEventsPerJob, self.selectNumberOfJobs,
              self.selectTotalNumberEvents and the matching values,
              self.firstRun, self.sourceSeed, self.sourceSeedVtx,
              self.sourceSeedG4, self.sourceSeedMix, self.jobDestination
    SETS:     self.total_number_of_jobs, self.total_number_of_events or
              self.eventsPerJob (whichever was not given),
              self.list_of_args (one argument list per job) and one
              [""] entry per job appended to self.jobDestination
    RAISES:   CrabException if the total number of events is negative or
              a value used as divisor is not positive
    NOTE(review): the vtx/g4/mix seed checks are nested under the pythia
              seed check; that nesting is reconstructed (indentation was
              lost in the reviewed source) -- confirm against the repository.
    """
    common.logger.debug(5,'Splitting per events')

    if (self.selectEventsPerJob):
        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
    if (self.selectNumberOfJobs):
        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
    if (self.selectTotalNumberEvents):
        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')

    if (self.total_number_of_events < 0):
        msg='Cannot split jobs per Events with "-1" as total number of events'
        raise CrabException(msg)

    if (self.selectEventsPerJob):
        if (self.selectTotalNumberEvents):
            if self.eventsPerJob < 1:
                # would be a ZeroDivisionError or a negative job count
                raise CrabException('events_per_job must be greater than 0.')
            self.total_number_of_jobs = self.total_number_of_events // self.eventsPerJob
        elif(self.selectNumberOfJobs) :
            self.total_number_of_jobs = self.theNumberOfJobs
            self.total_number_of_events = int(self.theNumberOfJobs*self.eventsPerJob)

    elif (self.selectNumberOfJobs) :
        if self.theNumberOfJobs < 1:
            # would be a ZeroDivisionError or a negative events-per-job
            raise CrabException('number_of_jobs must be greater than 0.')
        self.total_number_of_jobs = self.theNumberOfJobs
        self.eventsPerJob = self.total_number_of_events // self.total_number_of_jobs

    common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))

    # is there any remainder?
    check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)

    common.logger.debug(5,'Check  '+str(check))

    common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
    if check > 0:
        common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))

    # argument is seed number.$i
    self.list_of_args = []
    for i in range(self.total_number_of_jobs):
        ## Since there is no input, any site is good
        self.jobDestination.append([""]) #must be empty to write correctly the xml
        jobIndex = str(i)
        args = []
        if (self.firstRun):
            ## pythia first run: configured value with the job index appended
            args.append(str(self.firstRun)+jobIndex)
        else:
            ## no first run
            args.append(jobIndex)
        if (self.sourceSeed):
            # every seed is the configured value with the job index appended
            args.append(str(self.sourceSeed)+jobIndex)
            if (self.sourceSeedVtx):
                ## + vtx random seed
                args.append(str(self.sourceSeedVtx)+jobIndex)
            if (self.sourceSeedG4):
                ## + G4 random seed
                args.append(str(self.sourceSeedG4)+jobIndex)
            if (self.sourceSeedMix):
                ## + Mix random seed
                args.append(str(self.sourceSeedMix)+jobIndex)
        self.list_of_args.append(args)

    return
692    
693 spiga 1.42
def jobSplittingForScript(self):
    """
    Perform job splitting based on number of job
    (script use case: no dataset and no parameter set).

    SETS: self.total_number_of_jobs from self.theNumberOfJobs,
          self.list_of_args with one [str(job index)] entry per job, and
          appends one [""] destination per job to self.jobDestination
    """
    log = common.logger
    log.debug(5,'Splitting per job')
    log.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')

    nJobs = self.theNumberOfJobs
    self.total_number_of_jobs = nJobs

    log.debug(5,'N jobs  '+str(self.total_number_of_jobs))

    log.message(str(self.total_number_of_jobs)+' jobs can be created')

    # the only argument of each job is its index (no random seed)
    self.list_of_args = []
    jobIndex = 0
    while jobIndex < nJobs:
        # Since there is no input, any site is good
        self.jobDestination.append([""])
        self.list_of_args.append([str(jobIndex)])
        jobIndex += 1
    return
716    
def split(self, jobParams):
    """
    Record arguments and destination of every job in the job DB.

    jobParams is extended in place with one slot per job; slot `job`
    then receives self.list_of_args[job].  The job DB is loaded before
    and saved after the update.
    """
    common.jobDB.load()
    #### Fabio
    njobs = self.total_number_of_jobs
    arglist = self.list_of_args

    # create the empty structure: one placeholder per job
    jobParams.extend([""] * njobs)

    for job in range(njobs):
        job_args = arglist[job]
        jobParams[job] = job_args
        common.jobDB.setArguments(job, jobParams[job])
        destination = self.jobDestination[job]
        common.logger.debug(5,"Job "+str(job)+" Destination: "+str(destination))
        common.jobDB.setDestination(job, self.jobDestination[job])

    common.jobDB.save()
    return
737    
def getJobTypeArguments(self, nj, sched):
    """
    Return the arguments stored in the job DB for job `nj` as a single
    string; every argument is followed by one blank.  `sched` is unused.
    """
    pieces = [str(arg) + " " for arg in common.jobDB.arguments(nj)]
    return ''.join(pieces)
743 gutsche 1.3
def numberOfJobs(self):
    """
    Return the value stored in self.total_number_of_jobs.
    """
    # Fabio
    n_jobs = self.total_number_of_jobs
    return n_jobs
747    
def getTarBall(self, exe):
    """
    Return the path of the tar ball with user libraries and executable.

    The path is stored in self.tgzNameWithPath.  If a file already exists
    there it is returned as is; otherwise self.buildTar_(exe) is asked to
    build it and the (whitespace-stripped) path is returned.
    """
    #
    # Marco. Let's start to use relative path for Boss XML files
    #
    self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name

    # if it exists, just return it
    if os.path.exists(self.tgzNameWithPath):
        return self.tgzNameWithPath

    # Prepare a tar gzipped file with user binaries.
    self.buildTar_(exe)

    # str method instead of the string-module function (gone in Python 3)
    return self.tgzNameWithPath.strip()
765    
def buildTar_(self, executable):
    """
    Build the tar balls that go into the input sandbox.

    Nothing is done when the scram release top is empty or equal to the
    user working area.  Otherwise self.tgzNameWithPath is filled with the
    private executable (if any), the local 'lib' and 'module' directories,
    every 'data' directory found below the working area, and the
    ProdAgentApi / ProdCommon directories found under $CRABDIR.  A second
    tar ball (self.MLtgzfile) collects the monitoring helper scripts.

    Raises CrabException if the executable cannot be found, if a tar ball
    cannot be written, or if the main tar ball exceeds self.MaxTarBallSize
    (in MB).
    """
    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        return

    import tarfile
    try: # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            # NOTE(review): the guard tests self.executable while the lookup
            # uses the 'executable' argument -- confirm this is intended.
            if (self.executable != ''):
                exeWithPath = self.scram.findFile_(executable)
                if ( not exeWithPath ):
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship
                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
                    path = swArea+'/'
                    # distinguish case when script is in user project area or given by full path somewhere else
                    if exeWithPath.find(path) >= 0 :
                        exe = exeWithPath.replace(path, '')
                        tar.add(path+exe,exe)
                    else :
                        tar.add(exeWithPath,os.path.basename(executable))
                # else: the exe is from release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            libDir = 'lib'
            lib = swArea+'/' +libDir
            common.logger.debug(5,"lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib,libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module,moduleDir)

            ## Now check if any data dir(s) is present
            swAreaLen=len(swArea)
            for root, dirs, files in os.walk(swArea):
                if "data" in dirs:
                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
                    tar.add(root+"/data",root[swAreaLen:]+"/data")

            ## Add ProdAgent dir to tar
            paDir = 'ProdAgentApi'
            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
            if os.path.isdir(pa):
                tar.add(pa,paDir)

            ### FEDE FOR DBS PUBLICATION
            ## Add PRODCOMMON dir to tar
            prodcommonDir = 'ProdCommon'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
            if os.path.isdir(prodcommonPath):
                tar.add(prodcommonPath,prodcommonDir)
            #############################

            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            # always release the file handle, also when something failed
            tar.close()
    except CrabException:
        # keep the specific message (e.g. executable not found)
        raise
    except Exception:
        raise CrabException('Could not create tar-ball')

    ## check for tarball size
    tarballinfo = os.stat(self.tgzNameWithPath)
    if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
        raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')

    ## create tar-ball with ML stuff
    self.MLtgzfile = common.work_space.pathForTgz()+'share/MLfiles.tgz'
    try:
        tar = tarfile.open(self.MLtgzfile, "w:gz")
        try:
            path=os.environ['CRABDIR'] + '/python/'
            for ml_file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
                tar.add(path+ml_file,ml_file)
            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
        finally:
            tar.close()
    except Exception:
        raise CrabException('Could not create ML files tar-ball')

    return
862    
def additionalInputFileTgz(self):
    """
    Put all additional files into a tar ball and return its name.

    Every entry of self.additional_inbox_files is stored under its last
    path component.  The archive is written to
    <pathForTgz>share/<self.additional_tgz_name>.
    """
    import tarfile
    tarName = common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
    tar = tarfile.open(tarName, "w:gz")
    try:
        for inbox_file in self.additional_inbox_files:
            # archive member name = text after the last '/'
            tar.add(inbox_file, inbox_file.split('/')[-1])
        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
    finally:
        # close the archive even if one of the files could not be added
        tar.close()
    return tarName
875    
def wsSetupEnvironment(self, nj):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    The generated shell text, in order:
      - middleware specific setup (LCG or OSG), delegated to
        wsSetupCMSLCGEnvironment_ / wsSetupCMSOSGEnvironment_;
      - creation of the CMSSW project area for self.version and
        evaluation of the scram runtime environment;
      - a check on the number of wrapper arguments;
      - dataset bookkeeping variables;
      - when self.pset is not None, substitution of the job arguments
        into the copied parameter-set file, which is renamed pset.cfg;
      - unpacking of the additional input tar ball, if any.
    """
    # Prepare JobType-independent part
    txt = ''

    ## OLI_Daniele at this level middleware already known

    txt += 'if [ $middleware == LCG ]; then \n'
    txt += ' echo "### First set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
    txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
    txt += ' export BUILD_ARCH='+self.executable_arch+'\n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' echo "Created working directory: $WORKING_DIR"\n'
    txt += ' if [ ! -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
    txt += ' echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' exit 1\n'
    txt += ' fi\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    txt += ' echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
    txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
    txt += scram+' project CMSSW '+self.version+'\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
    txt += ' echo "JOB_EXIT_STATUS = 10034"\n'
    txt += ' echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    ## OLI_Daniele
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' echo "Remove working directory: $WORKING_DIR"\n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    # report the version actually requested (was a hard-coded CMSSW_0_6_1)
    txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW '+self.version+' not found on `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
    txt += ' echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += ' exit 1 \n'
    txt += 'fi \n'
    txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
    txt += 'cd '+self.version+'\n'
    ########## FEDE FOR DBS2 ######################
    txt += 'SOFTWARE_DIR=`pwd`\n'
    txt += 'echo SOFTWARE_DIR=$SOFTWARE_DIR \n'
    ###############################################
    ### needed grep for bug in scramv1 ###
    txt += scram+' runtime -sh\n'
    txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    txt += 'echo $PATH\n'

    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber)\n"
    txt += "\n"
    txt += "if [ $nargs -lt 2 ]\n"
    txt += "then\n"
    txt += " echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
    txt += ' echo "JOB_EXIT_STATUS = 50113"\n'
    txt += ' echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    ## OLI_Daniele
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' echo "Remove working directory: $WORKING_DIR"\n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
    txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
    txt += ' echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += " exit 1\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    ### FEDE FOR DBS OUTPUT PUBLICATION
    if (self.datasetPath):
        txt += '\n'
        txt += 'DatasetPath='+self.datasetPath+'\n'

        # elements 1 and 2 of the '/'-split path are exported below
        datasetpath_split = self.datasetPath.split("/")

        txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
        txt += 'DataTier='+datasetpath_split[2]+'\n'
        txt += 'ApplicationFamily=cmsRun\n'

    else:
        txt += 'DatasetPath=MCDataTier\n'
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'
    if self.pset is not None: #CarlosDaniele
        pset = os.path.basename(job.configFilename())
        txt += '\n'
        txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
        if (self.datasetPath): # standard job
            txt += 'InputFiles=${args[1]}\n'
            txt += 'MaxEvents=${args[2]}\n'
            txt += 'SkipEvents=${args[3]}\n'
            txt += 'echo "Inputfiles:<$InputFiles>"\n'
            txt += 'sed "s#{\'INPUT\'}#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
            txt += 'echo "MaxEvents:<$MaxEvents>"\n'
            txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
            txt += 'echo "SkipEvents:<$SkipEvents>"\n'
            txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
        else: # pythia like job
            # seedIndex walks through the wrapper arguments in the same
            # order in which the optional values are present
            seedIndex=1
            if (self.firstRun):
                txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
                txt += 'echo "FirstRun: <$FirstRun>"\n'
                txt += 'sed "s#\\<INPUTFIRSTRUN\\>#$FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                seedIndex=seedIndex+1

            if (self.sourceSeed):
                txt += 'Seed=${args['+str(seedIndex)+']}\n'
                txt += 'sed "s#\\<INPUT\\>#$Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                seedIndex=seedIndex+1
                ## the following seeds are not always present
                # NOTE(review): the extra seeds are taken to be nested under
                # sourceSeed -- confirm against the argument builder.
                if (self.sourceSeedVtx):
                    txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "VtxSeed: <$VtxSeed>"\n'
                    txt += 'sed "s#\\<INPUTVTX\\>#$VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
                if (self.sourceSeedG4):
                    txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "G4Seed: <$G4Seed>"\n'
                    txt += 'sed "s#\\<INPUTG4\\>#$G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
                if (self.sourceSeedMix):
                    txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "MixSeed: <$mixSeed>"\n'
                    txt += 'sed "s#\\<INPUTMIX\\>#$mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
        txt += 'mv -f '+pset+' pset.cfg\n'

    if self.additional_inbox_files:
        txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
        txt += ' tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
        txt += 'fi\n'

    if self.pset is not None: #CarlosDaniele
        # NOTE(review): the pset.cfg dump and hash are taken to belong to
        # this branch, since pset.cfg is only created when a pset is given.
        txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'

        txt += '\n'
        txt += 'echo "***** cat pset.cfg *********"\n'
        txt += 'cat pset.cfg\n'
        txt += 'echo "****** end pset.cfg ********"\n'
        txt += '\n'
        ### FEDE FOR DBS OUTPUT PUBLICATION
        txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
        txt += 'echo "PSETHASH = $PSETHASH" \n'
        ##############
        txt += '\n'
    return txt
1077    
def wsBuildExe(self, nj=0):
    """
    Put in the script the commands to build an executable
    or a library.

    When the file self.tgzNameWithPath exists, the returned shell text
    untars it from $RUNTIME_AREA, aborts the job if that fails, and
    prepends ProdAgentApi and ProdCommon (under $SOFTWARE_DIR) to
    PYTHONPATH.  Otherwise an empty string is returned.  `nj` is unused.
    """

    txt = ""

    if os.path.isfile(self.tgzNameWithPath):
        tgz_name = os.path.basename(self.tgzNameWithPath)
        txt += 'echo "tar xzvf $RUNTIME_AREA/'+tgz_name+'"\n'
        txt += 'tar xzvf $RUNTIME_AREA/'+tgz_name+'\n'
        txt += 'untar_status=$? \n'
        txt += 'if [ $untar_status -ne 0 ]; then \n'
        txt += ' echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
        txt += ' echo "JOB_EXIT_STATUS = $untar_status" \n'
        txt += ' echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' if [ $middleware == OSG ]; then \n'
        txt += ' echo "Remove working directory: $WORKING_DIR"\n'
        txt += ' cd $RUNTIME_AREA\n'
        txt += ' /bin/rm -rf $WORKING_DIR\n'
        txt += ' if [ -d $WORKING_DIR ] ;then\n'
        txt += ' echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
        txt += ' echo "JOB_EXIT_STATUS = 50999"\n'
        txt += ' echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
        txt += ' rm -f $RUNTIME_AREA/$repo \n'
        txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
        txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' fi\n'
        txt += ' fi \n'
        txt += ' \n'
        txt += ' exit 1 \n'
        txt += 'else \n'
        txt += ' echo "Successful untar" \n'
        txt += 'fi \n'
        txt += '\n'
        txt += 'echo "Include ProdAgentApi and PRODCOMMON in PYTHONPATH"\n'
        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
        #### FEDE FOR DBS OUTPUT PUBLICATION
        txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
        txt += 'else\n'
        txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
        txt += 'fi\n'
        # print the result after the 'fi' so that it shows up in both
        # branches (it used to be emitted inside the 'else' branch only)
        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
        ###################
        txt += '\n'

    return txt
1132    
def modifySteeringCards(self, nj):
    """
    Hook for modifying the card provided by the user and writing a new
    card into the share dir.  This job type does nothing here.
    """
    return None
1138    
def executableName(self):
    """
    Return the command used to start the job: "sh " when a user script
    (self.scriptExe) is set, self.executable otherwise.
    """
    if not self.scriptExe:
        return self.executable
    #CarlosDaniele: a user script is run through the shell
    return "sh "
1144 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string passed to the executable.

    With a user script (self.scriptExe) the arguments are the script name
    followed by " $NJob".  Otherwise the parameter set is passed with
    "-p pset.cfg", preceded by "-e" for releases >= CMSSW_1_5_X.

    Raises CrabException when major and minor release numbers cannot be
    parsed from the version string returned by scram.
    """
    if self.scriptExe:#CarlosDaniele
        return self.scriptExe + " $NJob"

    # if >= CMSSW_1_5_X, add -e
    version_array = self.scram.getSWVersion().split('_')
    try:
        major = int(version_array[1])
        minor = int(version_array[2])
    except (IndexError, ValueError):
        msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
        raise CrabException(msg)

    # compare as a pair: 'major >= 1 and minor >= 5' would wrongly
    # exclude releases such as 2_0_x
    if (major, minor) >= (1, 5):
        return " -e -p pset.cfg"
    return " -p pset.cfg"
1163 slacapra 1.1
def inputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL input sandbox.

    The list holds, in this order: the code tar ball and the ML tar ball
    (each only if the file exists), the job configuration file when a
    pset is defined, and the tar ball with the additional input files.
    """
    ## code
    sandbox = [tgz for tgz in (self.tgzNameWithPath, self.MLtgzfile)
               if os.path.isfile(tgz)]
    ## config
    if self.pset is not None:
        sandbox.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
    ## additional input files
    sandbox.append(self.additionalInputFileTgz())
    return sandbox
1183    
def outputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL output sandbox.

    Every user declared output file (self.output_file followed by
    self.output_file_sandbox) is numbered with nj + 1 by numberFile_.
    """
    ## User Declared output files
    declared = self.output_file+self.output_file_sandbox
    return [self.numberFile_(out_name, str(nj + 1)) for out_name in declared]
1195    
def prepareSteeringCards(self):
    """
    Hook for the initial modifications of the user's steering card file.
    Nothing is done for this job type.
    """
    return None
1201    
def wsRenameOutput(self, nj):
    """
    Returns part of a job script which renames the produced files.

    For every file in self.output_file_sandbox and self.output_file the
    script moves the file to $RUNTIME_AREA and copies it to its numbered
    name (see numberFile_, suffix '$NJob'); a missing file sets
    exit_status=60302.  Only files of self.output_file also set
    output_exit_status and end up in the shell variable file_list.
    `nj` is unused.
    """

    txt = '\n'
    txt += '# directory content\n'
    txt += 'ls \n'

    txt += 'output_exit_status=0\n'

    for fileWithSuffix in (self.output_file_sandbox):
        output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
        txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA\n'
        txt += ' cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
        txt += 'else\n'
        txt += ' exit_status=60302\n'
        txt += ' echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
        if common.scheduler.boss_scheduler_name == 'condor_g':
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' fi \n'
        txt += 'fi\n'

    for fileWithSuffix in (self.output_file):
        output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
        txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA\n'
        txt += ' cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
        txt += 'else\n'
        txt += ' exit_status=60302\n'
        txt += ' echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
        txt += ' echo "JOB_EXIT_STATUS = $exit_status"\n'
        txt += ' output_exit_status=$exit_status\n'
        if common.scheduler.boss_scheduler_name == 'condor_g':
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' fi \n'
        txt += 'fi\n'

    # blank-separated list of the numbered output files
    file_list = [self.numberFile_(out_name, '$NJob') for out_name in self.output_file]

    txt += 'file_list="'+' '.join(file_list)+'"\n'
    txt += 'cd $RUNTIME_AREA\n'
    return txt
1255    
def numberFile_(self, file, txt):
    """
    Insert '_<txt>' before the last extension of a file name.

    'out.root' with txt '3' gives 'out_3.root'; a name without any dot
    simply gets '_<txt>' appended.
    """
    pieces = file.split(".")
    if len(pieces) > 1:
        # everything up to the last dot is the name, the rest the extension
        return ".".join(pieces[:-1]) + '_' + txt + "." + pieces[-1]
    return pieces[0] + '_' + txt
1273    
def getRequirements(self, nj=[]):
    """
    Return the job requirements to add to the jdl files.

    The expression asks for the software tags of self.version and
    self.executable_arch (each only when set) and always ends with the
    outbound IP clause.  `nj` is unused.
    """
    tag_head = 'Member("VO-cms-'
    tag_tail = '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'

    req = ''
    if self.version:
        req = tag_head + self.version + tag_tail
    ## SL add requirement for OS version only if SL4
    if self.executable_arch:
        req += ' && ' + tag_head + self.executable_arch + tag_tail

    return req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1293 gutsche 1.3
def configFilename(self):
    """ Return the config file name: the job type name plus '.cfg'. """
    cfg_suffix = '.cfg'
    return self.name() + cfg_suffix
1297    
1298     ### OLI_DANIELE
def wsSetupCMSOSGEnvironment_(self):
    """
    Returns part of a job script which prepares
    the execution environment and which is common for all CMS jobs
    running on OSG.

    The script sources cmsset_default.sh from $GRID3_APP_DIR or $OSG_APP
    after exporting SCRAM_ARCH.  When neither file exists it reports
    error 10020, removes $WORKING_DIR (reporting 10017 if that fails)
    and exits.
    """
    txt = '\n'
    txt += ' echo "### SETUP CMS OSG ENVIRONMENT ###"\n'
    txt += ' if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
    txt += ' # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
    txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
    txt += ' source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
    txt += ' elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
    txt += ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
    txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
    txt += ' source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
    txt += ' else\n'
    txt += ' echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10020"\n'
    txt += ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    # no 'exit 1' here: it used to make the cleanup below unreachable;
    # the script exits once the working directory has been dealt with
    txt += '\n'
    txt += ' echo "Remove working directory: $WORKING_DIR"\n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10017"\n'
    txt += ' echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += '\n'
    txt += ' exit 1\n'
    txt += ' fi\n'
    txt += '\n'
    txt += ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
    txt += ' echo " END SETUP CMS OSG ENVIRONMENT "\n'

    return txt
1344    
1345     ### OLI_DANIELE
def wsSetupCMSLCGEnvironment_(self):
    """
    Returns part of a job script which prepares
    the execution environment and which is common for all CMS jobs
    running on LCG.

    The script checks $VO_CMS_SW_DIR (error 10031), the presence of
    cmsset_default.sh in it (error 10020) and the result of sourcing
    that file (error 10032).
    """
    script_lines = [
        ' \n',
        ' echo " ### SETUP CMS LCG ENVIRONMENT ### "\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n',
        ' echo "JOB_EXIT_STATUS = 10031" \n',
        ' echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' echo "JOB_EXIT_STATUS = 10032"\n',
        ' echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
        ' echo "### END SETUP CMS LCG ENVIRONMENT ###"\n',
    ]
    return ''.join(script_lines)
1392 gutsche 1.5
1393 fanzago 1.93 ### FEDE FOR DBS OUTPUT PUBLICATION
def modifyReport(self, nj):
    """
    Return the shell fragment that rewrites the FrameworkJob Report
    for output publication.

    nj is not used by this method.
    The full fragment is produced only when
    cfg_params['USER.publish_data'] converts to 1; when the key is
    missing or holds another value the fragment only echoes that no
    publication is required.  With publication enabled,
    cfg_params['USER.publish_data_name'] must be present (a missing
    key raises KeyError).
    """
    try:
        publication_on = (int(self.cfg_params['USER.publish_data']) == 1)
    except KeyError:
        publication_on = False
    if not publication_on:
        return 'echo "no data publication required"\n'

    dataset_name = self.cfg_params['USER.publish_data_name']
    # ModifyJobReport.py is taken from $SOFTWARE_DIR.
    report_tool = '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py'
    report_cmd = report_tool + (' crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset'
                                ' $DataTier $ProcessedDataset $ApplicationFamily'
                                ' $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH')

    pieces = [
        'echo "Modify Job Report" \n',
        'chmod a+x ' + report_tool + '\n',
        # Make sure SE and SE_PATH are at least defined (possibly empty).
        'if [ -z "$SE" ]; then\n',
        ' SE="" \n',
        'fi \n',
        'if [ -z "$SE_PATH" ]; then\n',
        ' SE_PATH="" \n',
        'fi \n',
        'echo "SE = $SE"\n',
        'echo "SE_PATH = $SE_PATH"\n',
        'ProcessedDataset=' + dataset_name + '\n',
        # LFN=/store/user/<user>/processedDataset_PSETHASH
        'if [ "$SE_PATH" == "" ]; then\n',
        ' FOR_LFN=/copy_problems/ \n',
        'else \n',
        # NOTE: the 'store' path component is hardcoded here.
        ' tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n',
        ' FOR_LFN=/store$tmp \n',
        'fi \n',
        'echo "ProcessedDataset = $ProcessedDataset"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
        # Show the command line first, then actually run it.
        'echo "' + report_cmd + '"\n',
        report_cmd + '\n',
        'modifyReport_result=$?\n',
        'echo modifyReport_result = $modifyReport_result\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' exit_status=1\n',
        ' echo "ERROR: Problem with ModifyJobReport"\n',
        'else\n',
        # On success the rewritten report replaces the job report.
        ' mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n',
        'fi\n',
    ]
    return ''.join(pieces)
1461 fanzago 1.99
def cleanEnv(self):
    """
    Return the shell fragment that cleans up the worker node.

    The fragment acts only when $middleware is OSG: it moves to
    $RUNTIME_AREA and removes $WORKING_DIR.  If the directory is still
    there afterwards it reports exit code 60999 through
    $RUNTIME_AREA/$repo and dumpStatus; the fragment itself does not
    exit.
    """
    status_file = '$RUNTIME_AREA/$repo'
    pieces = [
        'if [ $middleware == OSG ]; then\n',
        ' cd $RUNTIME_AREA\n',
        ' echo "Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        # Directory still present: the removal failed.
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n',
        ' echo "JOB_EXIT_STATUS = 60999"\n',
        ' echo "JobExitCode=60999" | tee -a ' + status_file + '\n',
        ' dumpStatus ' + status_file + '\n',
        # The status file is recreated with only the monitoring ids.
        ' rm -f ' + status_file + ' \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a ' + status_file + ' \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a ' + status_file + '\n',
        ' fi\n',
        'fi\n',
        '\n',
    ]
    return ''.join(pieces)
1481 fanzago 1.93
def setParam_(self, param, value):
    """
    Record value under the key param in the parameter dictionary,
    replacing any value already stored for that key.
    """
    self._params.update({param: value})
1484    
def getParams(self):
    """
    Return the parameter dictionary itself (not a copy).
    """
    stored = self._params
    return stored
1487 gutsche 1.8
def setTaskid_(self):
    """
    Copy cfg_params['taskId'] into the _taskId attribute.
    A missing 'taskId' key raises KeyError.
    """
    task_id = self.cfg_params['taskId']
    self._taskId = task_id
1490    
def getTaskid(self):
    """
    Return the task id stored in the _taskId attribute
    (the attribute that setTaskid_ assigns).
    """
    task_id = self._taskId
    return task_id
1493 gutsche 1.35
def uniquelist(self, old):
    """
    Remove duplicates from a list.

    old is any iterable of hashable elements.
    Returns a new list holding each distinct element once, in the
    order of its first occurrence.  The result is always a real list
    (never a dictionary view), so callers can index or concatenate it.
    An empty input gives an empty list.
    """
    seen = {}
    unique = []
    for element in old:
        # The dictionary is used only for the constant-time membership test.
        if element not in seen:
            seen[element] = 0
            unique.append(element)
    return unique
1502 fanzago 1.116.2.1
1503    
def checkOut(self, limit):
    """
    Build the shell fragment that checks the output files against the
    output sandbox size limit.

    limit is written into the script as str(limit) and compared with
    the summed file sizes read from the third column of `ls -gG`.
    Returns the fragment as a string.

    The file names come from output_file + output_file_sandbox, each
    passed through numberFile_(name, '$NJob'); names containing
    'crab_fjr' are left out of the check.  When the total exceeds the
    limit, the generated script removes the first file that pushes the
    running total over the limit together with every file listed after
    it, and sets exit_status=70000.
    """
    checked = [self.numberFile_(name, '$NJob')
               for name in (self.output_file + self.output_file_sandbox)
               if name.find('crab_fjr') == -1]
    # Each name is preceded by a blank, as the script text was always laid out.
    allOutFiles = ''.join([' ' + name for name in checked])

    txt = 'echo "*****************************************"\n'
    txt += 'echo "** Starting output sandbox limit check **"\n'
    txt += 'echo "*****************************************"\n'
    txt += 'echo "OUTPUT files: '+str(allOutFiles)+'";\n'
    txt += 'ls -gGhrta;\n'
    # First pass: total size of the files that actually exist.
    txt += 'sum=0;\n'
    txt += 'for file in '+str(allOutFiles)+' ; do\n'
    txt += ' if [ -e $file ]; then\n'
    txt += ' tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
    txt += ' sum=`expr $sum + $tt`\n'
    txt += ' else\n'
    txt += ' echo "WARNING: output file $file not found!"\n'
    txt += ' fi\n'
    txt += 'done\n'
    txt += 'echo "Total Output dimension: $sum";\n'
    txt += 'limit='+str(limit)+';\n'
    txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
    txt += 'if [ $limit -lt $sum ]; then\n'
    txt += ' echo "WARNING: output files have to big size - something will be lost;"\n'
    txt += ' echo " checking the output file sizes..."\n'
    # Second pass: find the first file that pushes the running total
    # over the limit.
    txt += ' tot=0;\n'
    txt += ' for file2 in '+str(allOutFiles)+' ; do\n'
    # Missing files were only warned about above; skipping them here
    # keeps the expr arithmetic from being fed an empty size.
    txt += ' if [ ! -e $file2 ]; then continue; fi\n'
    txt += ' tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
    txt += ' tot=`expr $tot + $tt`;\n'
    txt += ' if [ $limit -lt $tot ]; then\n'
    txt += ' tot=`expr $tot - $tt`;\n'
    # Record this loop's own variable: $file is a leftover from the
    # first pass and always names the last file of the list.
    txt += ' fileLast=$file2;\n'
    txt += ' break;\n'
    txt += ' fi\n'
    txt += ' done\n'
    txt += ' echo "Dimension calculated: $tot"; echo "First file to exclude: $fileLast";\n'
    # Third pass: delete fileLast and everything listed after it.
    txt += ' flag=0;\n'
    txt += ' for filess in '+str(allOutFiles)+' ; do\n'
    txt += ' if [ $fileLast = $filess ]; then\n'
    txt += ' flag=1;\n'
    txt += ' fi\n'
    txt += ' if [ $flag -eq 1 ]; then\n'
    txt += ' rm -f $filess;\n'
    txt += ' fi\n'
    txt += ' done\n'
    txt += ' ls -agGhrt;\n'
    txt += ' echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
    txt += ' echo "JOB_EXIT_STATUS = 70000";\n'
    txt += ' exit_status=70000;\n'
    txt += 'else\n'
    txt += ' echo "Total Output dimension $sum is fine.";\n'
    txt += 'fi\n'
    txt += 'echo "*****************************************"\n'
    txt += 'echo "*** Ending output sandbox limit check ***"\n'
    txt += 'echo "*****************************************"\n'
    return txt