ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.116.2.6
Committed: Fri Oct 12 09:46:06 2007 UTC (17 years, 6 months ago) by fanzago
Content type: text/x-python
Branch: CRAB_1_5_4_SLC3_start
CVS Tags: CRAB_1_5_4_SLC3_pre2
Changes since 1.116.2.5: +22 -12 lines
Log Message:
fixed the output creation control

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5 fanzago 1.115 from BlackWhiteListParser import BlackWhiteListParser
6 slacapra 1.1 import common
7     import Scram
8    
9 slacapra 1.105 import os, string, glob
10 slacapra 1.1
11     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs):
    """
    Configure a CMSSW job type from the user configuration.

    Reads the CMSSW.*, USER.* and EDG.* keys of cfg_params, validates
    them, performs data discovery/location when a dataset is given,
    builds (or reuses) the user tarball, runs the job splitting and
    finally writes the modified parameter set.

    ARGUMENTS: cfg_params - mapping of configuration keys to string values
               ncjobs     - number of jobs to create, or the string 'all'
    RAISES:    CrabException on any missing/invalid mandatory setting,
               on an unsupported version/architecture pair, or when the
               parameter set cannot be manipulated.
    """
    import sys

    JobType.__init__(self, 'CMSSW')
    common.logger.debug(3,'CMSSW::__init__')

    def intOption(key, default, note=None):
        # Return (int(cfg_params[key]), 1), or (default, 0) when the key
        # is absent; 'note' is logged at debug level 5 in the latter case.
        try:
            return int(cfg_params[key]), 1
        except KeyError:
            if note is not None:
                common.logger.debug(5, note)
            return default, 0

    self._params = {}
    self.cfg_params = cfg_params

    # init BlackWhiteListParser
    self.blackWhiteListParser = BlackWhiteListParser(cfg_params)

    try:
        self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
    except KeyError:
        self.MaxTarBallSize = 9.5

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    log = common.logger

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.additional_tgz_name = 'additional.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''  # script use case
    self.datasetPath = ''  # script use case

    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()

    #
    # Try to block creation in case of arch/version mismatch
    #
    a = self.version.split("_")

    if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
        msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
        raise CrabException(msg)
    if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
        msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
        raise CrabException(msg)

    common.taskDB.setDict('codeVersion',self.version)
    self.setParam_('application', self.version)

    ### collect Data cards

    ## get DBS mode
    self.use_dbs_1 = intOption('CMSSW.use_dbs_1', 0)[0]

    try:
        tmp = cfg_params['CMSSW.datasetpath']
    except KeyError:
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)
    log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
    if tmp.lower() == 'none':
        self.datasetPath = None
        self.selectNoInput = 1
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0

    # ML monitoring
    # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
    if not self.datasetPath:
        self.setParam_('dataset', 'None')
        self.setParam_('owner', 'None')
    else:
        try:
            datasetpath_split = self.datasetPath.split("/")
            # standard style
            self.setParam_('datasetFull', self.datasetPath)
            if self.use_dbs_1 == 1 :
                self.setParam_('dataset', datasetpath_split[1])
                self.setParam_('owner', datasetpath_split[-1])
            else:
                self.setParam_('dataset', datasetpath_split[1])
                self.setParam_('owner', datasetpath_split[2])
        except Exception:
            # Best effort: a path that does not follow the standard style
            # is reported as-is for both dataset and owner.
            self.setParam_('dataset', self.datasetPath)
            self.setParam_('owner', self.datasetPath)

    self.setTaskid_()
    self.setParam_('taskId', self.cfg_params['taskId'])

    self.dataTiers = []

    ## now the application
    try:
        self.executable = cfg_params['CMSSW.executable']
    except KeyError:
        self.executable = 'cmsRun'
        msg = "User executable not defined. Use cmsRun"
    else:
        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
        msg = "Default executable cmsRun overridden. Switch to " + self.executable
    self.setParam_('exe', self.executable)
    log.debug(3,msg)

    try:
        self.pset = cfg_params['CMSSW.pset']
    except KeyError:
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
    if self.pset.lower() == 'none':
        self.pset = None
    elif not os.path.exists(self.pset):
        raise CrabException("User defined PSet file "+self.pset+" does not exist")

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    try:
        outputCfg = cfg_params['CMSSW.output_file']
    except KeyError:
        outputCfg = ''
    if outputCfg != '':
        tmpOutFiles = outputCfg.split(',')
        log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
        for tmp in tmpOutFiles:
            self.output_file.append(tmp.strip())
    else:
        log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")

    # script_exe file as additional file in inputSandbox
    try:
        self.scriptExe = cfg_params['USER.script_exe']
    except KeyError:
        self.scriptExe = ''
    if self.scriptExe != '':
        if not os.path.isfile(self.scriptExe):
            msg ="ERROR. file "+self.scriptExe+" not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    # With neither a dataset nor a parameter set, a user script is mandatory
    if self.datasetPath is None and self.pset is None and self.scriptExe == '' :
        msg ="Error. script_exe not defined"
        raise CrabException(msg)

    ## additional input files
    try:
        addFilesCfg = cfg_params['USER.additional_input_files']
    except KeyError:
        addFilesCfg = None
    if addFilesCfg is not None:
        for tmp in addFilesCfg.split(','):
            tmp = tmp.strip()
            if not tmp:
                # empty entry (empty value or stray comma): nothing to add
                continue
            if tmp.find("*") > -1:
                # relative patterns are expanded from the current directory
                dirname = ''
                if not tmp[0] == "/":
                    dirname = "."
                files = glob.glob(os.path.join(dirname, tmp))
                if len(files) == 0:
                    raise CrabException("No additional input file found with this pattern: "+tmp)
            else:
                files = [tmp]
            for fname in files:
                if not os.path.exists(fname):
                    raise CrabException("Additional input file not found: "+fname)
                self.additional_inbox_files.append(fname.strip())
        common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))

    # files per job
    try:
        filesPerJobs = cfg_params['CMSSW.files_per_jobs']
    except KeyError:
        filesPerJobs = None
    if filesPerJobs:
        raise CrabException("files_per_jobs no longer supported. Quitting.")

    ## Events per job
    self.eventsPerJob, self.selectEventsPerJob = intOption('CMSSW.events_per_job', -1)

    ## number of jobs
    self.theNumberOfJobs, self.selectNumberOfJobs = intOption('CMSSW.number_of_jobs', 0)

    self.total_number_of_events, self.selectTotalNumberEvents = intOption('CMSSW.total_number_of_events', 0)

    if self.pset is not None:
        if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
            msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
            raise CrabException(msg)
    else:
        if (self.selectNumberOfJobs == 0):
            msg = 'Must specify number_of_jobs.'
            raise CrabException(msg)

    ## source seed for pythia
    self.sourceSeed = intOption('CMSSW.pythia_seed', None, "No seed given")[0]
    self.sourceSeedVtx = intOption('CMSSW.vtx_seed', None, "No vertex seed given")[0]
    self.sourceSeedG4 = intOption('CMSSW.g4_seed', None, "No g4 sim hits seed given")[0]
    self.sourceSeedMix = intOption('CMSSW.mix_seed', None, "No mix seed given")[0]
    self.firstRun = intOption('CMSSW.first_run', None, "No first run given")[0]

    if self.pset is not None:
        ver = self.version.split("_")
        # Releases from 1_5 onwards need the newer manipulator.  Compare
        # (major, minor) as a pair so that e.g. 2_0_x is not mistaken
        # for a pre-1_5 release.
        if (int(ver[1]), int(ver[2])) >= (1, 5):
            import PsetManipulator150 as pp
        else:
            import PsetManipulator as pp
        PsetEdit = pp.PsetManipulator(self.pset)

    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination=[]  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end

    self.tgzNameWithPath = self.getTarBall(self.executable)

    ## Select Splitting
    if self.selectNoInput:
        if self.pset is None:
            self.jobSplittingForScript()
        else:
            self.jobSplittingNoInput()
    else:
        self.jobSplittingByBlocks(blockSites)

    # modify Pset
    if self.pset is not None:
        try:
            if (self.datasetPath): # standard job
                # allow to process a fraction of events in a file
                PsetEdit.inputModule("INPUT")
                PsetEdit.maxEvent("INPUTMAXEVENTS")
                PsetEdit.skipEvent("INPUTSKIPEVENTS")
            else:  # pythia like job
                PsetEdit.maxEvent(self.eventsPerJob)
                if (self.firstRun):
                    PsetEdit.pythiaFirstRun("INPUTFIRSTRUN")  #First Run
                # NOTE(review): vertex/G4/mix seeds are only applied when a
                # pythia seed is configured, mirroring the argument list
                # built in jobSplittingNoInput() -- confirm this nesting.
                if (self.sourceSeed) :
                    PsetEdit.pythiaSeed("INPUT")
                    if (self.sourceSeedVtx) :
                        PsetEdit.vtxSeed("INPUTVTX")
                    if (self.sourceSeedG4) :
                        PsetEdit.g4Seed("INPUTG4")
                    if (self.sourceSeedMix) :
                        PsetEdit.mixSeed("INPUTMIX")
            # add FrameworkJobReport to parameter-set
            PsetEdit.addCrabFJR(self.fjrFileName)
            PsetEdit.psetWriter(self.configFilename())
        except Exception:
            # Keep the historical message but append the underlying cause
            # so the user can see what actually went wrong.
            cause = sys.exc_info()[1]
            msg='Error while manipuliating ParameterSet: exiting...'
            msg += '\n' + str(cause)
            raise CrabException(msg)
331 gutsche 1.3
def DataDiscoveryAndLocation(self, cfg_params):
    """
    Query DBS for the content of self.datasetPath and DLS for its location.

    ARGUMENT: cfg_params - user configuration, passed on to the discovery
              and location services
    SETS:     self.pubdata, self.filesbyblock, self.eventsbyblock,
              self.eventsbyfile, self.maxEvents
    RETURNS:  the result of DataLocation.getSites() (used by
              jobSplittingByBlocks as block -> list of hosting sites)
    RAISES:   CrabException when data discovery or data location fails
    """
    import sys

    import DataDiscovery
    import DataDiscovery_DBS2
    import DataLocation
    common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")

    datasetPath=self.datasetPath

    ## Contact the DBS
    common.logger.message("Contacting Data Discovery Services ...")
    try:
        if self.use_dbs_1 == 1 :
            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
        else :
            self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
        self.pubdata.fetchDBSInfo()
    except (DataDiscovery.NotExistingDatasetError,
            DataDiscovery.NoDataTierinProvenanceError,
            DataDiscovery.DataDiscoveryError,
            DataDiscovery_DBS2.NotExistingDatasetError_DBS2,
            DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2,
            DataDiscovery_DBS2.DataDiscoveryError_DBS2):
        # All discovery failures are reported identically.
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
        raise CrabException(msg)

    self.filesbyblock=self.pubdata.getFiles()
    self.eventsbyblock=self.pubdata.getEventsPerBlock()
    self.eventsbyfile=self.pubdata.getEventsPerFile()

    ## get max number of events
    self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py

    ## Contact the DLS and build a list of sites hosting the fileblocks
    try:
        dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
        dataloc.fetchDLSInfo()
    except DataLocation.DataLocationError:
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
        raise CrabException(msg)

    sites = dataloc.getSites()

    # screen output
    common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")

    return sites
398 gutsche 1.3
def jobSplittingByBlocks(self, blockSites):
    """
    Perform job splitting. Jobs run over an integer number of files
    and no more than one block.
    ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
    REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
              self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
              self.maxEvents, self.filesbyblock, self.eventsbyblock, self.eventsbyfile,
              self.ncjobs
    SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
          self.total_number_of_jobs, self.ncjobs - Total # of jobs
          self.list_of_args - per job: [file list string, max events, skip events]
    RAISES: CrabException when number_of_jobs is 0 or the resulting number
            of events per job is not positive
    """

    # ---- Handle the possible job splitting configurations ---- #
    if (self.selectTotalNumberEvents):
        totalEventsRequested = self.total_number_of_events
    if (self.selectEventsPerJob):
        eventsPerJobRequested = self.eventsPerJob
        if (self.selectNumberOfJobs):
            totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob

    # If user requested all the events in the dataset
    if (totalEventsRequested == -1):
        eventsRemaining=self.maxEvents
    # If user requested more events than are in the dataset
    elif (totalEventsRequested > self.maxEvents):
        eventsRemaining = self.maxEvents
        # report the effective request (it may be derived from
        # number_of_jobs * events_per_job rather than given directly)
        common.logger.message("Requested "+str(totalEventsRequested)+ " events, but only "+str(self.maxEvents)+" events are available.")
    # If user requested less events than are in the dataset
    else:
        eventsRemaining = totalEventsRequested

    # If user requested more events per job than are in the dataset
    if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
        eventsPerJobRequested = self.maxEvents

    # For user info at end
    totalEventCount = 0

    if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
        if self.theNumberOfJobs == 0:
            raise CrabException('number_of_jobs must be different from 0.')
        eventsPerJobRequested = eventsRemaining // self.theNumberOfJobs

    # A non-positive events-per-job would never consume eventsRemaining and
    # the loop below would keep emitting useless jobs.
    if (eventsRemaining > 0 and eventsPerJobRequested < 1):
        msg = 'Cannot split: the resulting number of events per job is '+str(eventsPerJobRequested)+'.'
        raise CrabException(msg)

    if (self.selectNumberOfJobs):
        common.logger.message("May not create the exact number_of_jobs requested.")

    if ( self.ncjobs == 'all' ) :
        totalNumberOfJobs = 999999999
    else :
        totalNumberOfJobs = self.ncjobs

    blocks = list(blockSites.keys())
    blockCount = 0
    # Backup variable in case self.maxEvents counted events in a non-included block
    numBlocksInDataset = len(blocks)

    jobCount = 0
    list_of_lists = []

    # dictionary tracking which jobs belong to which block
    jobsOfBlock = {}

    # ---- Iterate over the blocks in the dataset until ---- #
    # ---- we've met the requested total # of events    ---- #
    while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
        block = blocks[blockCount]
        blockCount += 1
        if block not in jobsOfBlock:
            jobsOfBlock[block] = []

        if block in self.eventsbyblock:
            numEventsInBlock = self.eventsbyblock[block]
            common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))

            files = self.filesbyblock[block]
            numFilesInBlock = len(files)
            if (numFilesInBlock <= 0):
                continue
            fileCount = 0

            # ---- New block => New job ---- #
            parString = "\\{"
            # counter for number of events in files currently worked on
            filesEventCount = 0
            # flag if next while loop should touch new file
            newFile = 1
            # job event counter
            jobSkipEventCount = 0

            # ---- Iterate over the files in the block until we've met the requested ---- #
            # ---- total # of events or we've gone over all the files in this block  ---- #
            while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
                fileName = files[fileCount]
                if newFile :
                    try:
                        numEventsInFile = self.eventsbyfile[fileName]
                        common.logger.debug(6, "File "+str(fileName)+" has "+str(numEventsInFile)+" events")
                        # increase filesEventCount
                        filesEventCount += numEventsInFile
                        # Add file to current job
                        parString += '\\"' + fileName + '\\"\\,'
                        newFile = 0
                    except KeyError:
                        common.logger.message("File "+str(fileName)+" has unknown number of events: skipping")

                # if less events in file remain than eventsPerJobRequested
                if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
                    # if last file in block
                    if ( fileCount == numFilesInBlock-1 ) :
                        # end job using last file, use remaining events in block
                        # close job and touch new file
                        # ([:-2] drops the trailing backslash-comma separator)
                        fullString = parString[:-2]
                        fullString += '\\}'
                        list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
                        self.jobDestination.append(blockSites[block])
                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                        # fill jobs of block dictionary
                        jobsOfBlock[block].append(jobCount+1)
                        # reset counter
                        jobCount = jobCount + 1
                        totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
                        eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
                        jobSkipEventCount = 0
                        # reset file
                        parString = "\\{"
                        filesEventCount = 0
                        newFile = 1
                        fileCount += 1
                    else :
                        # go to next file
                        newFile = 1
                        fileCount += 1
                # if events in file equal to eventsPerJobRequested
                elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
                    # close job and touch new file
                    fullString = parString[:-2]
                    fullString += '\\}'
                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    jobsOfBlock[block].append(jobCount+1)
                    # reset counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    jobSkipEventCount = 0
                    # reset file
                    parString = "\\{"
                    filesEventCount = 0
                    newFile = 1
                    fileCount += 1

                # if more events in file remain than eventsPerJobRequested
                else :
                    # close job but don't touch new file
                    fullString = parString[:-2]
                    fullString += '\\}'
                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    jobsOfBlock[block].append(jobCount+1)
                    # increase counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    # calculate skip events for last file
                    # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
                    jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[fileName])
                    # remove all but the last file
                    filesEventCount = self.eventsbyfile[fileName]
                    parString = "\\{"
                    parString += '\\"' + fileName + '\\"\\,'

    self.ncjobs = self.total_number_of_jobs = jobCount
    if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
        common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
    common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")

    # screen output
    screenOutput = "List of jobs and available destination sites:\n\n"

    # keep trace of block with no sites to print a warning at the end
    jobRangesNoSite = []
    blockNumbersNoSite = []

    blockCounter = 0
    for block in blocks:
        if block in jobsOfBlock:
            blockCounter += 1
            # apply black list, then white list, once per block
            allowedSites = self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)
            jobRanges = spanRanges(jobsOfBlock[block])
            screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,jobRanges,','.join(allowedSites))
            if len(allowedSites) == 0:
                jobRangesNoSite.append(jobRanges)
                blockNumbersNoSite.append(blockCounter)

    common.logger.message(screenOutput)
    if len(blockNumbersNoSite) > 0:
        msg = 'WARNING: No sites are hosting any part of data for block:\n '
        msg += ' ' + ', '.join([str(number) for number in blockNumbersNoSite])
        msg += '\n Related jobs:\n '
        msg += ','.join([str(jobRange) for jobRange in jobRangesNoSite])
        msg += '\n will not be submitted and this block of data can not be analyzed!\n'
        common.logger.message(msg)

    self.list_of_args = list_of_lists
    return
618    
def jobSplittingNoInput(self):
    """
    Perform job splitting based on number of event per job
    (no input dataset, e.g. event generation).

    REQUIRES: self.selectEventsPerJob, self.selectTotalNumberEvents,
              self.selectNumberOfJobs, self.eventsPerJob,
              self.theNumberOfJobs, self.total_number_of_events,
              self.firstRun and the self.sourceSeed* values
    SETS: self.total_number_of_jobs, self.list_of_args (one argument list
          per job), self.jobDestination (one [""] entry appended per job);
          self.eventsPerJob or self.total_number_of_events when derived
    RAISES: CrabException when the total number of events is negative or
            when the value to divide by is not positive
    """
    common.logger.debug(5,'Splitting per events')
    common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
    common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
    common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')

    if (self.total_number_of_events < 0):
        msg='Cannot split jobs per Events with "-1" as total number of events'
        raise CrabException(msg)

    if (self.selectEventsPerJob):
        if (self.selectTotalNumberEvents):
            if self.eventsPerJob < 1:
                raise CrabException('Cannot split jobs: events_per_job must be a positive number')
            self.total_number_of_jobs = self.total_number_of_events // self.eventsPerJob
        elif(self.selectNumberOfJobs) :
            self.total_number_of_jobs = self.theNumberOfJobs
            self.total_number_of_events = int(self.theNumberOfJobs*self.eventsPerJob)

    elif (self.selectNumberOfJobs) :
        if self.theNumberOfJobs < 1:
            raise CrabException('Cannot split jobs: number_of_jobs must be a positive number')
        self.total_number_of_jobs = self.theNumberOfJobs
        self.eventsPerJob = self.total_number_of_events // self.total_number_of_jobs

    common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))

    # is there any remainder?
    check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)

    common.logger.debug(5,'Check  '+str(check))

    common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
    if check > 0:
        common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))

    # Each job gets its own index appended (as text) to the first run and
    # to every configured seed, so that the values differ between jobs.
    self.list_of_args = []
    for i in range(self.total_number_of_jobs):
        ## Since there is no input, any site is good
        self.jobDestination.append([""]) #must be empty to write correctly the xml
        args=[]
        if (self.firstRun):
            ## pythia first run
            args.append(str(self.firstRun)+str(i))
        else:
            ## no first run
            args.append(str(i))
        # NOTE(review): vertex/G4/mix seeds are only emitted when a pythia
        # seed is configured -- confirm this nesting against the file.
        if (self.sourceSeed):
            args.append(str(self.sourceSeed)+str(i))
            if (self.sourceSeedVtx):
                ## + vtx random seed
                args.append(str(self.sourceSeedVtx)+str(i))
            if (self.sourceSeedG4):
                ## + G4 random seed
                args.append(str(self.sourceSeedG4)+str(i))
            if (self.sourceSeedMix):
                ## + Mix random seed
                args.append(str(self.sourceSeedMix)+str(i))
        self.list_of_args.append(args)

    return
688    
689 spiga 1.42
def jobSplittingForScript(self):
    """
    Perform job splitting based on number of job
    (user script without dataset and without parameter set).

    SETS: self.total_number_of_jobs from self.theNumberOfJobs,
          self.list_of_args as one [job index] list per job, and appends
          one [""] destination per job to self.jobDestination.
    """
    requested = self.theNumberOfJobs

    common.logger.debug(5,'Splitting per job')
    common.logger.message('Required '+str(requested)+' jobs in total ')

    self.total_number_of_jobs = requested

    common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))

    common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')

    # the only argument of each job is its own index
    jobIndexes = range(self.total_number_of_jobs)
    self.list_of_args = []
    for index in jobIndexes:
        ## Since there is no input, any site is good: the destination is
        ## left as an empty string
        self.jobDestination.append([""])
        ## no random seed
        self.list_of_args.append([str(index)])
    return
712    
def split(self, jobParams):
    """
    Store arguments and destination of every job in the job database.

    ARGUMENT: jobParams - list extended in place with one entry per job;
              the first self.total_number_of_jobs entries are then set
              to the matching items of self.list_of_args
    REQUIRES: self.total_number_of_jobs, self.list_of_args,
              self.jobDestination
    """
    common.jobDB.load()

    njobs = self.total_number_of_jobs
    arglist = self.list_of_args

    # create the empty structure
    jobParams.extend([""] * njobs)

    for job in range(njobs):
        jobArgs = arglist[job]
        jobParams[job] = jobArgs
        common.jobDB.setArguments(job, jobArgs)
        destination = self.jobDestination[job]
        common.logger.debug(5,"Job "+str(job)+" Destination: "+str(destination))
        common.jobDB.setDestination(job, self.jobDestination[job])

    common.jobDB.save()
    return
733    
def getJobTypeArguments(self, nj, sched):
    """
    Return the arguments stored in the job DB for job 'nj' as one string,
    each argument followed by a single blank. 'sched' is not used.
    """
    return "".join([str(arg) + " " for arg in common.jobDB.arguments(nj)])
739 gutsche 1.3
def numberOfJobs(self):
    """
    Return the total number of jobs stored by the job-splitting step.
    """
    njobs = self.total_number_of_jobs
    return njobs
743    
def getTarBall(self, exe):
    """
    Return the path of the tar ball holding user libraries and executable.

    The path is stored in self.tgzNameWithPath. An already existing
    tar ball is reused; otherwise buildTar_ is asked to create it.
    """
    # Marco. Let's start to use relative path for Boss XML files
    tgzPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
    self.tgzNameWithPath = tgzPath

    if not os.path.exists(tgzPath):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return self.tgzNameWithPath.strip()

    # it already exists: just return it
    return tgzPath
761    
def buildTar_(self, executable):
    """
    Build the input-sandbox tar balls for the task.

    Two gzipped tar files are written:
      * self.tgzNameWithPath: the private executable (if any), the 'lib'
        and 'module' directories and every 'data' directory found in the
        user SCRAM area, plus the ProdAgentApi and ProdCommon directories
        found under $CRABDIR;
      * self.MLtgzfile: the monitoring helper scripts from $CRABDIR/python.

    Nothing is done when the working area is the release top itself.

    Raises CrabException when the executable cannot be found, when a
    tar ball cannot be written or when the main tar ball is bigger than
    self.MaxTarBallSize (in MB).
    """
    import sys
    import tarfile

    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        return

    try: # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            if (self.executable != ''):
                exeWithPath = self.scram.findFile_(executable)
                if ( not exeWithPath ):
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship
                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
                    path = swArea+'/'
                    # distinguish case when script is in user project area or given by full path somewhere else
                    if exeWithPath.find(path) >= 0 :
                        exe = exeWithPath.replace(path,'')
                        tar.add(path+exe,os.path.basename(executable))
                    else :
                        tar.add(exeWithPath,os.path.basename(executable))
                # otherwise the exe is from release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            libDir = 'lib'
            lib = swArea+'/' +libDir
            common.logger.debug(5,"lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib,libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module,moduleDir)

            ## Now check if any data dir(s) is present
            swAreaLen=len(swArea)
            for root, dirs, files in os.walk(swArea):
                if "data" in dirs:
                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
                    tar.add(root+"/data",root[swAreaLen:]+"/data")

            ## Add ProdAgent dir to tar
            paDir = 'ProdAgentApi'
            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
            if os.path.isdir(pa):
                tar.add(pa,paDir)

            ### FEDE FOR DBS PUBLICATION
            ## Add PRODCOMMON dir to tar
            prodcommonDir = 'ProdCommon'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
            if os.path.isdir(prodcommonPath):
                tar.add(prodcommonPath,prodcommonDir)

            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            # release the file handle even when something above failed
            tar.close()
    except CrabException:
        # keep specific messages (e.g. executable not found) intact
        raise
    except Exception:
        # sys.exc_info() keeps this valid for old and new interpreters alike
        raise CrabException('Could not create tar-ball: '+str(sys.exc_info()[1]))

    ## check for tarball size
    tarballinfo = os.stat(self.tgzNameWithPath)
    if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
        raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')

    ## create tar-ball with ML stuff
    self.MLtgzfile = common.work_space.pathForTgz()+'share/MLfiles.tgz'
    try:
        tar = tarfile.open(self.MLtgzfile, "w:gz")
        try:
            path=os.environ['CRABDIR'] + '/python/'
            for fname in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
                tar.add(path+fname,fname)
            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
        finally:
            tar.close()
    except Exception:
        raise CrabException('Could not create ML files tar-ball: '+str(sys.exc_info()[1]))

    return
858    
def additionalInputFileTgz(self):
    """
    Put all additional files into a tar ball and return its name

    Every entry of self.additional_inbox_files is stored under its base
    name (the part after the last '/'). The tar ball is written to the
    'share/' directory below common.work_space.pathForTgz().
    """
    import tarfile
    tarName= common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
    tar = tarfile.open(tarName, "w:gz")
    try:
        for inputFile in self.additional_inbox_files:
            tar.add(inputFile,inputFile.split('/')[-1])
        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
    finally:
        # do not leak the file handle when a file cannot be added
        tar.close()
    return tarName
871    
def wsSetupEnvironment(self, nj):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    The generated shell text, in order:
      * sets up the CMS environment for LCG or OSG middleware,
      * creates the SCRAM project area for self.version and evaluates
        its runtime environment,
      * checks the number of wrapper arguments,
      * exports the dataset related variables,
      * if a parameter set is used, copies it to the working directory,
        substitutes the per-job placeholders (input files, event range
        or run number / random seeds) and renames it to pset.cfg,
      * unpacks the additional input tar ball when additional files
        were requested.
    """
    # Prepare JobType-independent part
    txt = ''

    ## OLI_Daniele at this level middleware already known

    txt += 'if [ $middleware == LCG ]; then \n'
    txt += ' echo "### First set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
    txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
    txt += ' export BUILD_ARCH='+self.executable_arch+'\n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' echo "Created working directory: $WORKING_DIR"\n'
    txt += ' if [ ! -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
    txt += ' echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' exit 1\n'
    txt += ' fi\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    txt += ' echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
    txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
    txt += scram+' project CMSSW '+self.version+'\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
    txt += ' echo "JOB_EXIT_STATUS = 10034"\n'
    txt += ' echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    ## OLI_Daniele
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' echo "Remove working directory: $WORKING_DIR"\n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    # report the release that was actually requested, not a fixed one
    txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW '+self.version+' not found on `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
    txt += ' echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += ' exit 1 \n'
    txt += 'fi \n'
    txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
    txt += 'cd '+self.version+'\n'
    ########## FEDE FOR DBS2 ######################
    txt += 'SOFTWARE_DIR=`pwd`\n'
    txt += 'echo SOFTWARE_DIR=$SOFTWARE_DIR \n'
    ###############################################
    ### needed grep for bug in scramv1 ###
    txt += scram+' runtime -sh\n'
    txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    txt += 'echo $PATH\n'

    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber)\n"
    txt += "\n"
    txt += "if [ $nargs -lt 2 ]\n"
    txt += "then\n"
    txt += " echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
    txt += ' echo "JOB_EXIT_STATUS = 50113"\n'
    txt += ' echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    ## OLI_Daniele
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' echo "Remove working directory: $WORKING_DIR"\n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
    txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
    txt += ' echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += " exit 1\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    ### FEDE FOR DBS OUTPUT PUBLICATION
    if (self.datasetPath):
        txt += '\n'
        txt += 'DatasetPath='+self.datasetPath+'\n'

        # the path is used as /<primary dataset>/<data tier>/...
        datasetpath_split = self.datasetPath.split("/")

        txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
        txt += 'DataTier='+datasetpath_split[2]+'\n'
        txt += 'ApplicationFamily=cmsRun\n'

    else:
        txt += 'DatasetPath=MCDataTier\n'
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'
    if self.pset is not None: #CarlosDaniele
        pset = os.path.basename(job.configFilename())
        txt += '\n'
        txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
        if (self.datasetPath): # standard job
            txt += 'InputFiles=${args[1]}\n'
            txt += 'MaxEvents=${args[2]}\n'
            txt += 'SkipEvents=${args[3]}\n'
            txt += 'echo "Inputfiles:<$InputFiles>"\n'
            txt += 'sed "s#{\'INPUT\'}#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
            txt += 'echo "MaxEvents:<$MaxEvents>"\n'
            txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
            txt += 'echo "SkipEvents:<$SkipEvents>"\n'
            txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
        else: # pythia like job
            # position of the next optional argument in the wrapper's
            # args array; advanced once for every value that is present
            seedIndex=1
            if (self.firstRun):
                txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
                txt += 'echo "FirstRun: <$FirstRun>"\n'
                txt += 'sed "s#\\<INPUTFIRSTRUN\\>#$FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                seedIndex=seedIndex+1

            if (self.sourceSeed):
                txt += 'Seed=${args['+str(seedIndex)+']}\n'
                txt += 'sed "s#\\<INPUT\\>#$Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                seedIndex=seedIndex+1
                ## the following seeds are not always present
                if (self.sourceSeedVtx):
                    txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "VtxSeed: <$VtxSeed>"\n'
                    txt += 'sed "s#\\<INPUTVTX\\>#$VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
                if (self.sourceSeedG4):
                    txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "G4Seed: <$G4Seed>"\n'
                    txt += 'sed "s#\\<INPUTG4\\>#$G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
                if (self.sourceSeedMix):
                    txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "MixSeed: <$mixSeed>"\n'
                    txt += 'sed "s#\\<INPUTMIX\\>#$mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
        txt += 'mv -f '+pset+' pset.cfg\n'

    if len(self.additional_inbox_files) > 0:
        txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
        txt += ' tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
        txt += 'fi\n'

    if self.pset is not None: #CarlosDaniele
        txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'

        txt += '\n'
        txt += 'echo "***** cat pset.cfg *********"\n'
        txt += 'cat pset.cfg\n'
        txt += 'echo "****** end pset.cfg ********"\n'
        txt += '\n'
        ### FEDE FOR DBS OUTPUT PUBLICATION
        txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
        txt += 'echo "PSETHASH = $PSETHASH" \n'
        ##############
        txt += '\n'
    return txt
1073    
def wsBuildExe(self, nj=0):
    """
    Put in the script the commands to build an executable
    or a library.

    When the tar ball self.tgzNameWithPath exists, the returned shell
    text unpacks it from $RUNTIME_AREA (aborting the job when tar fails)
    and prepends the shipped ProdAgentApi and ProdCommon directories to
    PYTHONPATH. Without a tar ball an empty string is returned.
    'nj' is not used.
    """

    txt = ""

    if os.path.isfile(self.tgzNameWithPath):
        tgzName = os.path.basename(self.tgzNameWithPath)
        txt += 'echo "tar xzvf $RUNTIME_AREA/'+tgzName+'"\n'
        txt += 'tar xzvf $RUNTIME_AREA/'+tgzName+'\n'
        txt += 'untar_status=$? \n'
        txt += 'if [ $untar_status -ne 0 ]; then \n'
        txt += ' echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
        txt += ' echo "JOB_EXIT_STATUS = $untar_status" \n'
        txt += ' echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' if [ $middleware == OSG ]; then \n'
        txt += ' echo "Remove working directory: $WORKING_DIR"\n'
        txt += ' cd $RUNTIME_AREA\n'
        txt += ' /bin/rm -rf $WORKING_DIR\n'
        txt += ' if [ -d $WORKING_DIR ] ;then\n'
        txt += ' echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
        txt += ' echo "JOB_EXIT_STATUS = 50999"\n'
        txt += ' echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
        txt += ' rm -f $RUNTIME_AREA/$repo \n'
        txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
        txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' fi\n'
        txt += ' fi \n'
        txt += ' \n'
        txt += ' exit 1 \n'
        txt += 'else \n'
        txt += ' echo "Successful untar" \n'
        txt += 'fi \n'
        txt += '\n'
        txt += 'echo "Include ProdAgentApi and PRODCOMMON in PYTHONPATH"\n'
        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
        #### FEDE FOR DBS OUTPUT PUBLICATION
        txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
        txt += 'else\n'
        txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
        txt += 'fi\n'
        # log the final value whichever branch set it
        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
        txt += '\n'

    return txt
1128    
def modifySteeringCards(self, nj):
    """
    Hook for rewriting the user-provided card into the share directory.

    This job type has nothing to do here: the body is empty and the
    method returns None.
    """
1134    
def executableName(self):
    """
    Return the command used to start the job: "sh " when a user script
    (self.scriptExe) is set, otherwise self.executable.
    """
    if not self.scriptExe:
        return self.executable
    #CarlosDaniele: the user script is run through the shell
    return "sh "
1140 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string passed to the job executable.

    With a user script (self.scriptExe) the arguments are the script
    name followed by "$NJob". Otherwise the parameter set option
    " -p pset.cfg" is returned, preceded by " -e" for release 1_5 and
    every later release.

    Raises CrabException when major and minor release numbers cannot be
    read from the version string returned by self.scram.getSWVersion().
    """
    if self.scriptExe:#CarlosDaniele
        return self.scriptExe + " $NJob"

    # if >= CMSSW_1_5_X, add -e
    version_array = self.scram.getSWVersion().split('_')
    try:
        major = int(version_array[1])
        minor = int(version_array[2])
    except (IndexError, ValueError):
        msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
        raise CrabException(msg)
    # compare as a pair so that e.g. 2_0 counts as newer than 1_5
    if (major, minor) >= (1, 5):
        return " -e -p pset.cfg"
    return " -p pset.cfg"
1159 slacapra 1.1
def inputSandbox(self, nj):
    """
    Build the list of file names to be put in the JDL input sandbox.

    The list holds, in this order: the code tar ball and the ML tar ball
    (each only if the file exists), the configuration file when a
    parameter set is used, and the freshly created tar ball with the
    additional input files.
    """
    ## code
    inp_box = [tgz for tgz in (self.tgzNameWithPath, self.MLtgzfile)
               if os.path.isfile(tgz)]
    ## config
    if self.pset is not None:
        inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
    ## additional input files
    inp_box.append(self.additionalInputFileTgz())
    return inp_box
1179    
def outputSandbox(self, nj):
    """
    Build the list of file names to be put in the JDL output sandbox.

    Every user declared output file (regular ones first, then the
    sandbox-only ones) is listed with the one-based job number inserted
    by numberFile_.
    """
    suffix = str(nj + 1)
    declared = self.output_file+self.output_file_sandbox
    return [self.numberFile_(out, suffix) for out in declared]
1191    
def prepareSteeringCards(self):
    """
    Hook for the initial modification of the user's steering card file.

    Nothing is done for this job type; the method returns None.
    """
    return None
1197    
def wsRenameOutput(self, nj):
    """
    Returns part of a job script which renames the produced files.

    For each declared output file the script moves it to $RUNTIME_AREA
    and copies it to its job-numbered name, or sets exit_status=60302
    when the file is missing. Only for the regular output files
    (self.output_file) the failure is also echoed and stored in
    output_exit_status. With the condor_g scheduler a dummy output file
    is written on OSG when the real one is missing. 'nj' is not used.
    """

    def checkSnippet(fileWithSuffix, propagate):
        # shell text checking one output file; 'propagate' adds the two
        # lines that forward the failure to output_exit_status
        numbered = self.numberFile_(fileWithSuffix, '$NJob')
        lines = [
            '\n',
            '# check output file\n',
            'if [ -e ./'+fileWithSuffix+' ] ; then\n',
            ' mv '+fileWithSuffix+' $RUNTIME_AREA\n',
            ' cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+numbered+'\n',
            'else\n',
            ' exit_status=60302\n',
            ' echo "ERROR: Problem with output file '+fileWithSuffix+'"\n',
        ]
        if propagate:
            lines.append(' echo "JOB_EXIT_STATUS = $exit_status"\n')
            lines.append(' output_exit_status=$exit_status\n')
        if common.scheduler.boss_scheduler_name == 'condor_g':
            lines.extend([
                ' if [ $middleware == OSG ]; then \n',
                ' echo "prepare dummy output file"\n',
                ' echo "Processing of job output failed" > $RUNTIME_AREA/'+numbered+'\n',
                ' fi \n',
            ])
        lines.append('fi\n')
        return ''.join(lines)

    pieces = [
        '\n',
        '# directory content\n',
        'ls \n',
        'output_exit_status=0\n',
    ]
    pieces.extend([checkSnippet(name, False) for name in self.output_file_sandbox])
    pieces.extend([checkSnippet(name, True) for name in self.output_file])

    numbered_outputs = [self.numberFile_(name, '$NJob') for name in self.output_file]
    pieces.append('file_list="'+' '.join(numbered_outputs)+'"\n')
    pieces.append('cd $RUNTIME_AREA\n')
    return ''.join(pieces)
1251    
def numberFile_(self, file, txt):
    """
    append _'txt' before last extension of a file

    'file' is the file name (it may contain '/' separated directories)
    and 'txt' the text to insert. Examples: "out.root" -> "out_<txt>.root",
    "a.b.root" -> "a.b_<txt>.root", "noext" -> "noext_<txt>".
    A '.' that occurs only in a directory component is not treated as
    the start of an extension.
    """
    dot = file.rfind(".")
    # the dot starts an extension only if it lies after the last '/'
    if dot > file.rfind("/"):
        return file[:dot] + '_' + txt + file[dot:]
    return file + '_' + txt
1269    
def getRequirements(self, nj=None):
    """
    return job requirements to add to jdl files

    The result is a ' && ' separated expression asking for the software
    tag of self.version (when set), the software tag of
    self.executable_arch (when set) and outbound connectivity of the
    worker node. 'nj' is accepted for interface compatibility and is
    not used.
    """
    runtimeEnv = '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + runtimeEnv)
    ## SL add requirement for OS version
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + runtimeEnv)
    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')

    # joining avoids a dangling ' && ' when the first clause is absent
    return ' && '.join(clauses)
1289 gutsche 1.3
def configFilename(self):
    """ Return the name of the configuration file: self.name() plus '.cfg' """
    base = self.name()
    return base+'.cfg'
1293    
1294     ### OLI_DANIELE
def wsSetupCMSOSGEnvironment_(self):
    """
    Return the part of the job script that sets up the CMS software
    environment on OSG worker nodes.

    The generated shell text exports SCRAM_ARCH and sources
    cmsset_default.sh for self.version from $GRID3_APP_DIR or, failing
    that, from $OSG_APP; when neither file exists it reports exit code
    10020 and exits.
    """
    archExport = ' export SCRAM_ARCH='+self.executable_arch+'\n'
    # lines shared by both failure reports: reset the report file and
    # store the monitoring identifiers in it again
    resetReport = [
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
    ]

    pieces = [
        '\n',
        ' echo "### SETUP CMS OSG ENVIRONMENT ###"\n',
        ' if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n',
        ' # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n',
        archExport,
        ' source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n',
        ' elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        archExport,
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n',
        ' else\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
    ]
    pieces.extend(resetReport)
    # NOTE(review): the generated script exits here, so the working
    # directory clean-up emitted below is never reached - confirm intent
    pieces.extend([
        ' exit 1\n',
        '\n',
        ' echo "Remove working directory: $WORKING_DIR"\n',
        ' cd $RUNTIME_AREA\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10017"\n',
        ' echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
    ])
    pieces.extend(resetReport)
    pieces.extend([
        ' fi\n',
        '\n',
        ' exit 1\n',
        ' fi\n',
        '\n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
        ' echo " END SETUP CMS OSG ENVIRONMENT "\n',
    ])

    return ''.join(pieces)
1340    
1341     ### OLI_DANIELE
def wsSetupCMSLCGEnvironment_(self):
    """
    Return the part of the job script that sets up the CMS software
    environment on LCG worker nodes.

    The generated shell text checks $VO_CMS_SW_DIR (exit code 10031),
    checks that cmsset_default.sh is there (10020) and sources it
    (10032 when sourcing fails). No instance attribute is read.
    """
    # identical ending of every failure branch: dump the status, reset
    # the report file with the monitoring identifiers and leave
    abortTail = [
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
    ]

    pieces = [
        ' \n',
        ' echo " ### SETUP CMS LCG ENVIRONMENT ### "\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n',
        ' echo "JOB_EXIT_STATUS = 10031" \n',
        ' echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n',
    ]
    pieces.extend(abortTail)
    pieces.extend([
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
    ])
    pieces.extend(abortTail)
    pieces.extend([
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' echo "JOB_EXIT_STATUS = 10032"\n',
        ' echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n',
    ])
    pieces.extend(abortTail)
    pieces.extend([
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
        ' echo "### END SETUP CMS LCG ENVIRONMENT ###"\n',
    ])
    return ''.join(pieces)
1388 gutsche 1.5
1389 fanzago 1.93 ### FEDE FOR DBS OUTPUT PUBLICATION
1390     def modifyReport(self, nj):
1391     """
1392     insert the part of the script that modifies the FrameworkJob Report
1393     """
1394 fanzago 1.94
1395 fanzago 1.93 txt = ''
1396 fanzago 1.94 try:
1397     publish_data = int(self.cfg_params['USER.publish_data'])
1398     except KeyError:
1399     publish_data = 0
1400 fanzago 1.116.2.1 if (publish_data == 1):
1401     txt += 'echo "Modify Job Report" \n'
1402     #txt += 'chmod a+x $RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1403     ################ FEDE FOR DBS2 #############################################
1404     txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1405     #############################################################################
1406     #try:
1407     # publish_data = int(self.cfg_params['USER.publish_data'])
1408     #except KeyError:
1409     # publish_data = 0
1410 fanzago 1.94
1411 fanzago 1.116.2.1 txt += 'if [ -z "$SE" ]; then\n'
1412     txt += ' SE="" \n'
1413     txt += 'fi \n'
1414     txt += 'if [ -z "$SE_PATH" ]; then\n'
1415     txt += ' SE_PATH="" \n'
1416     txt += 'fi \n'
1417     txt += 'echo "SE = $SE"\n'
1418     txt += 'echo "SE_PATH = $SE_PATH"\n'
1419 fanzago 1.94
1420 fanzago 1.116.2.1 #if (publish_data == 1):
1421 fanzago 1.94 #processedDataset = self.cfg_params['USER.processed_datasetname']
1422     processedDataset = self.cfg_params['USER.publish_data_name']
1423     txt += 'ProcessedDataset='+processedDataset+'\n'
1424     #### LFN=/store/user/<user>/processedDataset_PSETHASH
1425     txt += 'if [ "$SE_PATH" == "" ]; then\n'
1426 fanzago 1.101 #### FEDE: added slash in LFN ##############
1427     txt += ' FOR_LFN=/copy_problems/ \n'
1428 fanzago 1.94 txt += 'else \n'
1429     txt += ' tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1430 fanzago 1.101 ##### FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1431 fanzago 1.94 txt += ' FOR_LFN=/store$tmp \n'
1432     txt += 'fi \n'
1433     txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1434     txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1435 spiga 1.103 txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1436     #txt += 'echo "$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1437     txt += 'echo "$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1438     txt += '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1439     #txt += '$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1440    
1441     txt += 'modifyReport_result=$?\n'
1442     txt += 'echo modifyReport_result = $modifyReport_result\n'
1443     txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1444     txt += ' exit_status=1\n'
1445     txt += ' echo "ERROR: Problem with ModifyJobReport"\n'
1446     txt += 'else\n'
1447     txt += ' mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1448     txt += 'fi\n'
1449 fanzago 1.94 else:
1450 fanzago 1.116.2.1 txt += 'echo "no data publication required"\n'
1451     #txt += 'ProcessedDataset=no_data_to_publish \n'
1452 fanzago 1.101 #### FEDE: added slash in LFN ##############
1453 fanzago 1.116.2.1 #txt += 'FOR_LFN=/local/ \n'
1454     #txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1455     #txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1456 fanzago 1.93 return txt
1457 fanzago 1.99
def cleanEnv(self):
    """
    Return the shell fragment that cleans up the worker node on OSG.

    The fragment only acts when $middleware is OSG: it changes to
    $RUNTIME_AREA, removes $WORKING_DIR and, if the directory still
    exists afterwards, reports exit code 60999 through the $repo file.
    """
    script_lines = (
        'if [ $middleware == OSG ]; then\n',
        ' cd $RUNTIME_AREA\n',
        ' echo "Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n',
        ' echo "JOB_EXIT_STATUS = 60999"\n',
        ' echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' fi\n',
        'fi\n',
        '\n',
    )
    return ''.join(script_lines)
1477 fanzago 1.93
def setParam_(self, param, value):
    """
    Store value under the key param in this object's parameter table.
    """
    table = self._params
    table[param] = value
1480    
def getParams(self):
    """
    Return the parameter table itself (not a copy).
    """
    table = self._params
    return table
1483 gutsche 1.8
def setTaskid_(self):
    """
    Copy the 'taskId' configuration entry into self._taskId.

    A KeyError is raised if the configuration has no 'taskId' entry.
    """
    task_id = self.cfg_params['taskId']
    self._taskId = task_id
1486    
def getTaskid(self):
    """
    Return the task id held in self._taskId.
    """
    task_id = self._taskId
    return task_id
1489 gutsche 1.35
def uniquelist(self, old):
    """
    Remove duplicates from a list.

    Returns a new list holding each distinct element of old exactly
    once, in order of first appearance.  Elements must be hashable.

    The result is always a real list: returning dict.keys() directly
    yields an arbitrarily ordered list on Python 2 and a non-list view
    object on Python 3.
    """
    seen = {}
    unique = []
    for element in old:
        if element not in seen:
            seen[element] = 0
            unique.append(element)
    return unique
1498 fanzago 1.116.2.1
1499    
def checkOut(self, limit):
    """
    Return the shell fragment that checks the size of the output files.

    limit is written into the script as the maximum total size allowed
    for the output sandbox.  The generated script sums the sizes of all
    output files (self.output_file plus self.output_file_sandbox, except
    names containing 'crab_fjr').  If the sum exceeds the limit, it
    finds the first file that pushes the running total over the limit,
    removes that file and every file listed after it, and sets
    exit_status=70000.

    File names are passed through self.numberFile_(name, '$NJob')
    before being placed in the script.
    """
    numbered = [self.numberFile_(name, '$NJob')
                for name in (self.output_file + self.output_file_sandbox)
                if name.find('crab_fjr') == -1]
    # Each name keeps a leading blank, as in the historical script text.
    allOutFiles = ''.join([' ' + name for name in numbered])

    lines = [
        'echo "*****************************************"\n',
        'echo "** Starting output sandbox limit check **"\n',
        'echo "*****************************************"\n',
        'echo "OUTPUT files: ' + allOutFiles + '";\n',
        'ls -gGhrta;\n',
        'sum=0;\n',
        'for file in ' + allOutFiles + ' ; do\n',
        ' if [ -e $file ]; then\n',
        " tt=`ls -gGrta $file | awk '{ print $3 }'`\n",
        ' sum=`expr $sum + $tt`\n',
        ' else\n',
        ' echo "WARNING: output file $file not found!"\n',
        ' fi\n',
        'done\n',
        'echo "Total Output dimension: $sum";\n',
        'limit=' + str(limit) + ';\n',
        'echo "OUTPUT FILES LIMIT SET TO: $limit";\n',
        'if [ $limit -lt $sum ]; then\n',
        ' echo "WARNING: output files have to big size - something will be lost;"\n',
        ' echo " checking the output file sizes..."\n',
        ' tot=0;\n',
        ' fileLast="";\n',
        ' for file2 in ' + allOutFiles + ' ; do\n',
        # Missing files were not counted in $sum, so skip them here too
        # instead of feeding an empty size to expr.
        ' if [ -e $file2 ]; then\n',
        " tt=`ls -gGrta $file2 | awk '{ print $3 }';`\n",
        ' tot=`expr $tot + $tt`;\n',
        ' if [ $limit -lt $tot ]; then\n',
        ' tot=`expr $tot - $tt`;\n',
        # Remember the file of THIS loop; $file is a leftover from the
        # summing loop above and always names the last output file.
        ' fileLast=$file2;\n',
        ' break;\n',
        ' fi\n',
        ' fi\n',
        ' done\n',
        ' echo "Dimension calculated: $tot"; echo "First file to exclude: $fileLast";\n',
        ' flag=0;\n',
        ' for filess in ' + allOutFiles + ' ; do\n',
        # Quoted so the test stays valid even if fileLast is empty.
        ' if [ "$fileLast" = "$filess" ]; then\n',
        ' flag=1;\n',
        ' fi\n',
        ' if [ $flag -eq 1 ]; then\n',
        ' rm -f $filess;\n',
        ' fi\n',
        ' done\n',
        ' ls -agGhrt;\n',
        ' echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n',
        ' echo "JOB_EXIT_STATUS = 70000";\n',
        ' exit_status=70000;\n',
        'else\n',
        ' echo "Total Output dimension $sum is fine.";\n',
        'fi\n',
        'echo "*****************************************"\n',
        'echo "*** Ending output sandbox limit check ***"\n',
        'echo "*****************************************"\n',
    ]
    return ''.join(lines)