ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.138
Committed: Fri Nov 16 13:20:56 2007 UTC (17 years, 5 months ago) by slacapra
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_2_0_2, CRAB_2_0_2_pre6
Changes since 1.137: +1 -7 lines
Log Message:
fix bug in setting sourceSeed from cfg

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5 fanzago 1.115 from BlackWhiteListParser import BlackWhiteListParser
6 slacapra 1.1 import common
7     import Scram
8    
9 slacapra 1.105 import os, string, glob
10 slacapra 1.1
11     class Cmssw(JobType):
12 gutsche 1.38 def __init__(self, cfg_params, ncjobs):
# Configure the CMSSW job type from the CRAB configuration, run data
# discovery (when a dataset is given), build the user tarball, split the
# task into jobs and write the edited parameter set.
#
# cfg_params: mapping of 'SECTION.key' entries; each option below is read
#             with cfg_params[...] and a KeyError means "option not set".
# ncjobs:     number of jobs requested to be created; stored in self.ncjobs
#             (jobSplittingByBlocks compares it against the literal 'all').
# Raises CrabException for an unsupported version/architecture pair, a
# missing datasetpath or pset entry, missing user files, an invalid
# combination of splitting options, or any failure while editing the pset.
# NOTE(review): this text is an annotated listing whose indentation was
# lost, so block nesting cannot be read from it — confirm against the
# repository file before relying on nesting-dependent remarks below.
13 slacapra 1.1 JobType.__init__(self, 'CMSSW')
14     common.logger.debug(3,'CMSSW::__init__')
15    
16 gutsche 1.3 self._params = {}
17     self.cfg_params = cfg_params
18 gutsche 1.38
19 fanzago 1.115 # init BlackWhiteListParser
20     self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
21    
# Optional tarball size limit; 9.5 is used when EDG.maxtarballsize is not
# configured (the unit is not stated in this block).
22 gutsche 1.72 try:
23     self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24     except KeyError:
25 slacapra 1.86 self.MaxTarBallSize = 9.5
26 gutsche 1.72
27 gutsche 1.44 # number of jobs requested to be created, limit obj splitting
28 gutsche 1.38 self.ncjobs = ncjobs
29    
30 slacapra 1.1 log = common.logger
31 ewv 1.131
32 slacapra 1.1 self.scram = Scram.Scram(cfg_params)
33     self.additional_inbox_files = []
34     self.scriptExe = ''
35     self.executable = ''
36 slacapra 1.71 self.executable_arch = self.scram.getArch()
37 slacapra 1.1 self.tgz_name = 'default.tgz'
38 slacapra 1.97 self.additional_tgz_name = 'additional.tgz'
39 corvo 1.56 self.scriptName = 'CMSSW.sh'
40 ewv 1.131 self.pset = '' #scrip use case Da
41 spiga 1.42 self.datasetPath = '' #scrip use case Da
42 gutsche 1.3
43 gutsche 1.50 # set FJR file name
44     self.fjrFileName = 'crab_fjr.xml'
45    
46 slacapra 1.1 self.version = self.scram.getSWVersion()
47 ewv 1.131
48 spiga 1.114 #
49     # Try to block creation in case of arch/version mismatch
50     #
51    
# The version string is split on '_' and fields 1 and 2 are read as
# integers — assumes a name shaped like <name>_<major>_<minor>_...; a
# different shape raises IndexError/ValueError here. TODO confirm format.
52     a = string.split(self.version, "_")
53    
# find(...) == 0 means the architecture string starts with that prefix.
# Major 1 with minor < 5 on slc4 only warns; major 1 with minor >= 5 on
# slc3 aborts.
54     if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
55 fanzago 1.134 msg = "Warning: You are using %s version of CMSSW with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
56     common.logger.message(msg)
57 spiga 1.114 if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58     msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
59     raise CrabException(msg)
60 ewv 1.131
61 slacapra 1.55 common.taskDB.setDict('codeVersion',self.version)
62 gutsche 1.5 self.setParam_('application', self.version)
63 slacapra 1.47
64 slacapra 1.1 ### collect Data cards
65 gutsche 1.66
# CMSSW.datasetpath is mandatory. The literal 'none' (any letter case)
# selects the no-input mode: datasetPath becomes None, selectNoInput 1.
66 slacapra 1.1 try:
67 slacapra 1.9 tmp = cfg_params['CMSSW.datasetpath']
68     log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
69     if string.lower(tmp)=='none':
70     self.datasetPath = None
71 slacapra 1.21 self.selectNoInput = 1
72 slacapra 1.9 else:
73     self.datasetPath = tmp
74 slacapra 1.21 self.selectNoInput = 0
75 slacapra 1.1 except KeyError:
76 ewv 1.131 msg = "Error: datasetpath not defined "
77 slacapra 1.1 raise CrabException(msg)
78 gutsche 1.5
79     # ML monitoring
80     # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
81 slacapra 1.9 if not self.datasetPath:
82     self.setParam_('dataset', 'None')
83     self.setParam_('owner', 'None')
84     else:
# Fields 1 and 2 of the '/'-separated path are reported as dataset and
# owner. The bare except below falls back to the whole path for both when
# anything in the try fails (e.g. fewer than three fields).
85 gutsche 1.92 try:
86     datasetpath_split = self.datasetPath.split("/")
87     # standard style
88 mcinquil 1.120 self.setParam_('datasetFull', self.datasetPath)
89 slacapra 1.137 self.setParam_('dataset', datasetpath_split[1])
90     self.setParam_('owner', datasetpath_split[2])
91 gutsche 1.92 except:
92     self.setParam_('dataset', self.datasetPath)
93     self.setParam_('owner', self.datasetPath)
94 ewv 1.131
95 gutsche 1.8 self.setTaskid_()
96     self.setParam_('taskId', self.cfg_params['taskId'])
97 gutsche 1.5
98 slacapra 1.1 self.dataTiers = []
99    
100     ## now the application
# CMSSW.executable is optional; 'cmsRun' is used when it is not set.
101     try:
102     self.executable = cfg_params['CMSSW.executable']
103 gutsche 1.5 self.setParam_('exe', self.executable)
104 slacapra 1.1 log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
105     msg = "Default executable cmsRun overridden. Switch to " + self.executable
106     log.debug(3,msg)
107     except KeyError:
108     self.executable = 'cmsRun'
109 gutsche 1.5 self.setParam_('exe', self.executable)
110 slacapra 1.1 msg = "User executable not defined. Use cmsRun"
111     log.debug(3,msg)
112     pass
113    
# CMSSW.pset is mandatory. The literal 'none' (any letter case) means
# "no parameter set" and is stored as None; any other value must name an
# existing path.
114     try:
115     self.pset = cfg_params['CMSSW.pset']
116     log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
117 ewv 1.131 if self.pset.lower() != 'none' :
118 spiga 1.42 if (not os.path.exists(self.pset)):
119     raise CrabException("User defined PSet file "+self.pset+" does not exist")
120     else:
121     self.pset = None
122 slacapra 1.1 except KeyError:
123     raise CrabException("PSet file missing. Cannot run cmsRun ")
124    
125     # output files
126 slacapra 1.53 ## stuff which must be returned always via sandbox
127     self.output_file_sandbox = []
128    
129     # add fjr report by default via sandbox
130     self.output_file_sandbox.append(self.fjrFileName)
131    
132     # other output files to be returned via sandbox or copied to SE
# CMSSW.output_file is a comma-separated list; each entry is stripped of
# surrounding whitespace. The loop below reuses the name `tmp`.
133 slacapra 1.1 try:
134     self.output_file = []
135     tmp = cfg_params['CMSSW.output_file']
136     if tmp != '':
137     tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
138     log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
139     for tmp in tmpOutFiles:
140     tmp=string.strip(tmp)
141     self.output_file.append(tmp)
142     pass
143     else:
144 gutsche 1.92 log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
145 slacapra 1.1 pass
146     pass
147     except KeyError:
148 gutsche 1.92 log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
149 slacapra 1.1 pass
150    
151     # script_exe file as additional file in inputSandbox
# A non-empty USER.script_exe must be an existing regular file; it is
# appended (stripped) to self.additional_inbox_files.
152     try:
153 slacapra 1.10 self.scriptExe = cfg_params['USER.script_exe']
154     if self.scriptExe != '':
155     if not os.path.isfile(self.scriptExe):
156 slacapra 1.64 msg ="ERROR. file "+self.scriptExe+" not found"
157 slacapra 1.10 raise CrabException(msg)
158 spiga 1.42 self.additional_inbox_files.append(string.strip(self.scriptExe))
159 slacapra 1.1 except KeyError:
160 spiga 1.42 self.scriptExe = ''
161 slacapra 1.70
162 spiga 1.42 #CarlosDaniele
# With neither a dataset nor a pset, a user script is the only thing left
# to run, so it becomes mandatory.
163     if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
164 slacapra 1.70 msg ="Error. script_exe not defined"
165 spiga 1.42 raise CrabException(msg)
166    
167 slacapra 1.1 ## additional input files
# USER.additional_input_files is a comma-separated list. Entries that
# contain '*' are expanded with glob (relative patterns are joined with
# "."); every resulting path must exist.
# NOTE(review): tmp[0] raises IndexError for an empty entry (e.g. a
# trailing comma); only KeyError is handled by this try.
168     try:
169 slacapra 1.29 tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
170 slacapra 1.70 for tmp in tmpAddFiles:
171     tmp = string.strip(tmp)
172     dirname = ''
173     if not tmp[0]=="/": dirname = "."
174 corvo 1.85 files = []
175     if string.find(tmp,"*")>-1:
176     files = glob.glob(os.path.join(dirname, tmp))
177     if len(files)==0:
178     raise CrabException("No additional input file found with this pattern: "+tmp)
179     else:
180     files.append(tmp)
181 slacapra 1.70 for file in files:
182     if not os.path.exists(file):
183     raise CrabException("Additional input file not found: "+file)
184 slacapra 1.45 pass
185 slacapra 1.105 # fname = string.split(file, '/')[-1]
186     # storedFile = common.work_space.pathForTgz()+'share/'+fname
187     # shutil.copyfile(file, storedFile)
188     self.additional_inbox_files.append(string.strip(file))
189 slacapra 1.1 pass
190     pass
191 slacapra 1.70 common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
192 slacapra 1.1 except KeyError:
193     pass
194    
195 slacapra 1.9 # files per job
# Any truthy CMSSW.files_per_jobs value aborts: the option is rejected.
196 slacapra 1.1 try:
197 gutsche 1.35 if (cfg_params['CMSSW.files_per_jobs']):
198     raise CrabException("files_per_jobs no longer supported. Quitting.")
199 gutsche 1.3 except KeyError:
200 gutsche 1.35 pass
201 gutsche 1.3
# The three splitting options below each set a value plus a 1/0 flag; the
# flags are summed further down to check how many options were given.
202 slacapra 1.9 ## Events per job
203 gutsche 1.3 try:
204 slacapra 1.10 self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
205 slacapra 1.9 self.selectEventsPerJob = 1
206 gutsche 1.3 except KeyError:
207 slacapra 1.9 self.eventsPerJob = -1
208     self.selectEventsPerJob = 0
209 ewv 1.131
210 slacapra 1.22 ## number of jobs
211     try:
212     self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
213     self.selectNumberOfJobs = 1
214     except KeyError:
215     self.theNumberOfJobs = 0
216     self.selectNumberOfJobs = 0
217 slacapra 1.10
218 gutsche 1.35 try:
219     self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
220     self.selectTotalNumberEvents = 1
221     except KeyError:
222     self.total_number_of_events = 0
223     self.selectTotalNumberEvents = 0
224    
# With a pset exactly two of the three options must be set; without one
# only number_of_jobs is required.
225 ewv 1.131 if self.pset != None: #CarlosDaniele
226 spiga 1.42 if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
227     msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
228     raise CrabException(msg)
229     else:
230     if (self.selectNumberOfJobs == 0):
231     msg = 'Must specify number_of_jobs.'
232     raise CrabException(msg)
233 gutsche 1.35
# Optional seeds and first run: each is an int, or None when not set.
# NOTE(review): later code tests these with plain truthiness, so a
# configured value of 0 is treated like "not set".
234 slacapra 1.22 ## source seed for pythia
235     try:
236     self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
237     except KeyError:
238 slacapra 1.23 self.sourceSeed = None
239     common.logger.debug(5,"No seed given")
240 slacapra 1.22
241 slacapra 1.28 try:
242     self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
243     except KeyError:
244     self.sourceSeedVtx = None
245     common.logger.debug(5,"No vertex seed given")
246 slacapra 1.90
247     try:
248     self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
249     except KeyError:
250     self.sourceSeedG4 = None
251     common.logger.debug(5,"No g4 sim hits seed given")
252    
253     try:
254     self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
255     except KeyError:
256     self.sourceSeedMix = None
257     common.logger.debug(5,"No mix seed given")
258    
259 spiga 1.57 try:
260     self.firstRun = int(cfg_params['CMSSW.first_run'])
261     except KeyError:
262     self.firstRun = None
263     common.logger.debug(5,"No first run given")
# PsetEdit exists only when a pset was given; it is used again in the
# "modify Pset" section at the end, under the same condition.
264 spiga 1.42 if self.pset != None: #CarlosDaniele
265 ewv 1.131 import PsetManipulator as pp
266 slacapra 1.97 PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
267 gutsche 1.3
268 slacapra 1.1 #DBSDLS-start
269 ewv 1.131 ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
270 slacapra 1.1 self.maxEvents=0 # max events available ( --> check the requested nb. of evts in Creator.py)
271     self.DBSPaths={} # all dbs paths requested ( --> input to the site local discovery script)
272 gutsche 1.35 self.jobDestination=[] # Site destination(s) for each job (list of lists)
273 slacapra 1.1 ## Perform the data location and discovery (based on DBS/DLS)
274 slacapra 1.9 ## SL: Don't if NONE is specified as input (pythia use case)
275 gutsche 1.35 blockSites = {}
276 slacapra 1.9 if self.datasetPath:
277 gutsche 1.35 blockSites = self.DataDiscoveryAndLocation(cfg_params)
278 ewv 1.131 #DBSDLS-end
279 slacapra 1.1
280     self.tgzNameWithPath = self.getTarBall(self.executable)
281 ewv 1.131
# No input + no pset -> script splitting; no input + pset -> event-based
# splitting; otherwise split along the discovered blocks.
282 slacapra 1.9 ## Select Splitting
283 ewv 1.131 if self.selectNoInput:
284 spiga 1.42 if self.pset == None: #CarlosDaniele
285     self.jobSplittingForScript()
286     else:
287     self.jobSplittingNoInput()
288 gutsche 1.92 else:
289 corvo 1.56 self.jobSplittingByBlocks(blockSites)
290 gutsche 1.5
291 slacapra 1.22 # modify Pset
# The editor is called with the literal "INPUTFILE" and with 0 values —
# presumably placeholders that are filled in per job; confirm against
# PsetManipulator.
# NOTE(review): whether the vtx/g4/mix tests are nested under the
# sourceSeed test cannot be read from this listing — confirm.
# NOTE(review): the bare except at the end replaces every failure in this
# section with one generic message, hiding the underlying cause.
292 spiga 1.42 if self.pset != None: #CarlosDaniele
293 slacapra 1.86 try:
294     if (self.datasetPath): # standard job
295     # allow to processa a fraction of events in a file
296 ewv 1.131 PsetEdit.inputModule("INPUTFILE")
297     PsetEdit.maxEvent(0)
298     PsetEdit.skipEvent(0)
299 slacapra 1.86 else: # pythia like job
300 slacapra 1.90 PsetEdit.maxEvent(self.eventsPerJob)
301 slacapra 1.86 if (self.firstRun):
302 ewv 1.131 PsetEdit.pythiaFirstRun(0) #First Run
303 slacapra 1.86 if (self.sourceSeed) :
304 ewv 1.131 PsetEdit.pythiaSeed(0)
305 slacapra 1.86 if (self.sourceSeedVtx) :
306 ewv 1.131 PsetEdit.vtxSeed(0)
307 slacapra 1.90 if (self.sourceSeedG4) :
308 ewv 1.131 PsetEdit.g4Seed(0)
309 slacapra 1.90 if (self.sourceSeedMix) :
310 ewv 1.131 PsetEdit.mixSeed(0)
311 slacapra 1.86 # add FrameworkJobReport to parameter-set
312 slacapra 1.90 PsetEdit.addCrabFJR(self.fjrFileName)
313     PsetEdit.psetWriter(self.configFilename())
314 slacapra 1.86 except:
315     msg='Error while manipuliating ParameterSet: exiting...'
316     raise CrabException(msg)
317 gutsche 1.3
318 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
319    
320 slacapra 1.86 import DataDiscovery
321     import DataLocation
322 gutsche 1.3 common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
323    
324     datasetPath=self.datasetPath
325    
326 slacapra 1.1 ## Contact the DBS
327 gutsche 1.92 common.logger.message("Contacting Data Discovery Services ...")
328 slacapra 1.1 try:
329 gutsche 1.66
330 slacapra 1.137 self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
331 slacapra 1.1 self.pubdata.fetchDBSInfo()
332    
333 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
334 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
335     raise CrabException(msg)
336 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
337 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
338     raise CrabException(msg)
339 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
340 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
341 slacapra 1.1 raise CrabException(msg)
342    
343 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
344 mkirn 1.37 self.eventsbyblock=self.pubdata.getEventsPerBlock()
345     self.eventsbyfile=self.pubdata.getEventsPerFile()
346 gutsche 1.3
347 slacapra 1.1 ## get max number of events
348 ewv 1.131 self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py
349 slacapra 1.1
350     ## Contact the DLS and build a list of sites hosting the fileblocks
351     try:
352 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
353 gutsche 1.6 dataloc.fetchDLSInfo()
354 slacapra 1.41 except DataLocation.DataLocationError , ex:
355 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
356     raise CrabException(msg)
357 ewv 1.131
358 slacapra 1.1
359 gutsche 1.35 sites = dataloc.getSites()
360     allSites = []
361     listSites = sites.values()
362 slacapra 1.63 for listSite in listSites:
363     for oneSite in listSite:
364 gutsche 1.35 allSites.append(oneSite)
365     allSites = self.uniquelist(allSites)
366 gutsche 1.3
367 gutsche 1.92 # screen output
368     common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
369    
370 gutsche 1.35 return sites
371 ewv 1.131
372 gutsche 1.35 def jobSplittingByBlocks(self, blockSites):
373 slacapra 1.9 """
374 gutsche 1.35 Perform job splitting. Jobs run over an integer number of files
375     and no more than one block.
376     ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
377     REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberofJobs,
378     self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
379     self.maxEvents, self.filesbyblock
380     SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
381     self.total_number_of_jobs - Total # of jobs
382     self.list_of_args - File(s) job will run on (a list of lists)
383     """
# Also reads self.eventsbyblock, self.eventsbyfile, self.ncjobs and
# self.blackWhiteListParser, and overwrites self.ncjobs with the number of
# jobs actually created.
# NOTE(review): this text is an annotated listing whose indentation was
# lost; remarks that depend on block nesting need confirming against the
# repository file.
384    
385     # ---- Handle the possible job splitting configurations ---- #
386     if (self.selectTotalNumberEvents):
387     totalEventsRequested = self.total_number_of_events
388     if (self.selectEventsPerJob):
389     eventsPerJobRequested = self.eventsPerJob
390     if (self.selectNumberOfJobs):
391     totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
392    
393     # If user requested all the events in the dataset
394     if (totalEventsRequested == -1):
395     eventsRemaining=self.maxEvents
396     # If user requested more events than are in the dataset
397     elif (totalEventsRequested > self.maxEvents):
398     eventsRemaining = self.maxEvents
# NOTE(review): the message prints self.total_number_of_events, not
# totalEventsRequested; the two differ when the total was derived from
# number_of_jobs * events_per_job.
399     common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
400     # If user requested less events than are in the dataset
401     else:
402     eventsRemaining = totalEventsRequested
403 slacapra 1.22
404 slacapra 1.41 # If user requested more events per job than are in the dataset
405     if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
406     eventsPerJobRequested = self.maxEvents
407    
408 gutsche 1.35 # For user info at end
409     totalEventCount = 0
410 gutsche 1.3
# NOTE(review): theNumberOfJobs == 0 would raise ZeroDivisionError here.
411 gutsche 1.35 if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
412     eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
413 slacapra 1.22
414 gutsche 1.35 if (self.selectNumberOfJobs):
415     common.logger.message("May not create the exact number_of_jobs requested.")
416 slacapra 1.23
# ncjobs == 'all' removes the cap in practice by using a very large limit.
417 gutsche 1.38 if ( self.ncjobs == 'all' ) :
418     totalNumberOfJobs = 999999999
419     else :
420     totalNumberOfJobs = self.ncjobs
421 ewv 1.131
422 gutsche 1.38
423 gutsche 1.35 blocks = blockSites.keys()
424     blockCount = 0
425     # Backup variable in case self.maxEvents counted events in a non-included block
426     numBlocksInDataset = len(blocks)
427 gutsche 1.3
428 gutsche 1.35 jobCount = 0
# One entry per job: [file list string, max events, events to skip], all
# stored as strings.
429     list_of_lists = []
430 gutsche 1.3
431 gutsche 1.92 # list tracking which jobs are in which jobs belong to which block
432     jobsOfBlock = {}
433    
434 gutsche 1.35 # ---- Iterate over the blocks in the dataset until ---- #
435     # ---- we've met the requested total # of events ---- #
436 gutsche 1.38 while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
437 gutsche 1.35 block = blocks[blockCount]
438 gutsche 1.44 blockCount += 1
439 gutsche 1.104 if block not in jobsOfBlock.keys() :
440     jobsOfBlock[block] = []
441 ewv 1.131
442 gutsche 1.68 if self.eventsbyblock.has_key(block) :
443     numEventsInBlock = self.eventsbyblock[block]
444     common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
445 ewv 1.131
446 gutsche 1.68 files = self.filesbyblock[block]
447     numFilesInBlock = len(files)
448     if (numFilesInBlock <= 0):
449     continue
450     fileCount = 0
451    
452     # ---- New block => New job ---- #
453 ewv 1.131 parString = ""
454 gutsche 1.68 # counter for number of events in files currently worked on
455     filesEventCount = 0
456     # flag if next while loop should touch new file
457     newFile = 1
458     # job event counter
459     jobSkipEventCount = 0
460 ewv 1.131
461 gutsche 1.68 # ---- Iterate over the files in the block until we've met the requested ---- #
462     # ---- total # of events or we've gone over all the files in this block ---- #
463     while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
464     file = files[fileCount]
465     if newFile :
466     try:
467     numEventsInFile = self.eventsbyfile[file]
468     common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
469     # increase filesEventCount
470     filesEventCount += numEventsInFile
471     # Add file to current job
# Each file is appended as an escaped, quoted name followed by the two
# characters '\,'; the [:-2] slices below remove that trailing separator
# when a job is closed.
472     parString += '\\\"' + file + '\\\"\,'
473     newFile = 0
474     except KeyError:
# NOTE(review): a file with no entry in self.eventsbyfile is only
# logged; the comparisons below still run for this iteration with the
# counters unchanged — confirm that is intended.
475     common.logger.message("File "+str(file)+" has unknown number of events: skipping")
476 ewv 1.131
477 gutsche 1.38
478 gutsche 1.68 # if less events in file remain than eventsPerJobRequested
479     if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
480     # if last file in block
481     if ( fileCount == numFilesInBlock-1 ) :
482     # end job using last file, use remaining events in block
483     # close job and touch new file
484     fullString = parString[:-2]
# A max-events value of -1 is stored for this job instead of
# eventsPerJobRequested.
485     list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
486     common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
487     self.jobDestination.append(blockSites[block])
488     common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
489 gutsche 1.92 # fill jobs of block dictionary
490 gutsche 1.104 jobsOfBlock[block].append(jobCount+1)
491 gutsche 1.68 # reset counter
492     jobCount = jobCount + 1
493     totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
494     eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
495     jobSkipEventCount = 0
496     # reset file
497 ewv 1.131 parString = ""
498 gutsche 1.68 filesEventCount = 0
499     newFile = 1
500     fileCount += 1
501     else :
502     # go to next file
503     newFile = 1
504     fileCount += 1
505     # if events in file equal to eventsPerJobRequested
506     elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
507 gutsche 1.38 # close job and touch new file
508     fullString = parString[:-2]
509 gutsche 1.68 list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
510     common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
511 gutsche 1.38 self.jobDestination.append(blockSites[block])
512     common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
513 gutsche 1.104 jobsOfBlock[block].append(jobCount+1)
514 gutsche 1.38 # reset counter
515     jobCount = jobCount + 1
516 gutsche 1.68 totalEventCount = totalEventCount + eventsPerJobRequested
517     eventsRemaining = eventsRemaining - eventsPerJobRequested
518 gutsche 1.38 jobSkipEventCount = 0
519     # reset file
520 ewv 1.131 parString = ""
521 gutsche 1.38 filesEventCount = 0
522     newFile = 1
523     fileCount += 1
524 ewv 1.131
525 gutsche 1.68 # if more events in file remain than eventsPerJobRequested
526 gutsche 1.38 else :
527 gutsche 1.68 # close job but don't touch new file
528     fullString = parString[:-2]
529     list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
530     common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
531     self.jobDestination.append(blockSites[block])
532     common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
533 gutsche 1.104 jobsOfBlock[block].append(jobCount+1)
534 gutsche 1.68 # increase counter
535     jobCount = jobCount + 1
536     totalEventCount = totalEventCount + eventsPerJobRequested
537     eventsRemaining = eventsRemaining - eventsPerJobRequested
538     # calculate skip events for last file
539     # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
540     jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
541     # remove all but the last file
542     filesEventCount = self.eventsbyfile[file]
543 ewv 1.131 parString = ""
544 gutsche 1.68 parString += '\\\"' + file + '\\\"\,'
545     pass # END if
546     pass # END while (iterate over files in the block)
547 gutsche 1.35 pass # END while (iterate over blocks in the dataset)
# The requested job count is replaced by the number actually created.
548 slacapra 1.41 self.ncjobs = self.total_number_of_jobs = jobCount
549 gutsche 1.38 if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
550 gutsche 1.35 common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
551 gutsche 1.92 common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
552 ewv 1.131
553 gutsche 1.92 # screen output
554     screenOutput = "List of jobs and available destination sites:\n\n"
555    
556 mcinquil 1.124 # keep trace of block with no sites to print a warning at the end
# noSiteBlock collects the job ranges and bloskNoSite the block counters
# of blocks left without any site after black/white list filtering.
557     noSiteBlock = []
558     bloskNoSite = []
559    
560 gutsche 1.92 blockCounter = 0
561 gutsche 1.104 for block in blocks:
562     if block in jobsOfBlock.keys() :
563     blockCounter += 1
# spanRanges is not defined in this block — presumably provided by the
# star import from crab_util; confirm.
564 fanzago 1.115 screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
565 mcinquil 1.124 if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
566 ewv 1.131 noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
567 mcinquil 1.124 bloskNoSite.append( blockCounter )
568 ewv 1.131
569 mcinquil 1.124 common.logger.message(screenOutput)
570 fanzago 1.127 if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
571 mcinquil 1.126 msg = 'WARNING: No sites are hosting any part of data for block:\n '
# The separator is appended after every item, so a list with more than
# one entry ends with a trailing comma.
572     virgola = ""
573     if len(bloskNoSite) > 1:
574     virgola = ","
575     for block in bloskNoSite:
576     msg += ' ' + str(block) + virgola
577     msg += '\n Related jobs:\n '
578     virgola = ""
579     if len(noSiteBlock) > 1:
580     virgola = ","
581     for range_jobs in noSiteBlock:
582     msg += str(range_jobs) + virgola
583     msg += '\n will not be submitted and this block of data can not be analyzed!\n'
584     common.logger.message(msg)
585 gutsche 1.92
586 slacapra 1.9 self.list_of_args = list_of_lists
587     return
588    
def jobSplittingNoInput(self):
    """
    Perform job splitting based on number of event per job

    Used when no input dataset is given. From the two splitting options
    that were configured, the third quantity is derived with integer
    division, then one argument list per job is built.

    REQUIRES: self.selectEventsPerJob, self.selectNumberOfJobs,
              self.selectTotalNumberEvents, self.eventsPerJob,
              self.theNumberOfJobs, self.total_number_of_events,
              self.firstRun, self.sourceSeed, self.sourceSeedVtx,
              self.sourceSeedG4, self.sourceSeedMix, self.jobDestination
    SETS: self.total_number_of_jobs
          self.total_number_of_events or self.eventsPerJob (the derived one)
          self.list_of_args   - one list of string arguments per job
          self.jobDestination - one [""] entry appended per job
    RAISES: CrabException if total_number_of_events is negative, or if the
            value that has to be used as divisor (events_per_job or
            number_of_jobs) is 0.
    """
    common.logger.debug(5,'Splitting per events')

    if (self.selectEventsPerJob):
        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
    if (self.selectNumberOfJobs):
        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
    if (self.selectTotalNumberEvents):
        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')

    if (self.total_number_of_events < 0):
        msg='Cannot split jobs per Events with "-1" as total number of events'
        raise CrabException(msg)

    if (self.selectEventsPerJob):
        if (self.selectTotalNumberEvents):
            # A zero divisor used to surface as a bare ZeroDivisionError;
            # report it as a configuration error instead.
            if (self.eventsPerJob == 0):
                msg='Cannot split jobs per Events with "0" as events per job'
                raise CrabException(msg)
            self.total_number_of_jobs = int(self.total_number_of_events//self.eventsPerJob)
        elif(self.selectNumberOfJobs) :
            self.total_number_of_jobs =self.theNumberOfJobs
            self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)

    elif (self.selectNumberOfJobs) :
        self.total_number_of_jobs = self.theNumberOfJobs
        if (self.total_number_of_jobs == 0):
            msg='Cannot split jobs per Events with "0" as number of jobs'
            raise CrabException(msg)
        self.eventsPerJob = int(self.total_number_of_events//self.total_number_of_jobs)

    common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs))

    # is there any remainder?
    check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)

    common.logger.debug(5,'Check '+str(check))

    common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
    if check > 0:
        common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))

    # argument is seed number.$i
    self.list_of_args = []
    for i in range(self.total_number_of_jobs):
        ## Since there is no input, any site is good
        self.jobDestination.append([""]) #must be empty to write correctly the xml
        args=[]
        # Each per-job value is the configured number with the job index
        # appended as text (string concatenation, not addition).
        if (self.firstRun):
            ## pythia first run
            args.append(str(self.firstRun)+str(i))
        # NOTE(review): the vtx/g4/mix tests are nested under the pythia
        # seed test here; the nesting was reconstructed from a listing
        # that lost its indentation — confirm against the repository file.
        if (self.sourceSeed):
            args.append(str(self.sourceSeed)+str(i))
            if (self.sourceSeedVtx):
                ## + vtx random seed
                args.append(str(self.sourceSeedVtx)+str(i))
            if (self.sourceSeedG4):
                ## + G4 random seed
                args.append(str(self.sourceSeedG4)+str(i))
            if (self.sourceSeedMix):
                ## + Mix random seed
                args.append(str(self.sourceSeedMix)+str(i))
            pass
        pass
        self.list_of_args.append(args)
    pass

    # print self.list_of_args

    return
656    
657 spiga 1.42
def jobSplittingForScript(self):#CarlosDaniele
    """
    Perform job splitting based on number of job

    Creates self.theNumberOfJobs jobs. Each job gets its own index (as a
    string) as its only argument and an empty destination entry.
    SETS: self.total_number_of_jobs, self.list_of_args
          self.jobDestination - one [""] entry appended per job
    """
    common.logger.debug(5,'Splitting per job')
    common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')

    self.total_number_of_jobs = self.theNumberOfJobs
    jobTotal = self.total_number_of_jobs

    common.logger.debug(5,'N jobs '+str(jobTotal))
    common.logger.message(str(jobTotal)+' jobs can be created')

    # argument is seed number.$i
    self.list_of_args = []
    jobIndices = range(jobTotal)
    ## Since there is no input, any site is good
    self.jobDestination.extend([[""] for index in jobIndices])
    ## no random seed
    self.list_of_args.extend([[str(index)] for index in jobIndices])
    return
680    
def split(self, jobParams):
    """
    Store the arguments and destination of every job in the job DB.

    ARGUMENT: jobParams: list that receives the per-job arguments; one ""
              entry is appended per job, then positions 0..njobs-1 are
              overwritten with the entries of self.list_of_args
    REQUIRES: self.total_number_of_jobs, self.list_of_args,
              self.jobDestination
    """
    common.jobDB.load()
    #### Fabio
    jobTotal = self.total_number_of_jobs
    perJobArgs = self.list_of_args

    # create the empty structure
    for unused in range(jobTotal):
        jobParams.append("")

    for index in range(jobTotal):
        jobParams[index] = perJobArgs[index]
        common.jobDB.setArguments(index, jobParams[index])
        common.logger.debug(5,"Job "+str(index)+" Destination: "+str(self.jobDestination[index]))
        common.jobDB.setDestination(index, self.jobDestination[index])

    common.jobDB.save()
    return
701 ewv 1.131
def getJobTypeArguments(self, nj, sched):
    """
    Return the arguments stored in the job DB for job nj as one string.

    Every argument is converted with str() and followed by a single
    space, so a non-empty result ends with a space. sched is not used.
    """
    pieces = [str(argument) + " " for argument in common.jobDB.arguments(nj)]
    return ''.join(pieces)
707 ewv 1.131
def numberOfJobs(self):
    """
    Return self.total_number_of_jobs, the job count set by the job
    splitting methods.
    """
    # Fabio
    jobTotal = self.total_number_of_jobs
    return jobTotal
711    
def getTarBall(self, exe):
    """
    Return the TarBall with lib and exe

    The path is <pathForTgz()>share/<self.tgz_name> and is also stored in
    self.tgzNameWithPath. An existing file is returned as is; otherwise
    self.buildTar_(exe) is called first and the stored path is returned
    with surrounding whitespace stripped.
    """
    #
    # Marco. Let's start to use relative path for Boss XML files
    #
    tarballPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
    self.tgzNameWithPath = tarballPath

    if not os.path.exists(tarballPath):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return self.tgzNameWithPath.strip()

    # if it exist, just return it
    return tarballPath
729    
def buildTar_(self, executable):
    """
    Build the tarballs that are shipped with the job.

    Nothing is done when the Scram release top is empty or equal to the
    user software area.  Otherwise two gzipped tar files are written:

    * ``self.tgzNameWithPath``: the private executable (if any), the
      'lib' and 'module' directories and every 'data' directory of the
      user software area, plus the 'ProdAgentApi' and 'ProdCommon'
      directories found under $CRABDIR;
    * ``self.MLtgzfile``: a fixed list of python files from
      $CRABDIR/python.

    Raises CrabException when the executable cannot be found, when a
    tarball cannot be created, or when the first tarball is larger than
    ``self.MaxTarBallSize`` MB.  A tarball that could not be completed
    is removed, so that a later existence check does not pick up a
    truncated archive.
    """
    import tarfile

    def discard(path):
        # best-effort removal of a partially written archive
        try:
            os.remove(path)
        except OSError:
            pass

    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        return

    try:  # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            if self.executable != '':
                exeWithPath = self.scram.findFile_(executable)
                if not exeWithPath:
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship
                    common.logger.debug(5, "Exe "+exeWithPath+" to be tarred")
                    path = swArea+'/'
                    # distinguish case when script is in user project area
                    # or given by full path somewhere else
                    if exeWithPath.find(path) >= 0:
                        exe = exeWithPath.replace(path, '')
                        tar.add(path+exe, exe)
                    else:
                        tar.add(exeWithPath, os.path.basename(executable))
                # else: the exe is from release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            libDir = 'lib'
            lib = swArea+'/' + libDir
            common.logger.debug(5, "lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib, libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module, moduleDir)

            ## Now check if any data dir(s) is present
            swAreaLen = len(swArea)
            for root, dirs, files in os.walk(swArea):
                if "data" in dirs:
                    common.logger.debug(5, "data "+root+"/data"+" to be tarred")
                    tar.add(root+"/data", root[swAreaLen:]+"/data")

            ## Add ProdAgent dir to tar
            paDir = 'ProdAgentApi'
            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
            if os.path.isdir(pa):
                tar.add(pa, paDir)

            ## Add PRODCOMMON dir to tar (needed for DBS publication)
            prodcommonDir = 'ProdCommon'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
            if os.path.isdir(prodcommonPath):
                tar.add(prodcommonPath, prodcommonDir)

            common.logger.debug(5, "Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            # always release the archive handle, also on failure
            tar.close()
    except CrabException:
        # keep the specific message (e.g. executable not found)
        discard(self.tgzNameWithPath)
        raise
    except Exception:
        discard(self.tgzNameWithPath)
        raise CrabException('Could not create tar-ball')

    ## check for tarball size
    tarballinfo = os.stat(self.tgzNameWithPath)
    if tarballinfo.st_size > self.MaxTarBallSize*1024*1024:
        raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')

    ## create tar-ball with ML stuff
    self.MLtgzfile = common.work_space.pathForTgz()+'share/MLfiles.tgz'
    try:
        tar = tarfile.open(self.MLtgzfile, "w:gz")
        try:
            path = os.environ['CRABDIR'] + '/python/'
            for ml_file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
                tar.add(path+ml_file, ml_file)
            common.logger.debug(5, "Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
        finally:
            tar.close()
    except Exception:
        discard(self.MLtgzfile)
        raise CrabException('Could not create ML files tar-ball')

    return
826 ewv 1.131
def additionalInputFileTgz(self):
    """
    Put all additional files into a tar ball and return its name.

    Every entry of ``self.additional_inbox_files`` is stored under its
    base name only.  The archive is written below the work space
    'share/' directory and is closed even when adding a file fails.
    """
    import tarfile
    tarName = common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
    tar = tarfile.open(tarName, "w:gz")
    try:
        for inbox_file in self.additional_inbox_files:
            tar.add(inbox_file, os.path.basename(inbox_file))
        common.logger.debug(5, "Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
    finally:
        tar.close()
    return tarName
839    
def wsSetupEnvironment(self, nj):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    The returned shell text is made of:
      * the middleware specific setup (LCG or OSG branch),
      * creation of the CMSSW project area for ``self.version`` and
        evaluation of the scram runtime environment,
      * a check on the number of wrapper arguments,
      * dataset related shell variables,
      * the per-job customisation of the configuration file through
        sed (input files / events for dataset jobs, first run and
        random seeds otherwise), ending with a rename to pset.cfg,
      * unpacking of the additional input tarball, if any.
    """
    # Prepare JobType-independent part
    txt = ''
    txt += 'echo ">>> setup environment"\n'
    txt += 'if [ $middleware == LCG ]; then \n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' if [ ! $? == 0 ] ;then\n'
    txt += ' echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
    txt += ' echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' exit 1\n'
    txt += ' fi\n'
    txt += ' echo ">>> Created working directory: $WORKING_DIR"\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo ">>> specific cmssw setup environment:"\n'
    txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
    txt += scram+' project CMSSW '+self.version+'\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
    txt += ' echo "JOB_EXIT_STATUS = 10034"\n'
    txt += ' echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    # report the release that was actually requested, not a fixed one
    txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW '+self.version+' not found on `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
    txt += ' echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += ' exit 1 \n'
    txt += 'fi \n'
    txt += 'cd '+self.version+'\n'
    # SOFTWARE_DIR is the project area just created (used for DBS2 tools)
    txt += 'SOFTWARE_DIR=`pwd`\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    ### needed grep for bug in scramv1 ###
    txt += scram+' runtime -sh\n'
    txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    txt += 'echo $PATH\n'

    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber)\n"
    txt += "\n"
    txt += "if [ $nargs -lt 2 ]\n"
    txt += "then\n"
    txt += " echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
    txt += ' echo "JOB_EXIT_STATUS = 50113"\n'
    txt += ' echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
    txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
    txt += ' echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += " exit 1\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    # shell variables used later for DBS output publication
    if self.datasetPath:
        txt += '\n'
        txt += 'DatasetPath='+self.datasetPath+'\n'

        datasetpath_split = self.datasetPath.split("/")

        txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
        txt += 'DataTier='+datasetpath_split[2]+'\n'
        txt += 'ApplicationFamily=cmsRun\n'
    else:
        txt += 'DatasetPath=MCDataTier\n'
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'

    if self.pset is not None:
        pset = os.path.basename(job.configFilename())
        txt += '\n'
        txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
        if self.datasetPath:  # standard job
            txt += 'InputFiles=${args[1]}\n'
            txt += 'MaxEvents=${args[2]}\n'
            txt += 'SkipEvents=${args[3]}\n'
            txt += 'echo "Inputfiles:<$InputFiles>"\n'
            txt += 'sed "s#\'INPUTFILE\'#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
            txt += 'echo "MaxEvents:<$MaxEvents>"\n'
            txt += 'sed "s#int32 input = 0#int32 input = $MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
            txt += 'echo "SkipEvents:<$SkipEvents>"\n'
            txt += 'sed "s#uint32 skipEvents = 0#uint32 skipEvents = $SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
        else:  # pythia like job
            # position, inside the wrapper arguments, of the next value
            seedIndex = 1
            if self.firstRun:
                txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
                txt += 'echo "FirstRun: <$FirstRun>"\n'
                txt += 'sed "s#uint32 firstRun = 0#uint32 firstRun = $FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                seedIndex = seedIndex+1

            if self.sourceSeed:
                txt += 'Seed=${args['+str(seedIndex)+']}\n'
                txt += 'sed "s#uint32 sourceSeed = 0#uint32 sourceSeed = $Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                seedIndex = seedIndex+1
                ## the following seeds are not always present
                if self.sourceSeedVtx:
                    txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "VtxSeed: <$VtxSeed>"\n'
                    txt += 'sed "s#uint32 VtxSmeared = 0#uint32 VtxSmeared = $VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
                if self.sourceSeedG4:
                    txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "G4Seed: <$G4Seed>"\n'
                    txt += 'sed "s#uint32 g4SimHits = 0#uint32 g4SimHits = $G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
                if self.sourceSeedMix:
                    txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "MixSeed: <$mixSeed>"\n'
                    txt += 'sed "s#uint32 mix = 0#uint32 mix = $mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
        txt += 'mv -f '+pset+' pset.cfg\n'

    if len(self.additional_inbox_files) > 0:
        txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
        txt += ' tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
        txt += 'fi\n'

    if self.pset is not None:
        txt += '\n'
        txt += 'echo "***** cat pset.cfg *********"\n'
        txt += 'cat pset.cfg\n'
        txt += 'echo "****** end pset.cfg ********"\n'
        txt += '\n'
        # hash of the configuration, used for DBS output publication
        txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
        txt += 'echo "PSETHASH = $PSETHASH" \n'
        txt += '\n'
    return txt
1013    
def wsBuildExe(self, nj=0):
    """
    Put in the script the commands to build an executable
    or a library.

    When no file exists at ``self.tgzNameWithPath`` an empty string is
    returned.  Otherwise the returned shell text untars the tarball
    from $RUNTIME_AREA (with error reporting) and puts the ProdAgentApi
    and ProdCommon directories in PYTHONPATH.
    """
    if not os.path.isfile(self.tgzNameWithPath):
        return ""

    tarball = os.path.basename(self.tgzNameWithPath)
    pieces = [
        'echo ">>> tar xzvf $RUNTIME_AREA/'+tarball+' :" \n',
        'tar xzvf $RUNTIME_AREA/'+tarball+'\n',
        'untar_status=$? \n',
        'if [ $untar_status -ne 0 ]; then \n',
        ' echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n',
        ' echo "JOB_EXIT_STATUS = $untar_status" \n',
        ' echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n',
        ' if [ $middleware == OSG ]; then \n',
        ' cd $RUNTIME_AREA\n',
        ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n',
        ' echo ">>> Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n',
        ' echo "JOB_EXIT_STATUS = 50999"\n',
        ' echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' fi\n',
        ' fi \n',
        ' \n',
        ' exit 1 \n',
        'else \n',
        ' echo "Successful untar" \n',
        'fi \n',
        '\n',
        # python path needed by the DBS output publication tools
        'echo ">>> Include ProdAgentApi and PRODCOMMON in PYTHONPATH:"\n',
        'if [ -z "$PYTHONPATH" ]; then\n',
        ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n',
        'else\n',
        ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n',
        'echo "PYTHONPATH=$PYTHONPATH"\n',
        'fi\n',
        '\n',
    ]
    return "".join(pieces)
1062    
def modifySteeringCards(self, nj):
    """
    Modify the card provided by the user, writing a new card into the
    share dir.  Nothing is done by this implementation.
    """
    return None
1068 ewv 1.131
def executableName(self):
    """
    Return the command that runs the job: "sh " when a user script
    (``self.scriptExe``) is set, ``self.executable`` otherwise.
    """
    if not self.scriptExe:
        return self.executable
    return "sh "
1074 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string passed to the executable.

    With a user script the arguments are the script name followed by
    " $NJob".  Otherwise they are the options for the configuration
    file pset.cfg; '-e' is added for releases >= CMSSW_1_5_X.

    Raises CrabException when major and minor release numbers cannot be
    parsed out of the version string returned by Scram.
    """
    if self.scriptExe:
        return self.scriptExe + " $NJob"

    version_array = self.scram.getSWVersion().split('_')
    try:
        major = int(version_array[1])
        minor = int(version_array[2])
    except (IndexError, ValueError):
        msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
        raise CrabException(msg)

    # compare (major, minor) as a pair: testing the two numbers
    # separately would wrongly exclude e.g. 2_0_X
    if (major, minor) >= (1, 5):
        return " -e -p pset.cfg"
    return " -p pset.cfg"
1093 slacapra 1.1
def inputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL input sandbox.

    The list holds, in this order: the code tarball and the ML tarball
    (each only when the file exists), the job configuration file (only
    when ``self.pset`` is not None) and the tarball with the additional
    input files, which is created by this call.
    """
    ## code
    sandbox = [tarball
               for tarball in (self.tgzNameWithPath, self.MLtgzfile)
               if os.path.isfile(tarball)]
    ## config
    if self.pset is not None:
        sandbox.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
    ## additional input files
    sandbox.append(self.additionalInputFileTgz())
    return sandbox
1113    
def outputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL output sandbox.

    Every user declared output file (``self.output_file`` followed by
    ``self.output_file_sandbox``) is numbered with ``nj + 1``.
    """
    declared = self.output_file + self.output_file_sandbox
    return [self.numberFile_(name, str(nj + 1)) for name in declared]
1125    
def prepareSteeringCards(self):
    """
    Make initial modifications of the user's steering card file.
    Nothing is done by this implementation.
    """
    return None
1131    
def wsRenameOutput(self, nj):
    """
    Returns part of a job script which renames the produced files.

    The shell text lists the current directory, then, for every file of
    ``self.output_file_sandbox`` and ``self.output_file``, moves the
    file to $RUNTIME_AREA and copies it to its numbered name.  A missing
    file sets exit_status=60302; for files of ``self.output_file`` the
    status is also echoed and stored in output_exit_status.  Finally
    the numbered names of ``self.output_file`` are put in the shell
    variable file_list and the script changes to $RUNTIME_AREA.
    """

    def check_snippet(fileWithSuffix, report_status):
        # shell text that checks and renames a single output file
        output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
        part = '\n'
        part += '# check output file\n'
        part += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
        part += ' mv '+fileWithSuffix+' $RUNTIME_AREA\n'
        part += ' cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
        part += 'else\n'
        part += ' exit_status=60302\n'
        part += ' echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
        if report_status:
            part += ' echo "JOB_EXIT_STATUS = $exit_status"\n'
            part += ' output_exit_status=$exit_status\n'
        if common.scheduler.boss_scheduler_name == 'condor_g':
            part += ' if [ $middleware == OSG ]; then \n'
            part += ' echo "prepare dummy output file"\n'
            part += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
            part += ' fi \n'
        part += 'fi\n'
        return part

    # the listing must be appended to, not overwritten by, what follows
    txt = '\n'
    txt += 'echo ">>> directory content:"\n'
    txt += 'ls \n'
    txt += '\n'

    txt += 'output_exit_status=0\n'

    for fileWithSuffix in self.output_file_sandbox:
        txt += check_snippet(fileWithSuffix, False)

    for fileWithSuffix in self.output_file:
        txt += check_snippet(fileWithSuffix, True)

    file_list = [self.numberFile_(fileWithSuffix, '$NJob')
                 for fileWithSuffix in self.output_file]

    txt += 'file_list="'+' '.join(file_list)+'"\n'
    txt += 'cd $RUNTIME_AREA\n'
    txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    return txt
1187    
def numberFile_(self, file, txt):
    """
    Append _'txt' before the last extension of a file name.

    'out.root' with txt '3' gives 'out_3.root'; a name without any
    extension simply gets '_txt' appended.
    """
    # split off the last extension only
    parts = file.rsplit(".", 1)
    if len(parts) > 1:
        return parts[0] + '_' + txt + "." + parts[1]
    return parts[0] + '_' + txt
1205    
def getRequirements(self, nj=[]):
    """
    Return job requirements to add to jdl files.

    The expression is the conjunction of: a software tag for
    ``self.version`` (when set), a software tag for
    ``self.executable_arch`` (when set) and the outbound IP
    requirement.  ``nj`` is not used.
    """
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' +
                       self.version +
                       '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')
    if self.executable_arch:
        clauses.append('Member("VO-cms-' +
                       self.executable_arch +
                       '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')
    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')

    # joining avoids a dangling ' && ' in front when the version is empty
    return ' && '.join(clauses)
1225 gutsche 1.3
def configFilename(self):
    """Return the config filename: ``self.name()`` with '.cfg' appended."""
    base = self.name()
    return base + '.cfg'
1229    
def wsSetupCMSOSGEnvironment_(self):
    """
    Returns the part of a job script which prepares the execution
    environment on OSG and which is common for all CMS jobs.

    The shell text exports SCRAM_ARCH (``self.executable_arch``) and
    sources $OSG_APP/cmssoft/cms/cmsset_default.sh for ``self.version``;
    when that file is missing the job reports exit code 10020, removes
    the working directory and exits.
    """
    arch = self.executable_arch
    pieces = [
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH='+arch+'\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n',
        ' else\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        '\n',
        ' cd $RUNTIME_AREA\n',
        ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n',
        ' echo ">>> Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10017"\n',
        ' echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' fi\n',
        '\n',
        ' exit 1\n',
        ' fi\n',
        '\n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(pieces)
1266 ewv 1.131
1267 gutsche 1.3 ### OLI_DANIELE
def wsSetupCMSLCGEnvironment_(self):
    """
    Returns the part of a job script which prepares the execution
    environment on LCG and which is common for all CMS jobs.

    The shell text exports SCRAM_ARCH and BUILD_ARCH
    (``self.executable_arch``) and sources
    $VO_CMS_SW_DIR/cmsset_default.sh, reporting exit codes 10031, 10020
    or 10032 when the directory is not defined, the file is missing or
    empty, or sourcing fails.
    """
    arch = self.executable_arch
    pieces = [
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH='+arch+'\n',
        ' export BUILD_ARCH='+arch+'\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n',
        ' echo "JOB_EXIT_STATUS = 10031" \n',
        ' echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' echo "JOB_EXIT_STATUS = 10032"\n',
        ' echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
    ]
    return ''.join(pieces)
1306 gutsche 1.5
1307 ewv 1.131 ### FEDE FOR DBS OUTPUT PUBLICATION
def modifyReport(self, nj):
    """
    Insert the part of the script that modifies the FrameworkJob Report.

    When the configuration parameter 'USER.publish_data' is 1, the shell
    text runs ModifyJobReport.py from $SOFTWARE_DIR on
    crab_fjr_$NJob.xml, using 'USER.publish_data_name' as processed
    dataset name.  In every other case (parameter missing included) the
    text only echoes that no publication is required.
    """
    try:
        publish_data = int(self.cfg_params['USER.publish_data'])
    except KeyError:
        publish_data = 0

    if publish_data != 1:
        return 'echo "no data publication required"\n'

    head = [
        'echo ">>> Modify Job Report:" \n',
        'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n',
        'if [ -z "$SE" ]; then\n',
        ' SE="" \n',
        'fi \n',
        'if [ -z "$SE_PATH" ]; then\n',
        ' SE_PATH="" \n',
        'fi \n',
        'echo "SE = $SE"\n',
        'echo "SE_PATH = $SE_PATH"\n',
    ]

    processedDataset = self.cfg_params['USER.publish_data_name']
    # command line echoed first and executed right after
    report_cmd = '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH'

    tail = [
        'ProcessedDataset='+processedDataset+'\n',
        # LFN is built from the part of SE_PATH following 'store'
        # (the 'store' token is hard-coded)
        'if [ "$SE_PATH" == "" ]; then\n',
        ' FOR_LFN=/copy_problems/ \n',
        'else \n',
        ' tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n',
        ' FOR_LFN=/store$tmp \n',
        'fi \n',
        'echo "ProcessedDataset = $ProcessedDataset"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
        'echo "' + report_cmd + '"\n',
        report_cmd + '\n',
        'modifyReport_result=$?\n',
        'echo modifyReport_result = $modifyReport_result\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' exit_status=1\n',
        ' echo "ERROR: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n',
        'fi\n',
    ]
    return ''.join(head + tail)
1363 fanzago 1.99
def cleanEnv(self):
    """
    Return the part of a job script which, on OSG, goes back to
    $RUNTIME_AREA and removes $WORKING_DIR, reporting exit code 60999
    when the directory is still there afterwards.
    """
    pieces = [
        'if [ $middleware == OSG ]; then\n',
        ' cd $RUNTIME_AREA\n',
        ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n',
        ' echo ">>> Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n',
        ' echo "JOB_EXIT_STATUS = 60999"\n',
        ' echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' fi\n',
        'fi\n',
        '\n',
    ]
    return ''.join(pieces)
1380 fanzago 1.93
def setParam_(self, param, value):
    """Store ``value`` under key ``param`` in ``self._params``."""
    self._params.update({param: value})
1383    
def getParams(self):
    """Return the ``self._params`` dictionary itself (not a copy)."""
    params = self._params
    return params
1386 gutsche 1.8
def setTaskid_(self):
    """Copy the 'taskId' configuration parameter into ``self._taskId``."""
    task_id = self.cfg_params['taskId']
    self._taskId = task_id
1389 ewv 1.131
def getTaskid(self):
    """Return the value of ``self._taskId``."""
    task_id = self._taskId
    return task_id
1392 gutsche 1.35
def uniquelist(self, old):
    """
    Remove duplicates from a list.

    The elements are used as dictionary keys, so they must be hashable;
    the keys of that dictionary are returned.
    """
    return dict.fromkeys(old, 0).keys()
1401 mcinquil 1.121
1402    
1403     def checkOut(self, limit):
1404     """
1405     check the dimension of the output files
1406     """
1407 fanzago 1.133 txt += 'echo ">>> Starting output sandbox limit check :"\n'
1408 mcinquil 1.121 allOutFiles = ""
1409     listOutFiles = []
1410     for fileOut in (self.output_file+self.output_file_sandbox):
1411     if fileOut.find('crab_fjr') == -1:
1412     allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob')
1413     listOutFiles.append(self.numberFile_(fileOut, '$NJob'))
1414     txt += 'echo "OUTPUT files: '+str(allOutFiles)+'";\n'
1415     txt += 'ls -gGhrta;\n'
1416     txt += 'sum=0;\n'
1417     txt += 'for file in '+str(allOutFiles)+' ; do\n'
1418     txt += ' if [ -e $file ]; then\n'
1419     txt += ' tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
1420     txt += ' sum=`expr $sum + $tt`\n'
1421     txt += ' else\n'
1422     txt += ' echo "WARNING: output file $file not found!"\n'
1423     txt += ' fi\n'
1424     txt += 'done\n'
1425     txt += 'echo "Total Output dimension: $sum";\n'
1426     txt += 'limit='+str(limit)+';\n'
1427     txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
1428     txt += 'if [ $limit -lt $sum ]; then\n'
1429     txt += ' echo "WARNING: output files have to big size - something will be lost;"\n'
1430     txt += ' echo " checking the output file sizes..."\n'
1431     """
1432     txt += ' dim=0;\n'
1433     txt += ' exclude=0;\n'
1434     txt += ' for files in '+str(allOutFiles)+' ; do\n'
1435     txt += ' sumTemp=0;\n'
1436     txt += ' for file2 in '+str(allOutFiles)+' ; do\n'
1437     txt += ' if [ $file != $file2 ]; then\n'
1438     txt += ' tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1439     txt += ' sumTemp=`expr $sumTemp + $tt`;\n'
1440     txt += ' fi\n'
1441     txt += ' done\n'
1442     txt += ' if [ $sumTemp -lt $limit ]; then\n'
1443     txt += ' if [ $dim -lt $sumTemp ]; then\n'
1444     txt += ' dim=$sumTemp;\n'
1445     txt += ' exclude=$file;\n'
1446     txt += ' fi\n'
1447     txt += ' fi\n'
1448     txt += ' done\n'
1449     txt += ' echo "Dimension calculated: $dim"; echo "File to exclude: $exclude";\n'
1450     """
1451     txt += ' tot=0;\n'
1452     txt += ' for file2 in '+str(allOutFiles)+' ; do\n'
1453     txt += ' tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1454     txt += ' tot=`expr $tot + $tt`;\n'
1455     txt += ' if [ $limit -lt $tot ]; then\n'
1456     txt += ' tot=`expr $tot - $tt`;\n'
1457     txt += ' fileLast=$file;\n'
1458     txt += ' break;\n'
1459     txt += ' fi\n'
1460     txt += ' done\n'
1461     txt += ' echo "Dimension calculated: $tot"; echo "First file to exclude: $file";\n'
1462 ewv 1.131 txt += ' flag=0;\n'
1463 mcinquil 1.121 txt += ' for filess in '+str(allOutFiles)+' ; do\n'
1464     txt += ' if [ $fileLast = $filess ]; then\n'
1465     txt += ' flag=1;\n'
1466     txt += ' fi\n'
1467     txt += ' if [ $flag -eq 1 ]; then\n'
1468     txt += ' rm -f $filess;\n'
1469     txt += ' fi\n'
1470     txt += ' done\n'
1471     txt += ' ls -agGhrt;\n'
1472     txt += ' echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
1473     txt += ' echo "JOB_EXIT_STATUS = 70000";\n'
1474     txt += ' exit_status=70000;\n'
1475     txt += 'else'
1476     txt += ' echo "Total Output dimension $sum is fine.";\n'
1477     txt += 'fi\n'
1478 fanzago 1.133 txt += 'echo "Ending output sandbox limit check"\n'
1479 mcinquil 1.121 return txt