ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.152
Committed: Wed Jan 9 16:05:10 2008 UTC (17 years, 3 months ago) by fanzago
Content type: text/x-python
Branch: MAIN
Changes since 1.151: +17 -16 lines
Log Message:
removed ProdAgent API dependencies for DBS publication

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5 fanzago 1.115 from BlackWhiteListParser import BlackWhiteListParser
6 slacapra 1.1 import common
7     import Scram
8    
9 slacapra 1.105 import os, string, glob
10 slacapra 1.1
11     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs):
    """
    Configure a CMSSW job type from the user configuration.

    Reads and validates the CMSSW/USER/EDG cards, runs data discovery
    when a dataset is requested, prepares the user tarball, splits the
    task into jobs and finally writes the modified parameter set.

    ARGUMENTS: cfg_params - mapping holding the user configuration cards
               ncjobs     - number of jobs requested to be created
                            (limits the splitting)
    RAISES:    CrabException when a mandatory card is missing, a card has
               an invalid value, a referenced file does not exist or the
               parameter set cannot be manipulated
    """
    import sys

    JobType.__init__(self, 'CMSSW')
    common.logger.debug(3,'CMSSW::__init__')

    def readInt(key):
        # Integer value of a card: KeyError when the card is absent,
        # CrabException (instead of a raw ValueError) when it is not a number.
        value = cfg_params[key]
        try:
            return int(value)
        except ValueError:
            raise CrabException("Error: parameter "+key+" must be an integer, got '"+str(value)+"'")

    self.argsList = []

    self._params = {}
    self.cfg_params = cfg_params
    # init BlackWhiteListParser
    self.blackWhiteListParser = BlackWhiteListParser(cfg_params)

    try:
        self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
    except KeyError:
        self.MaxTarBallSize = 9.5

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    log = common.logger

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.additional_tgz_name = 'additional.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''            # script use case
    self.datasetPath = ''     # script use case

    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()

    #
    # Try to block creation in case of arch/version mismatch
    #
    a = self.version.split("_")
    if int(a[1]) == 1:
        minor = int(a[2])
        if minor < 5 and self.executable_arch.find('slc4') == 0:
            msg = "Warning: You are using %s version of CMSSW with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
            common.logger.message(msg)
        if minor >= 5 and self.executable_arch.find('slc3') == 0:
            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
            raise CrabException(msg)

    common.taskDB.setDict('codeVersion',self.version)
    self.setParam_('application', self.version)

    ### collect Data cards
    try:
        tmp = cfg_params['CMSSW.datasetpath']
    except KeyError:
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)
    log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
    if tmp.lower() == 'none':
        self.datasetPath = None
        self.selectNoInput = 1
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0

    # ML monitoring
    # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
    if not self.datasetPath:
        self.setParam_('dataset', 'None')
        self.setParam_('owner', 'None')
    else:
        try:
            datasetpath_split = self.datasetPath.split("/")
            # standard style
            self.setParam_('datasetFull', self.datasetPath)
            self.setParam_('dataset', datasetpath_split[1])
            self.setParam_('owner', datasetpath_split[2])
        except IndexError:
            # path with fewer components than expected: report it verbatim
            self.setParam_('dataset', self.datasetPath)
            self.setParam_('owner', self.datasetPath)

    self.setParam_('taskId', common.taskDB.dict('taskId'))

    self.dataTiers = []

    ## now the application
    try:
        self.executable = cfg_params['CMSSW.executable']
    except KeyError:
        self.executable = 'cmsRun'
        self.setParam_('exe', self.executable)
        msg = "User executable not defined. Use cmsRun"
        log.debug(3,msg)
    else:
        self.setParam_('exe', self.executable)
        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
        msg = "Default executable cmsRun overridden. Switch to " + self.executable
        log.debug(3,msg)

    try:
        self.pset = cfg_params['CMSSW.pset']
    except KeyError:
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
    if self.pset.lower() != 'none':
        if not os.path.exists(self.pset):
            raise CrabException("User defined PSet file "+self.pset+" does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox;
    ## the fjr report is added by default
    self.output_file_sandbox = [self.fjrFileName]

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    try:
        outputCard = cfg_params['CMSSW.output_file']
    except KeyError:
        outputCard = ''
    if outputCard != '':
        tmpOutFiles = outputCard.split(',')
        log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
        for outName in tmpOutFiles:
            self.output_file.append(outName.strip())
    else:
        log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")

    # script_exe file as additional file in inputSandbox
    try:
        self.scriptExe = cfg_params['USER.script_exe']
    except KeyError:
        self.scriptExe = ''
    if self.scriptExe != '':
        if not os.path.isfile(self.scriptExe):
            msg ="ERROR. file "+self.scriptExe+" not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    # without dataset and without pset a user script is mandatory
    if self.datasetPath is None and self.pset is None and self.scriptExe == '':
        msg ="Error. script_exe not defined"
        raise CrabException(msg)

    ## additional input files
    try:
        addFilesCard = cfg_params['USER.additional_input_files']
    except KeyError:
        addFilesCard = None
    if addFilesCard is not None:
        for pattern in addFilesCard.split(','):
            pattern = pattern.strip()
            if not pattern:
                # empty entry (e.g. trailing comma): nothing to ship
                continue
            dirname = ''
            if not pattern.startswith("/"):
                dirname = "."
            if pattern.find("*") > -1:
                files = glob.glob(os.path.join(dirname, pattern))
                if len(files) == 0:
                    raise CrabException("No additional input file found with this pattern: "+pattern)
            else:
                files = [pattern]
            for fname in files:
                if not os.path.exists(fname):
                    raise CrabException("Additional input file not found: "+fname)
                self.additional_inbox_files.append(fname.strip())
        common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))

    # files per job
    try:
        if cfg_params['CMSSW.files_per_jobs']:
            raise CrabException("files_per_jobs no longer supported. Quitting.")
    except KeyError:
        pass

    ## Events per job
    try:
        self.eventsPerJob = readInt('CMSSW.events_per_job')
        self.selectEventsPerJob = 1
    except KeyError:
        self.eventsPerJob = -1
        self.selectEventsPerJob = 0

    ## number of jobs
    try:
        self.theNumberOfJobs = readInt('CMSSW.number_of_jobs')
        self.selectNumberOfJobs = 1
    except KeyError:
        self.theNumberOfJobs = 0
        self.selectNumberOfJobs = 0

    try:
        self.total_number_of_events = readInt('CMSSW.total_number_of_events')
        self.selectTotalNumberEvents = 1
    except KeyError:
        self.total_number_of_events = 0
        self.selectTotalNumberEvents = 0

    if self.pset is not None:
        if (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2:
            msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
            raise CrabException(msg)
    else:
        if self.selectNumberOfJobs == 0:
            msg = 'Must specify number_of_jobs.'
            raise CrabException(msg)

    ## optional seeds and first run: attribute, card, text for the debug message
    optionalCards = (
        ('sourceSeed',    'CMSSW.pythia_seed', "No seed given"),
        ('sourceSeedVtx', 'CMSSW.vtx_seed',    "No vertex seed given"),
        ('sourceSeedG4',  'CMSSW.g4_seed',     "No g4 sim hits seed given"),
        ('sourceSeedMix', 'CMSSW.mix_seed',    "No mix seed given"),
        ('firstRun',      'CMSSW.first_run',   "No first run given"),
    )
    for attrName, cardName, missingMsg in optionalCards:
        try:
            setattr(self, attrName, readInt(cardName))
        except KeyError:
            setattr(self, attrName, None)
            common.logger.debug(5, missingMsg)

    PsetEdit = None
    if self.pset is not None:
        import PsetManipulator as pp
        PsetEdit = pp.PsetManipulator(self.pset)

    # Copy/return
    try:
        self.copy_data = readInt('USER.copy_data')
    except KeyError:
        self.copy_data = 0
    try:
        self.return_data = readInt('USER.return_data')
    except KeyError:
        self.return_data = 0

    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents=0        # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths={}        # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination=[]  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end

    self.tgzNameWithPath = self.getTarBall(self.executable)

    ## Select Splitting
    if self.selectNoInput:
        if self.pset is None:
            self.jobSplittingForScript()
        else:
            self.jobSplittingNoInput()
    else:
        self.jobSplittingByBlocks(blockSites)

    # modify Pset
    if self.pset is not None:
        try:
            if self.datasetPath:
                # standard job
                # allow to process a fraction of events in a file
                PsetEdit.inputModule("INPUTFILE")
                PsetEdit.maxEvent(0)
                PsetEdit.skipEvent(0)
            else:
                # pythia like job
                PsetEdit.maxEvent(self.eventsPerJob)
                if self.firstRun:
                    PsetEdit.pythiaFirstRun(0)
                if self.sourceSeed:
                    PsetEdit.pythiaSeed(0)
                    # NOTE(review): nesting of the vtx/g4/mix seeds under the
                    # pythia seed was reconstructed from a listing without
                    # indentation - confirm against the repository copy.
                    if self.sourceSeedVtx:
                        PsetEdit.vtxSeed(0)
                    if self.sourceSeedG4:
                        PsetEdit.g4Seed(0)
                    if self.sourceSeedMix:
                        PsetEdit.mixSeed(0)
            # add FrameworkJobReport to parameter-set
            PsetEdit.addCrabFJR(self.fjrFileName)
            PsetEdit.psetWriter(self.configFilename())
        except CrabException:
            # already a user-readable error: do not mask it
            raise
        except Exception:
            # keep the underlying reason instead of hiding it
            msg = 'Error while manipulating ParameterSet (%s): exiting...' % str(sys.exc_info()[1])
            raise CrabException(msg)
328 gutsche 1.3
def DataDiscoveryAndLocation(self, cfg_params):
    """
    Query the data discovery and data location services for self.datasetPath.

    ARGUMENT: cfg_params - user configuration, handed over to
              DataDiscovery and DataLocation
    SETS:     self.pubdata, self.filesbyblock, self.eventsbyblock,
              self.eventsbyfile, self.maxEvents
    RETURNS:  the object returned by DataLocation.getSites()
              (used by the caller as the block -> hosting sites map)
    RAISES:   CrabException when discovery or location fails
    """
    import sys
    import DataDiscovery
    import DataLocation
    common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")

    datasetPath = self.datasetPath

    ## Contact the DBS
    common.logger.message("Contacting Data Discovery Services ...")
    # all three failures are reported to the user in the same way
    discoveryErrors = (DataDiscovery.NotExistingDatasetError,
                       DataDiscovery.NoDataTierinProvenanceError,
                       DataDiscovery.DataDiscoveryError)
    try:
        self.pubdata = DataDiscovery.DataDiscovery(datasetPath, cfg_params)
        self.pubdata.fetchDBSInfo()
    except discoveryErrors:
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
        raise CrabException(msg)

    self.filesbyblock = self.pubdata.getFiles()
    self.eventsbyblock = self.pubdata.getEventsPerBlock()
    self.eventsbyfile = self.pubdata.getEventsPerFile()

    ## get max number of events
    self.maxEvents = self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py

    ## Contact the DLS and build a list of sites hosting the fileblocks
    try:
        dataloc = DataLocation.DataLocation(list(self.filesbyblock.keys()), cfg_params)
        dataloc.fetchDLSInfo()
    except DataLocation.DataLocationError:
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
        raise CrabException(msg)

    sites = dataloc.getSites()

    # screen output
    common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")

    return sites
382 ewv 1.131
def setArgsList(self, argsList):
    """
    Store the given argument list on the job type (self.argsList).
    """
    setattr(self, 'argsList', argsList)
385    
def jobSplittingByBlocks(self, blockSites):
    """
    Perform job splitting. Jobs run over an integer number of files
    and no more than one block.
    ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
    REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
              self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
              self.maxEvents, self.filesbyblock, self.eventsbyblock, self.eventsbyfile,
              self.ncjobs
    SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
          self.total_number_of_jobs - Total # of jobs (also copied to self.ncjobs)
          self.list_of_args - per job: [file list string, max events, skip events]
    RAISES: CrabException if number_of_jobs is 0 while it is needed to
            derive the events per job
    """

    # ---- Handle the possible job splitting configurations ---- #
    if self.selectTotalNumberEvents:
        totalEventsRequested = self.total_number_of_events
    if self.selectEventsPerJob:
        eventsPerJobRequested = self.eventsPerJob
        if self.selectNumberOfJobs:
            totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob

    if totalEventsRequested == -1:
        # user requested all the events in the dataset
        eventsRemaining = self.maxEvents
    elif totalEventsRequested > self.maxEvents:
        # user requested more events than are in the dataset
        eventsRemaining = self.maxEvents
        common.logger.message("Requested "+str(totalEventsRequested)+ " events, but only "+str(self.maxEvents)+" events are available.")
    else:
        # user requested less events than are in the dataset
        eventsRemaining = totalEventsRequested

    # If user requested more events per job than are in the dataset
    if self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents:
        eventsPerJobRequested = self.maxEvents

    # For user info at end
    totalEventCount = 0

    if self.selectTotalNumberEvents and self.selectNumberOfJobs:
        if self.theNumberOfJobs == 0:
            raise CrabException('Cannot split jobs with "0" as number_of_jobs')
        eventsPerJobRequested = eventsRemaining // self.theNumberOfJobs

    if self.selectNumberOfJobs:
        common.logger.message("May not create the exact number_of_jobs requested.")

    if self.ncjobs == 'all':
        totalNumberOfJobs = 999999999
    else:
        totalNumberOfJobs = self.ncjobs

    blocks = list(blockSites.keys())
    blockCount = 0
    # Backup variable in case self.maxEvents counted events in a non-included block
    numBlocksInDataset = len(blocks)

    jobCount = 0
    list_of_lists = []

    # dictionary tracking which jobs belong to which block
    jobsOfBlock = {}

    def recordJob(block, parString, maxEventsArg, skipEvents, jobNumber, runEvents, note):
        # Book one finished job: arguments, destination and block membership.
        # parString ends with an escaped comma (2 characters) which is dropped.
        list_of_lists.append([parString[:-2], str(maxEventsArg), str(skipEvents)])
        common.logger.debug(3,"Job "+str(jobNumber)+" can run over "+str(runEvents)+" events"+note+".")
        self.jobDestination.append(blockSites[block])
        common.logger.debug(5,"Job "+str(jobNumber)+" Destination: "+str(self.jobDestination[jobNumber-1]))
        jobsOfBlock[block].append(jobNumber)

    # ---- Iterate over the blocks in the dataset until ---- #
    # ---- we've met the requested total # of events    ---- #
    while eventsRemaining > 0 and blockCount < numBlocksInDataset and jobCount < totalNumberOfJobs:
        block = blocks[blockCount]
        blockCount += 1
        if block not in jobsOfBlock:
            jobsOfBlock[block] = []

        if block not in self.eventsbyblock:
            continue

        numEventsInBlock = self.eventsbyblock[block]
        common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))

        files = self.filesbyblock[block]
        numFilesInBlock = len(files)
        if numFilesInBlock <= 0:
            continue
        fileCount = 0

        # ---- New block => New job ---- #
        parString = ""
        # counter for number of events in files currently worked on
        filesEventCount = 0
        # flag if next while loop should touch new file
        newFile = 1
        # job event counter
        jobSkipEventCount = 0

        # ---- Iterate over the files in the block until we've met the requested ---- #
        # ---- total # of events or we've gone over all the files in this block  ---- #
        while eventsRemaining > 0 and fileCount < numFilesInBlock and jobCount < totalNumberOfJobs:
            fname = files[fileCount]
            if newFile:
                try:
                    numEventsInFile = self.eventsbyfile[fname]
                except KeyError:
                    common.logger.message("File "+str(fname)+" has unknown number of events: skipping")
                else:
                    common.logger.debug(6, "File "+str(fname)+" has "+str(numEventsInFile)+" events")
                    # increase filesEventCount
                    filesEventCount += numEventsInFile
                    # Add file to current job
                    parString += '\\"' + fname + '\\"\\,'
                    newFile = 0

            # events of the collected files not yet assigned to a job
            eventsAvailable = filesEventCount - jobSkipEventCount

            if eventsAvailable < eventsPerJobRequested:
                # less events remain in the collected files than requested per job
                if fileCount == numFilesInBlock-1:
                    # last file in block: end job using the remaining events
                    recordJob(block, parString, -1, jobSkipEventCount, jobCount+1, eventsAvailable, " (last file in block)")
                    jobCount += 1
                    totalEventCount += eventsAvailable
                    eventsRemaining -= eventsAvailable
                    jobSkipEventCount = 0
                    # reset file
                    parString = ""
                    filesEventCount = 0
                # in both cases: go to next file
                newFile = 1
                fileCount += 1
            elif eventsAvailable == eventsPerJobRequested:
                # exact match: close job and touch new file
                recordJob(block, parString, eventsPerJobRequested, jobSkipEventCount, jobCount+1, eventsPerJobRequested, "")
                jobCount += 1
                totalEventCount += eventsPerJobRequested
                eventsRemaining -= eventsPerJobRequested
                jobSkipEventCount = 0
                # reset file
                parString = ""
                filesEventCount = 0
                newFile = 1
                fileCount += 1
            else:
                # more events than requested: close job but don't touch new file
                recordJob(block, parString, eventsPerJobRequested, jobSkipEventCount, jobCount+1, eventsPerJobRequested, "")
                jobCount += 1
                totalEventCount += eventsPerJobRequested
                eventsRemaining -= eventsPerJobRequested
                # calculate skip events for last file
                # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequested
                jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[fname])
                # remove all but the last file
                filesEventCount = self.eventsbyfile[fname]
                parString = '\\"' + fname + '\\"\\,'

    self.ncjobs = self.total_number_of_jobs = jobCount
    if eventsRemaining > 0 and jobCount < totalNumberOfJobs:
        common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
    common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")

    # screen output
    screenOutput = "List of jobs and available destination sites:\n\n"

    # keep trace of block with no sites to print a warning at the end
    noSiteBlock = []
    bloskNoSite = []

    blockCounter = 0
    for block in blocks:
        if block in jobsOfBlock:
            blockCounter += 1
            # apply black list then white list once per block
            allowedSites = self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)
            jobRange = spanRanges(jobsOfBlock[block])
            screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,jobRange,','.join(allowedSites))
            if len(allowedSites) == 0:
                noSiteBlock.append(jobRange)
                bloskNoSite.append(blockCounter)

    common.logger.message(screenOutput)
    if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
        msg = 'WARNING: No sites are hosting any part of data for block:\n '
        msg += ' ' + ', '.join([str(blockNumber) for blockNumber in bloskNoSite])
        msg += '\n Related jobs:\n '
        msg += ','.join([str(range_jobs) for range_jobs in noSiteBlock])
        msg += '\n will not be submitted and this block of data can not be analyzed!\n'
        common.logger.message(msg)

    self.list_of_args = list_of_lists
    return
602    
def jobSplittingNoInput(self):
    """
    Perform job splitting based on number of event per job
    (jobs without input dataset).
    REQUIRES: self.selectEventsPerJob, self.selectNumberOfJobs, self.selectTotalNumberEvents,
              self.eventsPerJob, self.theNumberOfJobs, self.total_number_of_events,
              self.firstRun and the self.sourceSeed* values
    SETS: self.total_number_of_jobs, self.total_number_of_events or self.eventsPerJob,
          self.jobDestination (one [""] per job), self.list_of_args
    RAISES: CrabException if the total number of events is negative or the
            splitting would require a division by zero
    """
    common.logger.debug(5,'Splitting per events')

    if self.selectEventsPerJob:
        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
    if self.selectNumberOfJobs:
        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
    if self.selectTotalNumberEvents:
        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')

    if self.total_number_of_events < 0:
        msg='Cannot split jobs per Events with "-1" as total number of events'
        raise CrabException(msg)

    if self.selectEventsPerJob:
        if self.selectTotalNumberEvents:
            if self.eventsPerJob == 0:
                raise CrabException('Cannot split jobs per Events with "0" as events per job')
            self.total_number_of_jobs = self.total_number_of_events // self.eventsPerJob
        elif self.selectNumberOfJobs:
            self.total_number_of_jobs = self.theNumberOfJobs
            self.total_number_of_events = int(self.theNumberOfJobs*self.eventsPerJob)
    elif self.selectNumberOfJobs:
        if self.theNumberOfJobs == 0:
            raise CrabException('Cannot split jobs per Events with "0" as number of jobs')
        self.total_number_of_jobs = self.theNumberOfJobs
        self.eventsPerJob = self.total_number_of_events // self.total_number_of_jobs

    common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs))

    # is there any remainder?
    eventsCovered = int(self.total_number_of_jobs)*self.eventsPerJob
    check = int(self.total_number_of_events) - eventsCovered

    common.logger.debug(5,'Check '+str(check))

    common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
    if check > 0:
        common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(eventsCovered))

    # argument is seed number.$i
    self.list_of_args = []
    for i in range(self.total_number_of_jobs):
        ## Since there is no input, any site is good
        self.jobDestination.append([""]) #must be empty to write correctly the xml
        jobIndex = str(i)
        args = []
        if self.firstRun:
            ## pythia first run
            args.append(str(self.firstRun)+jobIndex)
        if self.sourceSeed:
            args.append(str(self.sourceSeed)+jobIndex)
            # NOTE(review): nesting of the vtx/g4/mix seeds under the pythia
            # seed was reconstructed from a listing without indentation -
            # confirm against the repository copy.
            if self.sourceSeedVtx:
                ## + vtx random seed
                args.append(str(self.sourceSeedVtx)+jobIndex)
            if self.sourceSeedG4:
                ## + G4 random seed
                args.append(str(self.sourceSeedG4)+jobIndex)
            if self.sourceSeedMix:
                ## + Mix random seed
                args.append(str(self.sourceSeedMix)+jobIndex)
        self.list_of_args.append(args)

    return
668    
669 spiga 1.42
def jobSplittingForScript(self):
    """
    Split purely by number of jobs (user script use case).
    SETS: self.total_number_of_jobs - copied from self.theNumberOfJobs
          self.jobDestination       - one [""] appended per job
          self.list_of_args         - [[ '0' ], [ '1' ], ...], the job index as only argument
    """
    logger = common.logger
    logger.debug(5,'Splitting per job')
    logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')

    njobs = self.theNumberOfJobs
    self.total_number_of_jobs = njobs

    logger.debug(5,'N jobs '+str(njobs))
    logger.message(str(njobs)+' jobs can be created')

    # Since there is no input, any site is good: empty destination per job
    self.jobDestination.extend([[""] for jobIndex in range(njobs)])
    # no random seed, the only argument is the job index
    self.list_of_args = [[str(jobIndex)] for jobIndex in range(njobs)]
    return
692    
def split(self, jobParams):
    """
    Store arguments and destination of every job in the job DB.
    ARGUMENT: jobParams - list, extended with one entry per job and filled
              with the corresponding entry of self.list_of_args
    REQUIRES: self.total_number_of_jobs, self.list_of_args, self.jobDestination
    """
    jobDB = common.jobDB
    jobDB.load()

    njobs = self.total_number_of_jobs
    arglist = self.list_of_args

    # create the empty structure
    jobParams.extend([""] * njobs)

    for job in range(njobs):
        jobArgs = arglist[job]
        jobParams[job] = jobArgs
        jobDB.setArguments(job, jobArgs)
        destination = self.jobDestination[job]
        common.logger.debug(5,"Job "+str(job)+" Destination: "+str(destination))
        jobDB.setDestination(job, destination)

    jobDB.save()
    return
713 ewv 1.131
def getJobTypeArguments(self, nj, sched):
    """
    Return the arguments stored in the job DB for job nj as a single
    string, each argument followed by a blank. sched is not used.
    """
    pieces = [str(arg) + " " for arg in common.jobDB.arguments(nj)]
    return ''.join(pieces)
719 ewv 1.131
def numberOfJobs(self):
    """
    Return the total number of jobs determined by the job splitting.
    """
    njobs = self.total_number_of_jobs
    return njobs
723    
def getTarBall(self, exe):
    """
    Return the TarBall with lib and exe

    The path is built from common.work_space.pathForTgz(), 'share/' and
    self.tgz_name and stored in self.tgzNameWithPath. When a file already
    exists at that path it is returned as is; otherwise self.buildTar_(exe)
    is called first and the (stripped) path is returned.
    """
    # relative path, so that it can be used in the Boss XML files
    self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name

    # if it exist, just return it
    if os.path.exists(self.tgzNameWithPath):
        return self.tgzNameWithPath

    # Prepare a tar gzipped file with user binaries.
    self.buildTar_(exe)

    # str.strip() replaces the deprecated string.strip() module function
    return self.tgzNameWithPath.strip()
741    
def buildTar_(self, executable):
    """
    Build the tarballs shipped in the input sandbox.

    Two archives are written:
      * self.tgzNameWithPath: the user executable (only when it does not
        come from the release area), the 'lib' and 'module' directories of
        the user SCRAM area, every 'data' directory found below that area
        and the ProdCommon directory found under $CRABDIR;
      * self.MLtgzfile: the helper scripts taken from $CRABDIR/python.

    Nothing is done when no release top is known or when the working area
    is the release top itself.

    Raises CrabException when the executable cannot be found, when an
    archive cannot be written, or when the main tarball is larger than
    self.MaxTarBallSize MB.
    """
    import sys
    import tarfile

    def discard(path):
        # A half-written archive must not survive: getTarBall() returns
        # whatever file it finds at that path without rebuilding it.
        if os.path.exists(path):
            os.remove(path)

    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        return

    try: # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            if (self.executable != ''):
                exeWithPath = self.scram.findFile_(executable)
                if ( not exeWithPath ):
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship
                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
                    path = swArea+'/'
                    # distinguish case when script is in user project area or given by full path somewhere else
                    if exeWithPath.find(path) >= 0 :
                        exe = exeWithPath.replace(path, '')
                        tar.add(path+exe,exe)
                    else :
                        tar.add(exeWithPath,os.path.basename(executable))
                # otherwise the exe is from release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            libDir = 'lib'
            lib = swArea+'/' +libDir
            common.logger.debug(5,"lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib,libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module,moduleDir)

            ## Now check if any data dir(s) is present
            swAreaLen=len(swArea)
            for root, dirs, files in os.walk(swArea):
                if "data" in dirs:
                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
                    tar.add(root+"/data",root[swAreaLen:]+"/data")

            ## Add ProdCommon dir to tar
            prodcommonDir = 'ProdCommon'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
            if os.path.isdir(prodcommonPath):
                tar.add(prodcommonPath,prodcommonDir)

            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            # always release the file handle, also when something failed
            tar.close()
    except CrabException:
        # keep the specific message (e.g. executable not found)
        discard(self.tgzNameWithPath)
        raise
    except Exception:
        discard(self.tgzNameWithPath)
        raise CrabException('Could not create tar-ball: '+str(sys.exc_info()[1]))

    ## check for tarball size
    tarballinfo = os.stat(self.tgzNameWithPath)
    if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
        raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')

    ## create tar-ball with ML stuff
    self.MLtgzfile = common.work_space.pathForTgz()+'share/MLfiles.tgz'
    try:
        tar = tarfile.open(self.MLtgzfile, "w:gz")
        try:
            path=os.environ['CRABDIR'] + '/python/'
            for helper in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
                tar.add(path+helper,helper)
            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
        finally:
            tar.close()
    except Exception:
        discard(self.MLtgzfile)
        raise CrabException('Could not create ML files tar-ball: '+str(sys.exc_info()[1]))

    return
837 ewv 1.131
def additionalInputFileTgz(self):
    """
    Put all additional files into a tar ball and return its name

    The archive is written to common.work_space.pathForTgz() + 'share/' +
    self.additional_tgz_name. Every entry of self.additional_inbox_files is
    stored under the part of its path that follows the last '/'.
    """
    import tarfile
    tarName = common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
    tar = tarfile.open(tarName, "w:gz")
    try:
        for path in self.additional_inbox_files:
            # str.split() replaces the deprecated string.split() function
            tar.add(path, path.split('/')[-1])
        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
    finally:
        # close the archive even when a file could not be added
        tar.close()
    return tarName
850    
def wsSetupEnvironment(self, nj):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    The generated shell text, in order:
      * sets up the CMS environment for LCG or OSG middleware (on OSG a
        temporary working directory is created first);
      * creates the SCRAM project area for self.version, enters it and
        evaluates the SCRAM runtime environment;
      * checks that the wrapper received at least as many arguments as
        self.argsList[nj] contains;
      * defines the dataset related variables used for publication;
      * when a parameter set is used (self.pset is not None), copies it
        from $RUNTIME_AREA, patches it with sed using the job arguments
        and renames it to pset.cfg;
      * unpacks the additional input tarball when additional input files
        were declared.
    """
    # Prepare JobType-independent part
    txt = ''
    txt += 'echo ">>> setup environment"\n'
    txt += 'if [ $middleware == LCG ]; then \n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' if [ ! $? == 0 ] ;then\n'
    txt += ' echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
    txt += ' echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' exit 1\n'
    txt += ' fi\n'
    txt += ' echo ">>> Created working directory: $WORKING_DIR"\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo ">>> specific cmssw setup environment:"\n'
    txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
    txt += scram+' project CMSSW '+self.version+'\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
    txt += ' echo "JOB_EXIT_STATUS = 10034"\n'
    txt += ' echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    # report the release that was actually requested (the message used to
    # carry a hard-coded CMSSW_0_6_1)
    txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW '+self.version+' not found on `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
    txt += ' echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += ' exit 1 \n'
    txt += 'fi \n'
    txt += 'cd '+self.version+'\n'
    ########## FEDE FOR DBS2 ######################
    txt += 'SOFTWARE_DIR=`pwd`\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    ###############################################
    ### needed grep for bug in scramv1 ###
    txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber)\n"
    txt += "\n"
    txt += "if [ $nargs -lt "+str(len(self.argsList[nj].split()))+" ]\n"
    txt += "then\n"
    txt += " echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
    txt += ' echo "JOB_EXIT_STATUS = 50113"\n'
    txt += ' echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
    txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
    txt += ' echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += " exit 1\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    ### FEDE FOR DBS OUTPUT PUBLICATION
    if (self.datasetPath):
        txt += '\n'
        txt += 'DatasetPath='+self.datasetPath+'\n'

        datasetpath_split = self.datasetPath.split("/")

        txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
        txt += 'DataTier='+datasetpath_split[2]+'\n'
        txt += 'ApplicationFamily=cmsRun\n'

    else:
        txt += 'DatasetPath=MCDataTier\n'
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'
    if self.pset is not None: #CarlosDaniele
        pset = os.path.basename(job.configFilename())
        txt += '\n'
        txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
        if (self.datasetPath): # standard job
            txt += 'InputFiles=${args[1]}\n'
            txt += 'MaxEvents=${args[2]}\n'
            txt += 'SkipEvents=${args[3]}\n'
            txt += 'echo "Inputfiles:<$InputFiles>"\n'
            txt += 'sed "s#\'INPUTFILE\'#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
            txt += 'echo "MaxEvents:<$MaxEvents>"\n'
            txt += 'sed "s#int32 input = 0#int32 input = $MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
            txt += 'echo "SkipEvents:<$SkipEvents>"\n'
            txt += 'sed "s#uint32 skipEvents = 0#uint32 skipEvents = $SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
        else: # pythia like job
            # position (in the wrapper arguments) of the next value to read
            seedIndex=1
            if (self.firstRun):
                txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
                txt += 'echo "FirstRun: <$FirstRun>"\n'
                txt += 'sed "s#uint32 firstRun = 0#uint32 firstRun = $FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                seedIndex=seedIndex+1

            # NOTE(review): the optional seeds are assumed to be nested under
            # sourceSeed, as suggested by the closing 'pass' statements of the
            # original -- confirm against the repository copy.
            if (self.sourceSeed):
                txt += 'Seed=${args['+str(seedIndex)+']}\n'
                txt += 'sed "s#uint32 sourceSeed = 0#uint32 sourceSeed = $Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                seedIndex=seedIndex+1
                ## the following seeds are not always present
                if (self.sourceSeedVtx):
                    txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "VtxSeed: <$VtxSeed>"\n'
                    txt += 'sed "s#uint32 VtxSmeared = 0#uint32 VtxSmeared = $VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
                if (self.sourceSeedG4):
                    txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "G4Seed: <$G4Seed>"\n'
                    txt += 'sed "s#uint32 g4SimHits = 0#uint32 g4SimHits = $G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
                if (self.sourceSeedMix):
                    txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "MixSeed: <$mixSeed>"\n'
                    txt += 'sed "s#uint32 mix = 0#uint32 mix = $mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
        txt += 'mv -f '+pset+' pset.cfg\n'

    if len(self.additional_inbox_files) > 0:
        txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
        txt += ' tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
        txt += 'fi\n'

    if self.pset is not None: #CarlosDaniele
        txt += '\n'
        txt += 'echo "***** cat pset.cfg *********"\n'
        txt += 'cat pset.cfg\n'
        txt += 'echo "****** end pset.cfg ********"\n'
        txt += '\n'
        ### FEDE FOR DBS OUTPUT PUBLICATION
        txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
        txt += 'echo "PSETHASH = $PSETHASH" \n'
        ##############
        txt += '\n'
    return txt
1021    
def wsBuildExe(self, nj=0):
    """
    Return the script fragment that unpacks the user tarball on the worker
    node and puts $SOFTWARE_DIR/ProdCommon in front of PYTHONPATH.

    An empty string is returned when no file exists at
    self.tgzNameWithPath. nj is not used.
    """
    if not os.path.isfile(self.tgzNameWithPath):
        return ""

    tarball = os.path.basename(self.tgzNameWithPath)
    script = [
        # unpack the sandbox and stop the job if that fails
        'echo ">>> tar xzvf $RUNTIME_AREA/'+tarball+' :" \n',
        'tar xzvf $RUNTIME_AREA/'+tarball+'\n',
        'untar_status=$? \n',
        'if [ $untar_status -ne 0 ]; then \n',
        ' echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n',
        ' echo "JOB_EXIT_STATUS = $untar_status" \n',
        ' echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n',
        ' if [ $middleware == OSG ]; then \n',
        ' cd $RUNTIME_AREA\n',
        ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n',
        ' echo ">>> Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n',
        ' echo "JOB_EXIT_STATUS = 50999"\n',
        ' echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' fi\n',
        ' fi \n',
        ' \n',
        ' exit 1 \n',
        'else \n',
        ' echo "Successful untar" \n',
        'fi \n',
        '\n',
        # make the shipped ProdCommon package importable
        'echo ">>> Include ProdCommon in PYTHONPATH:"\n',
        'if [ -z "$PYTHONPATH" ]; then\n',
        ' export PYTHONPATH=$SOFTWARE_DIR/ProdCommon\n',
        'else\n',
        ' export PYTHONPATH=$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n',
        'echo "PYTHONPATH=$PYTHONPATH"\n',
        'fi\n',
        '\n',
    ]
    return "".join(script)
1071    
def modifySteeringCards(self, nj):
    """
    Placeholder for modifying the card provided by the user and writing a
    new card into the share dir; this implementation does nothing.
    """
    return None
1077 ewv 1.131
def executableName(self):
    """
    Return the command that starts the job: "sh " when a user script
    (self.scriptExe) is set, otherwise self.executable.
    """
    if not self.scriptExe:
        return self.executable
    return "sh "
1083 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string handed to the job executable.

    With a user script (self.scriptExe) the script name followed by
    " $NJob" is returned. Otherwise the arguments select pset.cfg and, for
    releases from CMSSW_1_5_X on, also request the framework job report
    self.fjrFileName through the -j option.

    Raises CrabException when the major/minor release numbers cannot be
    read from self.scram.getSWVersion().
    """
    if self.scriptExe:#CarlosDaniele
        return self.scriptExe + " $NJob"

    # if >= CMSSW_1_5_X, add -j crab_fjr.xml
    version_array = self.scram.getSWVersion().split('_')
    try:
        major = int(version_array[1])
        minor = int(version_array[2])
    except (IndexError, ValueError):
        msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
        raise CrabException(msg)

    # Compare the (major, minor) pair as a whole: testing the two numbers
    # independently rejected newer releases such as CMSSW_2_0_X.
    if (major, minor) >= (1, 5):
        return " -j " + self.fjrFileName + " -p pset.cfg"
    return " -p pset.cfg"
1102 slacapra 1.1
def inputSandbox(self, nj):
    """
    Build the list of files that go into the JDL input sandbox: the user
    and ML tarballs (each only if the file exists), the job configuration
    file when a parameter set is used, and the tarball returned by
    self.additionalInputFileTgz(). nj is not used.
    """
    # tarballs are shipped only if they were actually produced
    candidates = (self.tgzNameWithPath, self.MLtgzfile)
    sandbox = [tarball for tarball in candidates if os.path.isfile(tarball)]

    # job configuration
    if self.pset is not None:
        sandbox.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())

    # additional input files
    sandbox.append(self.additionalInputFileTgz())
    return sandbox
1122    
def outputSandbox(self, nj):
    """
    Return the file names for the JDL output sandbox: every entry of
    self.output_file and self.output_file_sandbox, numbered through
    self.numberFile_ with nj + 1.
    """
    declared = self.output_file+self.output_file_sandbox
    return [self.numberFile_(name, str(nj + 1)) for name in declared]
1134    
def prepareSteeringCards(self):
    """
    Hook for initial modifications of the user's steering card file;
    nothing is done here.
    """
    return None
1140    
def wsRenameOutput(self, nj):
    """
    Returns part of a job script which renames the produced files.

    For every declared output file the script moves the file to its
    numbered name (see numberFile_) and links it back under its original
    name in $RUNTIME_AREA, or sets exit_status=60302 when the file is
    missing. With self.copy_data == 1 the files of self.output_file are
    renamed in place instead of being moved to $RUNTIME_AREA. The numbered
    names of self.output_file are exported in the shell variable file_list.
    nj is not used: the job number is taken from $NJob at run time.
    """

    txt = '\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    txt += 'ls \n'
    txt += '\n'

    txt += 'output_exit_status=0\n'

    # files returned through the output sandbox
    for fileWithSuffix in (self.output_file_sandbox):
        output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
        txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
        txt += ' ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
        txt += 'else\n'
        txt += ' exit_status=60302\n'
        txt += ' echo "ERROR: Output file '+fileWithSuffix+' not found"\n'
        if common.scheduler.boss_scheduler_name == 'condor_g':
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' fi \n'
        txt += 'fi\n'

    # user declared output files
    for fileWithSuffix in (self.output_file):
        output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
        if (self.copy_data == 1): # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
            txt += ' mv '+fileWithSuffix+' '+output_file_num+'\n'
            txt += ' ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
        else:
            txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
        txt += 'else\n'
        txt += ' exit_status=60302\n'
        txt += ' echo "ERROR: Output file '+fileWithSuffix+' not found"\n'
        txt += ' echo "JOB_EXIT_STATUS = $exit_status"\n'
        txt += ' output_exit_status=$exit_status\n'
        if common.scheduler.boss_scheduler_name == 'condor_g':
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' fi \n'
        txt += 'fi\n'

    # str.join() replaces the deprecated string.join() module function
    file_list = [self.numberFile_(fileWithSuffix, '$NJob') for fileWithSuffix in self.output_file]
    txt += 'file_list="'+' '.join(file_list)+'"\n'
    txt += '\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    txt += 'ls \n'
    txt += '\n'
    txt += 'cd $RUNTIME_AREA\n'
    txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    return txt
1206    
def numberFile_(self, file, txt):
    """
    append _'txt' before last extension of a file

    'out.root' with txt '3' becomes 'out_3.root'; a name without any dot
    simply gets '_' + txt appended.
    """
    # str methods replace the deprecated string.split() module function
    if "." not in file:
        return file + '_' + txt

    # split on the last dot only, so inner dots stay in the base name
    name, ext = file.rsplit(".", 1)
    return name + '_' + txt + "." + ext
1224    
def getRequirements(self, nj=[]):
    """
    return job requirements to add to jdl files

    The expression requires the software tags "VO-cms-<version>" and
    "VO-cms-<architecture>" (each only when the value is set) and outbound
    network connectivity. nj is not used.
    """
    runtime_env = 'other.GlueHostApplicationSoftwareRunTimeEnvironment'

    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + '", ' + runtime_env + ')')
    ## SL add requirement for OS version
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + '", ' + runtime_env + ')')
    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')

    # joining avoids a dangling leading ' && ' when the version is empty
    return ' && '.join(clauses)
1244 gutsche 1.3
def configFilename(self):
    """Return the config file name: self.name() followed by '.cfg'."""
    base = self.name()
    return base+'.cfg'
1248    
def wsSetupCMSOSGEnvironment_(self):
    """
    Return the OSG part of the job script that prepares the CMS software
    environment: it exports SCRAM_ARCH and sources
    $OSG_APP/cmssoft/cms/cmsset_default.sh for self.version. When that file
    is missing the script reports error 10020, removes the working
    directory and exits.
    """
    arch = self.executable_arch
    release = self.version
    script = [
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH='+arch+'\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh '+release+'\n',
        ' else\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        '\n',
        # clean up the temporary working directory before giving up
        ' cd $RUNTIME_AREA\n',
        ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n',
        ' echo ">>> Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10017"\n',
        ' echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' fi\n',
        '\n',
        ' exit 1\n',
        ' fi\n',
        '\n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(script)
1285 ewv 1.131
1286 gutsche 1.3 ### OLI_DANIELE
def wsSetupCMSLCGEnvironment_(self):
    """
    Return the LCG part of the job script that prepares the CMS software
    environment: it exports SCRAM_ARCH and BUILD_ARCH and sources
    $VO_CMS_SW_DIR/cmsset_default.sh, exiting with error 10031, 10020 or
    10032 when the software dir is unset, the file is missing or empty, or
    sourcing it fails.
    """
    arch = self.executable_arch
    script = [
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH='+arch+'\n',
        ' export BUILD_ARCH='+arch+'\n',
        # the software area must be announced by the site
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n',
        ' echo "JOB_EXIT_STATUS = 10031" \n',
        ' echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' echo "JOB_EXIT_STATUS = 10032"\n',
        ' echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
    ]
    return ''.join(script)
1325 gutsche 1.5
1326 ewv 1.131 ### FEDE FOR DBS OUTPUT PUBLICATION
def modifyReport(self, nj):
    """
    Return the script fragment that rewrites the FrameworkJobReport for
    publication.

    When cfg_params['USER.publish_data'] is 1 the fragment runs
    ProdCommon's ModifyJobReport.py on crab_fjr_$NJob.xml, using
    cfg_params['USER.publish_data_name'] as processed dataset name;
    otherwise it only echoes that no publication is required. A missing
    'USER.publish_data' key counts as 0. nj is not used.
    """
    try:
        publish_data = int(self.cfg_params['USER.publish_data'])
    except KeyError:
        publish_data = 0

    if publish_data != 1:
        return 'echo "no data publication required"\n'

    processedDataset = self.cfg_params['USER.publish_data_name']
    tool = '$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py'
    # the same command line is first echoed and then executed
    command = tool + ' crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH'

    script = [
        'echo ">>> Modify Job Report:" \n',
        'chmod a+x ' + tool + '\n',
        'if [ -z "$SE" ]; then\n',
        ' SE="" \n',
        'fi \n',
        'if [ -z "$SE_PATH" ]; then\n',
        ' SE_PATH="" \n',
        'fi \n',
        'echo "SE = $SE"\n',
        'echo "SE_PATH = $SE_PATH"\n',
        'ProcessedDataset='+processedDataset+'\n',
        # LFN=/store/user/<user>/processedDataset_PSETHASH
        'if [ "$SE_PATH" == "" ]; then\n',
        ' FOR_LFN=/copy_problems/ \n',
        'else \n',
        " tmp=`echo $SE_PATH | awk -F 'store' '{print$2}'` \n",
        # 'store' is hard-coded here
        ' FOR_LFN=/store$tmp \n',
        'fi \n',
        'echo "ProcessedDataset = $ProcessedDataset"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
        'echo "' + command + '"\n',
        command + '\n',
        'modifyReport_result=$?\n',
        'echo modifyReport_result = $modifyReport_result\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' exit_status=1\n',
        ' echo "ERROR: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n',
        'fi\n',
    ]
    return ''.join(script)
1383 fanzago 1.99
def cleanEnv(self):
    """
    Return the script fragment that, on OSG middleware, goes back to
    $RUNTIME_AREA and removes $WORKING_DIR, reporting error 60999 when the
    directory is still there afterwards.
    """
    script = [
        'if [ $middleware == OSG ]; then\n',
        ' cd $RUNTIME_AREA\n',
        ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n',
        ' echo ">>> Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n',
        ' echo "JOB_EXIT_STATUS = 60999"\n',
        ' echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' fi\n',
        'fi\n',
        '\n',
    ]
    return ''.join(script)
1400 fanzago 1.93
def setParam_(self, param, value):
    """
    store value under the key param in the job-type parameters

    An existing entry with the same key is overwritten.
    """
    self._params.update({param: value})
1403    
def getParams(self):
    """
    return the dictionary of job-type parameters

    The internal dictionary itself is returned, not a copy.
    """
    params = self._params
    return params
1406 gutsche 1.8
def uniquelist(self, old):
    """
    remove duplicates from a list

    Returns a new list holding each distinct element of old exactly
    once, in order of first occurrence. Elements must be hashable.
    The input sequence is not modified.
    """
    # A dict is used for the membership test (instead of returning
    # dict.keys()) so that the result is always a real list with a
    # deterministic order, on both Python 2 and Python 3.
    seen = {}
    unique = []
    for e in old:
        if e not in seen:
            seen[e] = 0
            unique.append(e)
    return unique
1415 mcinquil 1.121
1416    
def checkOut(self, limit):
    """
    check the dimension of the output files

    Returns the shell fragment (a string) that sums the sizes of the
    files going into the output sandbox and compares the total with
    limit. When the total exceeds limit the fragment deletes every file
    that is larger than limit on its own, or that pushes the running
    total over limit, and sets exit_status=70000.

    limit is written verbatim into the script as `limit=<limit>` and
    compared numerically (-lt) with the third column of `ls -gG`.
    NOTE(review): presumably limit is expressed in the same unit as
    that ls column -- confirm against the caller.

    The files checked are output_file + output_file_sandbox when
    return_data == 1, otherwise output_file_sandbox only; names are
    numbered through numberFile_(name, '$NJob').
    """
    txt = 'echo ">>> Starting output sandbox limit check :"\n'
    txt += 'stdoutFile=`ls *stdout` \n'
    txt += 'stderrFile=`ls *stderr` \n'

    if (self.return_data == 1):
        candidates = self.output_file + self.output_file_sandbox
        echoNames = False
    else:
        candidates = self.output_file_sandbox
        echoNames = True

    # Each name is listed only once (order of first appearance), so that
    # the job stdout/stderr are not counted once per output file in the
    # size totals below.
    listOutFiles = []
    for fileOut in candidates:
        if echoNames:
            txt += 'echo " '+fileOut+'";\n'
        for name in (self.numberFile_(fileOut, '$NJob'), '$stdoutFile', '$stderrFile'):
            if name not in listOutFiles:
                listOutFiles.append(name)
    allOutFiles = ''.join([' ' + name for name in listOutFiles])

    # first pass: total size of the files that actually exist
    txt += 'echo "OUTPUT files: '+allOutFiles+'";\n'
    txt += 'ls -gGhrta;\n'
    txt += 'sum=0;\n'
    txt += 'for file in '+allOutFiles+' ; do\n'
    txt += ' if [ -e $file ]; then\n'
    txt += ' tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
    txt += ' sum=`expr $sum + $tt`\n'
    txt += ' else\n'
    txt += ' echo "WARNING: output file $file not found!"\n'
    txt += ' fi\n'
    txt += 'done\n'
    txt += 'echo "Total Output dimension: $sum";\n'
    txt += 'limit='+str(limit)+';\n'
    txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
    txt += 'if [ $limit -lt $sum ]; then\n'
    txt += ' echo "WARNING: output files have to big size - something will be lost;"\n'
    txt += ' echo " checking the output file sizes..."\n'
    txt += ' tot=0;\n'
    # second pass: decide file by file what can be kept
    txt += ' for filefile in '+allOutFiles+' ; do\n'
    # skip missing files: an empty size would break expr and the tests
    txt += ' if [ -e $filefile ]; then\n'
    txt += ' dimFile=`ls -gGrta $filefile | awk \'{ print $3 }\';`\n'
    # accumulate the size of the current file (not the stale $tt left
    # over from the first loop)
    txt += ' tot=`expr $tot + $dimFile`;\n'
    txt += ' if [ $limit -lt $dimFile ]; then\n'
    txt += ' echo "deleting file: $filefile";\n'
    txt += ' rm -f $filefile\n'
    txt += ' elif [ $limit -lt $tot ]; then\n'
    txt += ' echo "deleting file: $filefile";\n'
    txt += ' rm -f $filefile\n'
    txt += ' else\n'
    txt += ' echo "saving file: $filefile"\n'
    txt += ' fi\n'
    txt += ' fi\n'
    txt += ' done\n'

    txt += ' ls -agGhrt;\n'
    txt += ' echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
    txt += ' echo "JOB_EXIT_STATUS = 70000";\n'
    txt += ' exit_status=70000;\n'
    txt += 'else\n'
    txt += ' echo "Total Output dimension $sum is fine.";\n'
    txt += 'fi\n'
    txt += 'echo "Ending output sandbox limit check"\n'
    return txt