ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.137
Committed: Fri Nov 16 11:09:31 2007 UTC (17 years, 5 months ago) by slacapra
Content type: text/x-python
Branch: MAIN
Changes since 1.136: +3 -26 lines
Log Message:
remove support for DBS1, and remove DBS2 string from  files/class

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5 fanzago 1.115 from BlackWhiteListParser import BlackWhiteListParser
6 slacapra 1.1 import common
7     import Scram
8    
9 slacapra 1.105 import os, string, glob
10 slacapra 1.1
11     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs):
    """
    Configure a CMSSW job type from the user configuration.

    Reads the EDG.*, CMSSW.* and USER.* keys of cfg_params, validates them,
    performs data discovery when a dataset path is given, prepares the user
    tarball, splits the task into jobs and finally writes the edited
    parameter set.

    cfg_params -- mapping indexed by 'SECTION.key' strings; a missing key
                  is expected to raise KeyError.
    ncjobs     -- number of jobs requested to be created (limits the
                  splitting).

    Raises CrabException for missing/invalid configuration, for an
    unsupported version/architecture combination and when the parameter
    set cannot be edited.

    NOTE(review): the listing this was taken from had lost its indentation;
    block nesting was reconstructed from the control flow - confirm against
    the repository copy.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug(3,'CMSSW::__init__')

    self._params = {}
    self.cfg_params = cfg_params

    # init BlackWhiteListParser
    self.blackWhiteListParser = BlackWhiteListParser(cfg_params)

    # tarball size limit: configured value, 9.5 when not configured
    try:
        self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
    except KeyError:
        self.MaxTarBallSize = 9.5

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    log = common.logger

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.additional_tgz_name = 'additional.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''          # becomes None when CMSSW.pset is 'none' (script use case)
    self.datasetPath = ''   # becomes None when CMSSW.datasetpath is 'none'

    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()

    #
    # Try to block creation in case of arch/version mismatch
    #
    a = string.split(self.version, "_")

    if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
        msg = "Warning: You are using %s version of CMSSW with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
        common.logger.message(msg)
    if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
        msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
        raise CrabException(msg)

    common.taskDB.setDict('codeVersion',self.version)
    self.setParam_('application', self.version)

    ### collect Data cards
    try:
        tmp = cfg_params['CMSSW.datasetpath']
        log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
        if string.lower(tmp)=='none':
            self.datasetPath = None
            self.selectNoInput = 1
        else:
            self.datasetPath = tmp
            self.selectNoInput = 0
    except KeyError:
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)

    # ML monitoring
    # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
    if not self.datasetPath:
        self.setParam_('dataset', 'None')
        self.setParam_('owner', 'None')
    else:
        try:
            datasetpath_split = self.datasetPath.split("/")
            # standard style
            self.setParam_('datasetFull', self.datasetPath)
            self.setParam_('dataset', datasetpath_split[1])
            self.setParam_('owner', datasetpath_split[2])
        except IndexError:
            # path has fewer than three '/'-separated fields: fall back to
            # the raw string for both parameters
            self.setParam_('dataset', self.datasetPath)
            self.setParam_('owner', self.datasetPath)

    self.setTaskid_()
    self.setParam_('taskId', self.cfg_params['taskId'])

    self.dataTiers = []

    ## now the application
    try:
        self.executable = cfg_params['CMSSW.executable']
        self.setParam_('exe', self.executable)
        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
        msg = "Default executable cmsRun overridden. Switch to " + self.executable
        log.debug(3,msg)
    except KeyError:
        self.executable = 'cmsRun'
        self.setParam_('exe', self.executable)
        msg = "User executable not defined. Use cmsRun"
        log.debug(3,msg)

    try:
        self.pset = cfg_params['CMSSW.pset']
        log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
        if self.pset.lower() != 'none' :
            if (not os.path.exists(self.pset)):
                raise CrabException("User defined PSet file "+self.pset+" does not exist")
        else:
            self.pset = None
    except KeyError:
        raise CrabException("PSet file missing. Cannot run cmsRun ")

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    try:
        self.output_file = []
        tmp = cfg_params['CMSSW.output_file']
        if tmp != '':
            tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
            log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
            for outName in tmpOutFiles:
                self.output_file.append(string.strip(outName))
        else:
            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
    except KeyError:
        log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")

    # script_exe file as additional file in inputSandbox
    try:
        self.scriptExe = cfg_params['USER.script_exe']
        if self.scriptExe != '':
            if not os.path.isfile(self.scriptExe):
                msg ="ERROR. file "+self.scriptExe+" not found"
                raise CrabException(msg)
            self.additional_inbox_files.append(string.strip(self.scriptExe))
    except KeyError:
        self.scriptExe = ''

    #CarlosDaniele
    # with neither a dataset nor a parameter set, a user script is mandatory
    if self.datasetPath is None and self.pset is None and self.scriptExe == '' :
        msg ="Error. script_exe not defined"
        raise CrabException(msg)

    ## additional input files
    try:
        tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
        for tmp in tmpAddFiles:
            tmp = string.strip(tmp)
            if not tmp:
                # empty option or stray comma: names no file, and tmp[0]
                # below would raise IndexError on it
                continue
            dirname = ''
            if not tmp[0]=="/": dirname = "."
            files = []
            if string.find(tmp,"*")>-1:
                files = glob.glob(os.path.join(dirname, tmp))
                if len(files)==0:
                    raise CrabException("No additional input file found with this pattern: "+tmp)
            else:
                files.append(tmp)
            for fname in files:
                if not os.path.exists(fname):
                    raise CrabException("Additional input file not found: "+fname)
                self.additional_inbox_files.append(string.strip(fname))
        common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
    except KeyError:
        pass

    # files per job
    try:
        if (cfg_params['CMSSW.files_per_jobs']):
            raise CrabException("files_per_jobs no longer supported. Quitting.")
    except KeyError:
        pass

    ## Events per job
    try:
        self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
        self.selectEventsPerJob = 1
    except KeyError:
        self.eventsPerJob = -1
        self.selectEventsPerJob = 0

    ## number of jobs
    try:
        self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
        self.selectNumberOfJobs = 1
    except KeyError:
        self.theNumberOfJobs = 0
        self.selectNumberOfJobs = 0

    try:
        self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
        self.selectTotalNumberEvents = 1
    except KeyError:
        self.total_number_of_events = 0
        self.selectTotalNumberEvents = 0

    if self.pset is not None: #CarlosDaniele
        if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
            msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
            raise CrabException(msg)
    else:
        if (self.selectNumberOfJobs == 0):
            msg = 'Must specify number_of_jobs.'
            raise CrabException(msg)

    ## source seed for pythia
    try:
        self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
    except KeyError:
        self.sourceSeed = None
        common.logger.debug(5,"No seed given")

    try:
        self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
    except KeyError:
        self.sourceSeedVtx = None
        common.logger.debug(5,"No vertex seed given")

    try:
        self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
    except KeyError:
        self.sourceSeedG4 = None
        common.logger.debug(5,"No g4 sim hits seed given")

    try:
        self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
    except KeyError:
        self.sourceSeedMix = None
        common.logger.debug(5,"No mix seed given")

    try:
        self.firstRun = int(cfg_params['CMSSW.first_run'])
    except KeyError:
        self.firstRun = None
        common.logger.debug(5,"No first run given")

    if self.pset is not None: #CarlosDaniele
        import PsetManipulator as pp
        PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset

    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination=[]  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end

    self.tgzNameWithPath = self.getTarBall(self.executable)

    ## Select Splitting
    if self.selectNoInput:
        if self.pset is None: #CarlosDaniele
            self.jobSplittingForScript()
        else:
            self.jobSplittingNoInput()
    else:
        self.jobSplittingByBlocks(blockSites)

    # modify Pset
    if self.pset is not None: #CarlosDaniele
        try:
            if (self.datasetPath): # standard job
                # allow to process a fraction of events in a file
                PsetEdit.inputModule("INPUTFILE")
                PsetEdit.maxEvent(0)
                PsetEdit.skipEvent(0)
            else:  # pythia like job
                PsetEdit.maxEvent(self.eventsPerJob)
                if (self.firstRun):
                    PsetEdit.pythiaFirstRun(0)  #First Run
                # NOTE(review): vtx/g4/mix seeds are taken to depend on the
                # pythia seed being set (nesting reconstructed) - confirm.
                if (self.sourceSeed) :
                    PsetEdit.pythiaSeed(0)
                    if (self.sourceSeedVtx) :
                        PsetEdit.vtxSeed(0)
                    if (self.sourceSeedG4) :
                        PsetEdit.g4Seed(0)
                    if (self.sourceSeedMix) :
                        PsetEdit.mixSeed(0)
            # add FrameworkJobReport to parameter-set
            PsetEdit.addCrabFJR(self.fjrFileName)
            PsetEdit.psetWriter(self.configFilename())
        except CrabException:
            # already a user-facing error: do not hide its message
            raise
        except Exception:
            import sys
            # keep the underlying cause in the log before raising the
            # generic user-facing error
            common.logger.debug(3, 'ParameterSet manipulation failed: '+str(sys.exc_info()[1]))
            msg='Error while manipuliating ParameterSet: exiting...'
            raise CrabException(msg)
317 gutsche 1.3
318 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
319    
320 slacapra 1.86 import DataDiscovery
321     import DataLocation
322 gutsche 1.3 common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
323    
324     datasetPath=self.datasetPath
325    
326 slacapra 1.1 ## Contact the DBS
327 gutsche 1.92 common.logger.message("Contacting Data Discovery Services ...")
328 slacapra 1.1 try:
329 gutsche 1.66
330 slacapra 1.137 self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
331 slacapra 1.1 self.pubdata.fetchDBSInfo()
332    
333 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
334 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
335     raise CrabException(msg)
336 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
337 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
338     raise CrabException(msg)
339 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
340 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
341 slacapra 1.1 raise CrabException(msg)
342    
343 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
344 mkirn 1.37 self.eventsbyblock=self.pubdata.getEventsPerBlock()
345     self.eventsbyfile=self.pubdata.getEventsPerFile()
346 gutsche 1.3
347 slacapra 1.1 ## get max number of events
348 ewv 1.131 self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py
349 slacapra 1.1
350     ## Contact the DLS and build a list of sites hosting the fileblocks
351     try:
352 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
353 gutsche 1.6 dataloc.fetchDLSInfo()
354 slacapra 1.41 except DataLocation.DataLocationError , ex:
355 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
356     raise CrabException(msg)
357 ewv 1.131
358 slacapra 1.1
359 gutsche 1.35 sites = dataloc.getSites()
360     allSites = []
361     listSites = sites.values()
362 slacapra 1.63 for listSite in listSites:
363     for oneSite in listSite:
364 gutsche 1.35 allSites.append(oneSite)
365     allSites = self.uniquelist(allSites)
366 gutsche 1.3
367 gutsche 1.92 # screen output
368     common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
369    
370 gutsche 1.35 return sites
371 ewv 1.131
def jobSplittingByBlocks(self, blockSites):
    """
    Perform job splitting. Jobs run over an integer number of files
    and no more than one block.
    ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
    REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
              self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
              self.maxEvents, self.filesbyblock, self.eventsbyblock, self.eventsbyfile,
              self.ncjobs
    SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
          self.total_number_of_jobs - Total # of jobs (also stored in self.ncjobs)
          self.list_of_args - per job: [file list string, max events, skip events]
    RAISES: CrabException when the splitting parameters are missing or not positive

    NOTE(review): the listing this was taken from had lost its indentation;
    block nesting was reconstructed from the control flow - confirm against
    the repository copy.
    """

    # ---- Handle the possible job splitting configurations ---- #
    totalEventsRequested = None
    eventsPerJobRequested = None
    if (self.selectTotalNumberEvents):
        totalEventsRequested = self.total_number_of_events
    if (self.selectEventsPerJob):
        eventsPerJobRequested = self.eventsPerJob
        if (self.selectNumberOfJobs):
            totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob

    if totalEventsRequested is None:
        # neither a total nor (events_per_job, number_of_jobs) was given
        raise CrabException('Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.')

    # If user requested all the events in the dataset
    if (totalEventsRequested == -1):
        eventsRemaining=self.maxEvents
    # If user requested more events than are in the dataset
    elif (totalEventsRequested > self.maxEvents):
        eventsRemaining = self.maxEvents
        # report the effective request: total_number_of_events is 0 when the
        # total was derived from number_of_jobs * events_per_job
        common.logger.message("Requested "+str(totalEventsRequested)+ " events, but only "+str(self.maxEvents)+" events are available.")
    # If user requested less events than are in the dataset
    else:
        eventsRemaining = totalEventsRequested

    # If user requested more events per job than are in the dataset
    if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
        eventsPerJobRequested = self.maxEvents

    # For user info at end
    totalEventCount = 0

    if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
        if self.theNumberOfJobs < 1:
            raise CrabException('number_of_jobs must be a positive number.')
        # never go below one event per job: a zero here would create jobs
        # that consume no events, so the loops below would not terminate
        # before reaching the job limit
        eventsPerJobRequested = max(1, int(eventsRemaining/self.theNumberOfJobs))

    if eventsPerJobRequested is None:
        raise CrabException('Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.')
    if eventsRemaining > 0 and eventsPerJobRequested < 1:
        # a non-positive value never decreases eventsRemaining below
        raise CrabException('events_per_job must be a positive number.')

    if (self.selectNumberOfJobs):
        common.logger.message("May not create the exact number_of_jobs requested.")

    if ( self.ncjobs == 'all' ) :
        totalNumberOfJobs = 999999999
    else :
        totalNumberOfJobs = self.ncjobs

    blocks = blockSites.keys()
    blockCount = 0
    # Backup variable in case self.maxEvents counted events in a non-included block
    numBlocksInDataset = len(blocks)

    jobCount = 0
    list_of_lists = []

    # dictionary tracking which jobs belong to which block
    jobsOfBlock = {}

    # ---- Iterate over the blocks in the dataset until ---- #
    # ---- we've met the requested total # of events    ---- #
    while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
        block = blocks[blockCount]
        blockCount += 1
        if block not in jobsOfBlock:
            jobsOfBlock[block] = []

        if self.eventsbyblock.has_key(block) :
            numEventsInBlock = self.eventsbyblock[block]
            common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))

            files = self.filesbyblock[block]
            numFilesInBlock = len(files)
            if (numFilesInBlock <= 0):
                continue
            fileCount = 0

            # ---- New block => New job ---- #
            parString = ""
            # counter for number of events in files currently worked on
            filesEventCount = 0
            # flag if next while loop should touch new file
            newFile = 1
            # job event counter
            jobSkipEventCount = 0

            # ---- Iterate over the files in the block until we've met the requested ---- #
            # ---- total # of events or we've gone over all the files in this block  ---- #
            while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
                fileName = files[fileCount]
                if newFile :
                    try:
                        numEventsInFile = self.eventsbyfile[fileName]
                        common.logger.debug(6, "File "+str(fileName)+" has "+str(numEventsInFile)+" events")
                        # increase filesEventCount
                        filesEventCount += numEventsInFile
                        # Add file to current job
                        parString += '\\\"' + fileName + '\\\"\,'
                        newFile = 0
                    except KeyError:
                        common.logger.message("File "+str(fileName)+" has unknown number of events: skipping")

                # if less events in file remain than eventsPerJobRequested
                if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
                    # if last file in block
                    if ( fileCount == numFilesInBlock-1 ) :
                        # end job using last file, use remaining events in block
                        # close job and touch new file
                        # [:-2] drops the trailing backslash-comma separator
                        fullString = parString[:-2]
                        list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
                        self.jobDestination.append(blockSites[block])
                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                        # fill jobs of block dictionary
                        jobsOfBlock[block].append(jobCount+1)
                        # reset counter
                        jobCount = jobCount + 1
                        totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
                        eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
                        jobSkipEventCount = 0
                        # reset file
                        parString = ""
                        filesEventCount = 0
                        newFile = 1
                        fileCount += 1
                    else :
                        # go to next file
                        newFile = 1
                        fileCount += 1
                # if events in file equal to eventsPerJobRequested
                elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
                    # close job and touch new file
                    fullString = parString[:-2]
                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    jobsOfBlock[block].append(jobCount+1)
                    # reset counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    jobSkipEventCount = 0
                    # reset file
                    parString = ""
                    filesEventCount = 0
                    newFile = 1
                    fileCount += 1

                # if more events in file remain than eventsPerJobRequested
                else :
                    # close job but don't touch new file
                    fullString = parString[:-2]
                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    jobsOfBlock[block].append(jobCount+1)
                    # increase counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    # calculate skip events for last file
                    # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
                    jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[fileName])
                    # remove all but the last file
                    filesEventCount = self.eventsbyfile[fileName]
                    parString = ""
                    parString += '\\\"' + fileName + '\\\"\,'
                pass # END if
            pass # END while (iterate over files in the block)
    pass # END while (iterate over blocks in the dataset)
    self.ncjobs = self.total_number_of_jobs = jobCount
    if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
        common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
    common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")

    # screen output
    screenOutput = "List of jobs and available destination sites:\n\n"

    # keep trace of block with no sites to print a warning at the end
    noSiteBlock = []
    bloskNoSite = []

    blockCounter = 0
    for block in blocks:
        if block in jobsOfBlock:
            blockCounter += 1
            # filter the hosting sites through black and white list once
            allowedSites = self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)
            jobRanges = spanRanges(jobsOfBlock[block])
            screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,jobRanges,','.join(allowedSites))
            if len(allowedSites) == 0:
                noSiteBlock.append( jobRanges )
                bloskNoSite.append( blockCounter )

    common.logger.message(screenOutput)
    if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
        msg = 'WARNING: No sites are hosting any part of data for block:\n                '
        virgola = ""
        if len(bloskNoSite) > 1:
            virgola = ","
        for block in bloskNoSite:
            msg += ' ' + str(block) + virgola
        msg += '\n               Related jobs:\n                 '
        virgola = ""
        if len(noSiteBlock) > 1:
            virgola = ","
        for range_jobs in noSiteBlock:
            msg += str(range_jobs) + virgola
        msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
        common.logger.message(msg)

    self.list_of_args = list_of_lists
    return
588    
def jobSplittingNoInput(self):
    """
    Perform job splitting based on number of event per job

    Uses whichever two of events_per_job, number_of_jobs and
    total_number_of_events were selected to derive the third, then fills
    self.jobDestination (one [""] entry per job) and self.list_of_args
    (per job: first-run/index argument followed by the optional seeds, each
    with the job index appended as text).

    Raises CrabException when the total number of events is negative or a
    value used as divisor is not positive.

    NOTE(review): the listing this was taken from had lost its indentation;
    the vtx/g4/mix seed arguments are taken to depend on the pythia seed
    being set (nesting reconstructed) - confirm against the repository copy.
    """
    common.logger.debug(5,'Splitting per events')

    if (self.selectEventsPerJob):
        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
    if (self.selectNumberOfJobs):
        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
    if (self.selectTotalNumberEvents):
        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')

    if (self.total_number_of_events < 0):
        msg='Cannot split jobs per Events with "-1" as total number of events'
        raise CrabException(msg)

    if (self.selectEventsPerJob):
        if (self.selectTotalNumberEvents):
            if self.eventsPerJob < 1:
                # would otherwise divide by zero (or give a negative count)
                raise CrabException('events_per_job must be a positive number.')
            self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
        elif(self.selectNumberOfJobs) :
            self.total_number_of_jobs =self.theNumberOfJobs
            self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)

    elif (self.selectNumberOfJobs) :
        if self.theNumberOfJobs < 1:
            # would otherwise divide by zero (or give a negative count)
            raise CrabException('number_of_jobs must be a positive number.')
        self.total_number_of_jobs = self.theNumberOfJobs
        self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)

    common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))

    # is there any remainder?
    check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)

    common.logger.debug(5,'Check  '+str(check))

    common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
    if check > 0:
        common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))

    # argument is seed number.$i
    self.list_of_args = []
    for i in range(self.total_number_of_jobs):
        ## Since there is no input, any site is good
        self.jobDestination.append([""]) #must be empty to write correctly the xml
        args=[]
        if (self.firstRun):
            ## pythia first run
            args.append(str(self.firstRun)+str(i))
        else:
            ## no first run
            args.append(str(i))
        if (self.sourceSeed):
            args.append(str(self.sourceSeed)+str(i))
            if (self.sourceSeedVtx):
                ## + vtx random seed
                args.append(str(self.sourceSeedVtx)+str(i))
            if (self.sourceSeedG4):
                ## + G4 random seed
                args.append(str(self.sourceSeedG4)+str(i))
            if (self.sourceSeedMix):
                ## + Mix random seed
                args.append(str(self.sourceSeedMix)+str(i))
        self.list_of_args.append(args)

    return
662    
663 spiga 1.42
def jobSplittingForScript(self):#CarlosDaniele
    """
    Split purely by job count (script use case).

    Creates self.theNumberOfJobs jobs; each gets an empty destination
    ([""]) appended to self.jobDestination and its own index, as a string,
    as the only argument in self.list_of_args.
    """
    common.logger.debug(5,'Splitting per job')
    common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')

    self.total_number_of_jobs = self.theNumberOfJobs
    njobs = self.total_number_of_jobs

    common.logger.debug(5,'N jobs  '+str(njobs))

    common.logger.message(str(njobs)+' jobs can be created')

    # argument is seed number.$i
    self.list_of_args = []
    jobIndices = range(njobs)
    ## no random seed: the job index is the only argument
    self.list_of_args.extend([[str(idx)] for idx in jobIndices])
    ## Since there is no input, any site is good
    self.jobDestination.extend([[""] for idx in jobIndices])
    return
686    
def split(self, jobParams):
    """
    Store the per-job arguments and destinations in the job database.

    jobParams -- list that receives one entry per job: it is first grown by
                 one "" placeholder per job, then positions
                 0..total_number_of_jobs-1 are set to the matching entry of
                 self.list_of_args.

    The job DB is loaded first and saved at the end.
    """
    common.jobDB.load()
    #### Fabio
    njobs = self.total_number_of_jobs
    arglist = self.list_of_args

    # create the empty structure
    jobParams.extend([""] * njobs)

    for idx in range(njobs):
        jobParams[idx] = arglist[idx]
        common.jobDB.setArguments(idx, jobParams[idx])
        destination = self.jobDestination[idx]
        common.logger.debug(5,"Job "+str(idx)+" Destination: "+str(destination))
        common.jobDB.setDestination(idx, destination)

    common.jobDB.save()
    return
707 ewv 1.131
def getJobTypeArguments(self, nj, sched):
    """
    Return the arguments stored in the job DB for job nj as one string,
    each argument followed by a single space. sched is not used here.
    """
    pieces = [str(arg) + " " for arg in common.jobDB.arguments(nj)]
    return ''.join(pieces)
713 ewv 1.131
def numberOfJobs(self):
    """Return the value stored in self.total_number_of_jobs."""
    # Fabio
    njobs = self.total_number_of_jobs
    return njobs
717    
def getTarBall(self, exe):
    """
    Return the path of the tarball holding the user lib and exe.

    The path is <pathForTgz()>share/<tgz_name> and is stored in
    self.tgzNameWithPath. An existing file is returned as is; otherwise
    buildTar_(exe) is called first and the stored path is returned with
    surrounding whitespace stripped.
    """
    #
    # Marco. Let's start to use relative path for Boss XML files
    #
    tarballPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
    self.tgzNameWithPath = tarballPath

    if not os.path.exists(tarballPath):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return self.tgzNameWithPath.strip()

    # if it exist, just return it
    return tarballPath
735    
def buildTar_(self, executable):
    """
    Create the tar-balls shipped in the job input sandbox.

    Nothing is built when the SCRAM release top is empty or equal to the
    user working area.  Otherwise the archive at self.tgzNameWithPath
    receives, when present: the user executable (only if it is not located
    under the release top), the 'lib' and 'module' directories of the
    working area, every 'data' directory found below it, and the
    ProdAgentApi and ProdCommon directories under $CRABDIR.  A second
    archive, share/MLfiles.tgz, is filled with helper scripts taken from
    $CRABDIR/python; its path is stored in self.MLtgzfile.

    Raises CrabException when the executable cannot be found, when an
    archive cannot be written, or when the main tar-ball is larger than
    self.MaxTarBallSize (expressed in MB).
    """
    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        return

    import tarfile
    try: # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            if (self.executable != ''):
                exeWithPath = self.scram.findFile_(executable)
                if ( not exeWithPath ):
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship
                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
                    path = swArea+'/'
                    # distinguish case when script is in user project area or given by full path somewhere else
                    if exeWithPath.find(path) >= 0 :
                        exe = exeWithPath.replace(path, '')
                        tar.add(path+exe,exe)
                    else :
                        tar.add(exeWithPath,os.path.basename(executable))
                # otherwise the exe is from the release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            libDir = 'lib'
            lib = swArea+'/' +libDir
            common.logger.debug(5,"lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib,libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module,moduleDir)

            ## Now check if any data dir(s) is present
            swAreaLen=len(swArea)
            for root, dirs, files in os.walk(swArea):
                if "data" in dirs:
                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
                    tar.add(root+"/data",root[swAreaLen:]+"/data")

            ## Add ProdAgent dir to tar
            paDir = 'ProdAgentApi'
            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
            if os.path.isdir(pa):
                tar.add(pa,paDir)

            ## Add PRODCOMMON dir to tar (needed for DBS publication)
            prodcommonDir = 'ProdCommon'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
            if os.path.isdir(prodcommonPath):
                tar.add(prodcommonPath,prodcommonDir)

            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            # always release the archive handle, also on failure
            tar.close()
    except CrabException:
        # keep the specific diagnostic (e.g. executable not found)
        raise
    except Exception:
        raise CrabException('Could not create tar-ball')

    ## check for tarball size
    tarballinfo = os.stat(self.tgzNameWithPath)
    if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
        raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')

    ## create tar-ball with ML stuff
    self.MLtgzfile = common.work_space.pathForTgz()+'share/MLfiles.tgz'
    try:
        tar = tarfile.open(self.MLtgzfile, "w:gz")
        try:
            path=os.environ['CRABDIR'] + '/python/'
            for helper in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
                tar.add(path+helper,helper)
            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
        finally:
            tar.close()
    except Exception:
        raise CrabException('Could not create ML files tar-ball')

    return
832 ewv 1.131
def additionalInputFileTgz(self):
    """
    Put all additional files into a tar ball and return its name.

    Every entry of self.additional_inbox_files is stored under its base
    name.  The archive is written to the work space 'share/' directory
    with the name self.additional_tgz_name, and is created even when there
    are no additional files.
    """
    import tarfile
    tarName = common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
    tar = tarfile.open(tarName, "w:gz")
    try:
        for path in self.additional_inbox_files:
            tar.add(path, os.path.basename(path))
        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
    finally:
        # release the archive handle even when a file cannot be added
        tar.close()
    return tarName
845    
def wsSetupEnvironment(self, nj):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    The generated shell text sets up the CMS environment for LCG or OSG,
    creates the CMSSW project area for self.version, checks the number of
    wrapper arguments, exports the dataset description variables and, when
    a parameter set is used, patches the job configuration with the
    per-job arguments before renaming it to pset.cfg.
    """
    # Prepare JobType-independent part
    txt = ''
    txt += 'echo ">>> setup environment"\n'
    txt += 'if [ $middleware == LCG ]; then \n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' if [ ! $? == 0 ] ;then\n'
    txt += ' echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
    txt += ' echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' exit 1\n'
    txt += ' fi\n'
    txt += ' echo ">>> Created working directory: $WORKING_DIR"\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo ">>> specific cmssw setup environment:"\n'
    txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
    txt += scram+' project CMSSW '+self.version+'\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
    txt += ' echo "JOB_EXIT_STATUS = 10034"\n'
    txt += ' echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    # report the release actually requested instead of a hard-coded one
    txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW '+self.version+' not found on `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
    txt += ' echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += ' exit 1 \n'
    txt += 'fi \n'
    txt += 'cd '+self.version+'\n'
    txt += 'SOFTWARE_DIR=`pwd`\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    ### needed grep for bug in scramv1 ###
    txt += scram+' runtime -sh\n'
    txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    txt += 'echo $PATH\n'

    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber)\n"
    txt += "\n"
    txt += "if [ $nargs -lt 2 ]\n"
    txt += "then\n"
    txt += " echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
    txt += ' echo "JOB_EXIT_STATUS = 50113"\n'
    txt += ' echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
    txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
    txt += ' echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += " exit 1\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    # Variables describing the dataset, used for DBS output publication
    if (self.datasetPath):
        txt += '\n'
        txt += 'DatasetPath='+self.datasetPath+'\n'

        datasetpath_split = self.datasetPath.split("/")

        txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
        txt += 'DataTier='+datasetpath_split[2]+'\n'
        txt += 'ApplicationFamily=cmsRun\n'

    else:
        txt += 'DatasetPath=MCDataTier\n'
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'
    if self.pset is not None:
        pset = os.path.basename(job.configFilename())
        txt += '\n'
        txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
        if (self.datasetPath): # standard job
            txt += 'InputFiles=${args[1]}\n'
            txt += 'MaxEvents=${args[2]}\n'
            txt += 'SkipEvents=${args[3]}\n'
            txt += 'echo "Inputfiles:<$InputFiles>"\n'
            txt += 'sed "s#\'INPUTFILE\'#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
            txt += 'echo "MaxEvents:<$MaxEvents>"\n'
            txt += 'sed "s#int32 input = 0#int32 input = $MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
            txt += 'echo "SkipEvents:<$SkipEvents>"\n'
            txt += 'sed "s#uint32 skipEvents = 0#uint32 skipEvents = $SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
        else: # pythia like job
            # index into the wrapper argument array of the next value to consume
            seedIndex=1
            if (self.firstRun):
                txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
                txt += 'echo "FirstRun: <$FirstRun>"\n'
                txt += 'sed "s#uint32 firstRun = 0#uint32 firstRun = $FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                seedIndex=seedIndex+1

            if (self.sourceSeed):
                txt += 'Seed=${args['+str(seedIndex)+']}\n'
                txt += 'sed "s#uint32 sourceSeed = 0#uint32 sourceSeed = $Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                seedIndex=seedIndex+1
                ## the following seeds are not always present
                # NOTE(review): the optional seeds are taken to be nested under
                # sourceSeed (suggested by the trailing 'pass' markers) - confirm.
                if (self.sourceSeedVtx):
                    txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "VtxSeed: <$VtxSeed>"\n'
                    txt += 'sed "s#uint32 VtxSmeared = 0#uint32 VtxSmeared = $VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
                if (self.sourceSeedG4):
                    txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "G4Seed: <$G4Seed>"\n'
                    txt += 'sed "s#uint32 g4SimHits = 0#uint32 g4SimHits = $G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
                if (self.sourceSeedMix):
                    txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "MixSeed: <$mixSeed>"\n'
                    txt += 'sed "s#uint32 mix = 0#uint32 mix = $mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
        txt += 'mv -f '+pset+' pset.cfg\n'

    if len(self.additional_inbox_files) > 0:
        txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
        txt += ' tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
        txt += 'fi\n'

    if self.pset is not None:
        txt += '\n'
        txt += 'echo "***** cat pset.cfg *********"\n'
        txt += 'cat pset.cfg\n'
        txt += 'echo "****** end pset.cfg ********"\n'
        txt += '\n'
        # hash of the configuration, used for DBS output publication
        txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
        txt += 'echo "PSETHASH = $PSETHASH" \n'
        txt += '\n'
    return txt
1019    
def wsBuildExe(self, nj=0):
    """
    Put in the script the commands to build an executable
    or a library.

    When the tar-ball at self.tgzNameWithPath exists, the returned shell
    text untars it from $RUNTIME_AREA, aborts the job if that fails and
    prepends the ProdAgentApi and ProdCommon directories to PYTHONPATH.
    Without a tar-ball an empty string is returned.
    """
    txt = ""

    if os.path.isfile(self.tgzNameWithPath):
        tgz = os.path.basename(self.tgzNameWithPath)
        txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+tgz+' :" \n'
        txt += 'tar xzvf $RUNTIME_AREA/'+tgz+'\n'
        txt += 'untar_status=$? \n'
        txt += 'if [ $untar_status -ne 0 ]; then \n'
        txt += ' echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
        txt += ' echo "JOB_EXIT_STATUS = $untar_status" \n'
        txt += ' echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' if [ $middleware == OSG ]; then \n'
        txt += ' cd $RUNTIME_AREA\n'
        txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
        txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
        txt += ' /bin/rm -rf $WORKING_DIR\n'
        txt += ' if [ -d $WORKING_DIR ] ;then\n'
        txt += ' echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
        txt += ' echo "JOB_EXIT_STATUS = 50999"\n'
        txt += ' echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
        txt += ' fi\n'
        txt += ' fi \n'
        txt += ' \n'
        txt += ' exit 1 \n'
        txt += 'else \n'
        txt += ' echo "Successful untar" \n'
        txt += 'fi \n'
        txt += '\n'
        txt += 'echo ">>> Include ProdAgentApi and PRODCOMMON in PYTHONPATH:"\n'
        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
        txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
        txt += 'else\n'
        txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
        txt += 'fi\n'
        # printed after 'fi' so the value is reported in both branches
        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
        txt += '\n'

    return txt
1068    
def modifySteeringCards(self, nj):
    """
    Placeholder for modifying the user-provided card and writing a new
    one into the share dir; this job type performs no action here.
    """
    return None
1074 ewv 1.131
def executableName(self):
    """
    Return the command used to start the job: "sh " when a user script
    (self.scriptExe) is set, self.executable otherwise.
    """
    if not self.scriptExe:
        return self.executable
    return "sh "
1080 slacapra 1.1
def executableArgs(self):
    """
    Return the arguments passed to the job executable.

    With a user script the script name followed by " $NJob" is returned.
    Otherwise the parameter-set option is returned, preceded by "-e" for
    CMSSW releases 1_5_X and newer.

    Raises CrabException when the major and minor release numbers cannot
    be parsed from the SCRAM software version string.
    """
    if self.scriptExe:
        return self.scriptExe + " $NJob"

    # if >= CMSSW_1_5_X, add -e
    version_array = self.scram.getSWVersion().split('_')
    try:
        major = int(version_array[1])
        minor = int(version_array[2])
    except (IndexError, ValueError):
        msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
        raise CrabException(msg)
    # Tuple comparison so that e.g. 2_0_X also counts as newer than 1_5_X.
    if (major, minor) >= (1, 5):
        return " -e -p pset.cfg"
    return " -p pset.cfg"
1099 slacapra 1.1
def inputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL input sandbox.

    The list holds, in this order: the code tar-ball and the ML tar-ball
    (each only when the file exists), the job configuration file when a
    parameter set is used, and the tar-ball of additional input files.
    """
    ## code
    sandbox = [tgz for tgz in (self.tgzNameWithPath, self.MLtgzfile)
               if os.path.isfile(tgz)]
    ## config
    if self.pset is not None:
        sandbox.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
    ## additional input files
    sandbox.append(self.additionalInputFileTgz())
    return sandbox
1119    
def outputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL output sandbox.

    Every user declared output file (self.output_file followed by
    self.output_file_sandbox) is numbered with nj + 1.
    """
    declared = self.output_file + self.output_file_sandbox
    return [self.numberFile_(name, str(nj + 1)) for name in declared]
1131    
def prepareSteeringCards(self):
    """
    Hook for initial modifications of the user's steering card file;
    nothing is done for this job type.
    """
    return None
1137    
def wsRenameOutput(self, nj):
    """
    Returns part of a job script which renames the produced files.

    For every file in self.output_file_sandbox and self.output_file the
    script moves the file to $RUNTIME_AREA and copies it to its numbered
    name (suffix $NJob).  A missing file sets exit_status=60302; for the
    files in self.output_file it also sets output_exit_status.  With the
    condor_g scheduler a dummy output file is prepared on OSG.  The script
    finally defines file_list with the numbered names of self.output_file
    and changes to $RUNTIME_AREA.
    """
    txt = '\n'
    txt += 'echo ">>> directory content:"\n'
    txt += 'ls \n'
    # keep appending: re-assigning txt here would drop the listing above
    txt += '\n'

    txt += 'output_exit_status=0\n'

    for fileWithSuffix in self.output_file_sandbox:
        output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
        txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA\n'
        txt += ' cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
        txt += 'else\n'
        txt += ' exit_status=60302\n'
        txt += ' echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
        if common.scheduler.boss_scheduler_name == 'condor_g':
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' fi \n'
        txt += 'fi\n'

    for fileWithSuffix in self.output_file:
        output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
        txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA\n'
        txt += ' cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
        txt += 'else\n'
        txt += ' exit_status=60302\n'
        txt += ' echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
        txt += ' echo "JOB_EXIT_STATUS = $exit_status"\n'
        txt += ' output_exit_status=$exit_status\n'
        if common.scheduler.boss_scheduler_name == 'condor_g':
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' fi \n'
        txt += 'fi\n'

    file_list = [self.numberFile_(name, '$NJob') for name in self.output_file]

    txt += 'file_list="'+' '.join(file_list)+'"\n'
    txt += 'cd $RUNTIME_AREA\n'
    txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    return txt
1193    
def numberFile_(self, file, txt):
    """
    Insert '_' + txt before the last extension of a file name.

    'out.root' becomes 'out_<txt>.root'; a name without any dot simply
    gets '_<txt>' appended.
    """
    # str methods replace the deprecated string-module functions.
    parts = file.split(".")
    if len(parts) > 1:
        # everything up to the last dot is the name, the rest the extension
        return ".".join(parts[:-1]) + '_' + txt + "." + parts[-1]
    return parts[0] + '_' + txt
1211    
def getRequirements(self, nj=None):
    """
    Return job requirements to add to jdl files.

    The expression requires the software tags built from self.version and
    self.executable_arch (each only when set) and outbound connectivity.
    The clauses are joined with ' && ', so that a missing version or
    architecture never leaves a dangling operator in the expression.
    nj is accepted for interface compatibility and is not used.
    """
    runtime_env = 'other.GlueHostApplicationSoftwareRunTimeEnvironment'
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + '", ' + runtime_env + ')')
    ## SL add requirement for the architecture whenever one is known
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + '", ' + runtime_env + ')')

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')

    return ' && '.join(clauses)
1231 gutsche 1.3
def configFilename(self):
    """Return the config filename: the job type name followed by '.cfg'."""
    base = self.name()
    return base + '.cfg'
1235    
def wsSetupCMSOSGEnvironment_(self):
    """
    Return the part of the job script that prepares the CMS execution
    environment on OSG.

    The script exports SCRAM_ARCH (self.executable_arch) and sources
    $OSG_APP/cmssoft/cms/cmsset_default.sh for self.version.  When that
    file is missing it reports error 10020, removes the working directory
    (error 10017 if that fails) and exits.
    """
    arch = self.executable_arch
    version = self.version
    script = [
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH='+arch+'\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh '+version+'\n',
        ' else\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        '\n',
        ' cd $RUNTIME_AREA\n',
        ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n',
        ' echo ">>> Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10017"\n',
        ' echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' fi\n',
        '\n',
        ' exit 1\n',
        ' fi\n',
        '\n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(script)
1272 ewv 1.131
1273 gutsche 1.3 ### OLI_DANIELE
def wsSetupCMSLCGEnvironment_(self):
    """
    Return the part of the job script that prepares the CMS execution
    environment on LCG.

    The script exports SCRAM_ARCH and BUILD_ARCH (self.executable_arch)
    and sources $VO_CMS_SW_DIR/cmsset_default.sh, exiting with error 10031
    when $VO_CMS_SW_DIR is unset, 10020 when the file is missing or empty
    and 10032 when sourcing it fails.
    """
    arch = self.executable_arch
    script = [
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH='+arch+'\n',
        ' export BUILD_ARCH='+arch+'\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n',
        ' echo "JOB_EXIT_STATUS = 10031" \n',
        ' echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' echo "JOB_EXIT_STATUS = 10032"\n',
        ' echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
    ]
    return ''.join(script)
1312 gutsche 1.5
1313 ewv 1.131 ### FEDE FOR DBS OUTPUT PUBLICATION
def modifyReport(self, nj):
    """
    Return the part of the job script that modifies the FrameworkJob Report.

    Publication is requested when the 'USER.publish_data' configuration
    value equals 1 (a missing key counts as 0).  In that case the script
    derives FOR_LFN from $SE_PATH, runs ModifyJobReport.py on
    crab_fjr_$NJob.xml and replaces the report with the new one when the
    call succeeds.  Otherwise the script only prints that no publication
    is required.
    """
    try:
        publish_data = int(self.cfg_params['USER.publish_data'])
    except KeyError:
        publish_data = 0

    if publish_data != 1:
        return 'echo "no data publication required"\n'

    processedDataset = self.cfg_params['USER.publish_data_name']
    modify_cmd = '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH'
    script = [
        'echo ">>> Modify Job Report:" \n',
        'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n',
        'if [ -z "$SE" ]; then\n',
        ' SE="" \n',
        'fi \n',
        'if [ -z "$SE_PATH" ]; then\n',
        ' SE_PATH="" \n',
        'fi \n',
        'echo "SE = $SE"\n',
        'echo "SE_PATH = $SE_PATH"\n',
        'ProcessedDataset='+processedDataset+'\n',
        # the LFN is whatever follows 'store' in SE_PATH ('store' is hard-coded)
        'if [ "$SE_PATH" == "" ]; then\n',
        ' FOR_LFN=/copy_problems/ \n',
        'else \n',
        " tmp=`echo $SE_PATH | awk -F 'store' '{print$2}'` \n",
        ' FOR_LFN=/store$tmp \n',
        'fi \n',
        'echo "ProcessedDataset = $ProcessedDataset"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
        'echo "' + modify_cmd + '"\n',
        modify_cmd + '\n',
        'modifyReport_result=$?\n',
        'echo modifyReport_result = $modifyReport_result\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' exit_status=1\n',
        ' echo "ERROR: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n',
        'fi\n',
    ]
    return ''.join(script)
1369 fanzago 1.99
def cleanEnv(self):
    """
    Return the part of the job script that, on OSG, removes the working
    directory and reports error 60999 when it cannot be deleted.
    """
    script = [
        'if [ $middleware == OSG ]; then\n',
        ' cd $RUNTIME_AREA\n',
        ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n',
        ' echo ">>> Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n',
        ' echo "JOB_EXIT_STATUS = 60999"\n',
        ' echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' fi\n',
        'fi\n',
        '\n',
    ]
    return ''.join(script)
1386 fanzago 1.93
def setParam_(self, param, value):
    """Store value under the key param in the job type parameter dictionary."""
    params = self._params
    params[param] = value
1389    
def getParams(self):
    """
    return the parameter table filled by setParam_ (the object itself, not a copy)
    """
    table = self._params
    return table
1392 gutsche 1.8
def setTaskid_(self):
    """
    copy the 'taskId' entry of cfg_params into self._taskId
    """
    # no fallback here: a missing 'taskId' entry propagates the lookup error
    task_id = self.cfg_params['taskId']
    self._taskId = task_id
1395 ewv 1.131
def getTaskid(self):
    """
    return the task identifier stored by setTaskid_
    """
    task_id = self._taskId
    return task_id
1398 gutsche 1.35
def uniquelist(self, old):
    """
    drop duplicated entries from a list

    The elements of old are used as dictionary keys (so they have to be
    hashable) and the keys of that dictionary are returned.
    """
    seen = dict.fromkeys(old, 0)
    return seen.keys()
1407 mcinquil 1.121
1408    
def checkOut(self, limit):
    """
    check the dimension of the output files

    Build the shell fragment that sums the sizes of the job output files
    (everything in self.output_file and self.output_file_sandbox except
    the names containing 'crab_fjr') and compares the sum with limit.
    When the sum is larger than the limit the generated script removes
    output files and sets exit_status=70000.

    limit -- threshold written verbatim (through str()) into the script
             and compared with the third column of `ls -gGrta`.

    Returns the shell fragment as a string.
    """
    # The accumulator has to exist before the first '+=': without this
    # initialisation every call failed with UnboundLocalError.
    txt = ''
    txt += 'echo ">>> Starting output sandbox limit check :"\n'

    # Names of the files to check, each one passed through numberFile_ with
    # the '$NJob' placeholder; the framework job report is left out.
    numbered = [self.numberFile_(fileOut, '$NJob')
                for fileOut in (self.output_file + self.output_file_sandbox)
                if fileOut.find('crab_fjr') == -1]
    # Every name is preceded by a blank, as the shell 'for' lists expect.
    allOutFiles = ''.join([' ' + name for name in numbered])

    # First pass: sum the size of the files that exist.
    txt += 'echo "OUTPUT files: '+str(allOutFiles)+'";\n'
    txt += 'ls -gGhrta;\n'
    txt += 'sum=0;\n'
    txt += 'for file in '+str(allOutFiles)+' ; do\n'
    txt += ' if [ -e $file ]; then\n'
    txt += ' tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
    txt += ' sum=`expr $sum + $tt`\n'
    txt += ' else\n'
    txt += ' echo "WARNING: output file $file not found!"\n'
    txt += ' fi\n'
    txt += 'done\n'
    txt += 'echo "Total Output dimension: $sum";\n'
    txt += 'limit='+str(limit)+';\n'
    txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
    txt += 'if [ $limit -lt $sum ]; then\n'
    txt += ' echo "WARNING: output files have to big size - something will be lost;"\n'
    txt += ' echo " checking the output file sizes..."\n'

    # Second pass: accumulate sizes until the limit is crossed.
    # NOTE(review): the loop variable is $file2 but the script stores and
    # prints $file, which still holds the last value of the first loop;
    # this looks like it should be $file2 -- confirm before changing the
    # script that runs on the worker node.
    txt += ' tot=0;\n'
    txt += ' for file2 in '+str(allOutFiles)+' ; do\n'
    txt += ' tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
    txt += ' tot=`expr $tot + $tt`;\n'
    txt += ' if [ $limit -lt $tot ]; then\n'
    txt += ' tot=`expr $tot - $tt`;\n'
    txt += ' fileLast=$file;\n'
    txt += ' break;\n'
    txt += ' fi\n'
    txt += ' done\n'
    txt += ' echo "Dimension calculated: $tot"; echo "First file to exclude: $file";\n'

    # Third pass: remove $fileLast and every file listed after it.
    txt += ' flag=0;\n'
    txt += ' for filess in '+str(allOutFiles)+' ; do\n'
    txt += ' if [ $fileLast = $filess ]; then\n'
    txt += ' flag=1;\n'
    txt += ' fi\n'
    txt += ' if [ $flag -eq 1 ]; then\n'
    txt += ' rm -f $filess;\n'
    txt += ' fi\n'
    txt += ' done\n'
    txt += ' ls -agGhrt;\n'
    txt += ' echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
    txt += ' echo "JOB_EXIT_STATUS = 70000";\n'
    txt += ' exit_status=70000;\n'
    # 'else' carries no newline: the echo follows on the same shell line.
    txt += 'else'
    txt += ' echo "Total Output dimension $sum is fine.";\n'
    txt += 'fi\n'
    txt += 'echo "Ending output sandbox limit check"\n'
    return txt