ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.145
Committed: Wed Dec 5 14:40:06 2007 UTC (17 years, 4 months ago) by fanzago
Content type: text/x-python
Branch: MAIN
Changes since 1.144: +1 -1 lines
Log Message:
add ls

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5 fanzago 1.115 from BlackWhiteListParser import BlackWhiteListParser
6 slacapra 1.1 import common
7     import Scram
8    
9 slacapra 1.105 import os, string, glob
10 slacapra 1.1
11     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs):
    """
    Configure a CMSSW job type from the user configuration.

    Reads the EDG.*, CMSSW.* and USER.* keys of cfg_params, validates
    them, runs data discovery/location when a dataset is given, builds
    the user tarball, performs the job splitting and finally writes the
    modified parameter set.

    ARGUMENTS: cfg_params - configuration mapping; only item access and
                            KeyError on missing keys are relied upon
               ncjobs     - number of jobs to create (the splitting code
                            also accepts the string 'all')
    RAISES:    CrabException on missing or inconsistent configuration,
               on files that do not exist, on an unsupported
               version/architecture pair and on parameter-set
               manipulation failures.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug(3,'CMSSW::__init__')

    def optionalInt(key):
        # Return (int(value), 1) when key is configured, (None, 0) otherwise.
        # A non-numeric value still raises ValueError, as before.
        try:
            raw = cfg_params[key]
        except KeyError:
            return None, 0
        return int(raw), 1

    self.argsList = []

    self._params = {}
    self.cfg_params = cfg_params
    # init BlackWhiteListParser
    self.blackWhiteListParser = BlackWhiteListParser(cfg_params)

    try:
        self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
    except KeyError:
        self.MaxTarBallSize = 9.5

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    log = common.logger

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.additional_tgz_name = 'additional.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''          # may become None (script use case)
    self.datasetPath = ''   # may become None (no-input use case)

    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()

    #
    # Try to block creation in case of arch/version mismatch
    #
    versionFields = self.version.split("_")
    if int(versionFields[1]) == 1:
        # the minor field is only parsed for 1_x releases, as before
        minor = int(versionFields[2])
        if minor < 5 and self.executable_arch.find('slc4') == 0:
            msg = "Warning: You are using %s version of CMSSW with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
            common.logger.message(msg)
        if minor >= 5 and self.executable_arch.find('slc3') == 0:
            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
            raise CrabException(msg)

    common.taskDB.setDict('codeVersion',self.version)
    self.setParam_('application', self.version)

    ### collect Data cards

    try:
        configuredDataset = cfg_params['CMSSW.datasetpath']
    except KeyError:
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)
    log.debug(6, "CMSSW::CMSSW(): datasetPath = "+configuredDataset)
    if configuredDataset.lower() == 'none':
        self.datasetPath = None
        self.selectNoInput = 1
    else:
        self.datasetPath = configuredDataset
        self.selectNoInput = 0

    # ML monitoring
    # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
    if not self.datasetPath:
        self.setParam_('dataset', 'None')
        self.setParam_('owner', 'None')
    else:
        try:
            datasetpath_split = self.datasetPath.split("/")
            # standard style
            self.setParam_('datasetFull', self.datasetPath)
            self.setParam_('dataset', datasetpath_split[1])
            self.setParam_('owner', datasetpath_split[2])
        except IndexError:
            # path has fewer than three '/'-separated fields:
            # fall back to the raw path for both parameters
            self.setParam_('dataset', self.datasetPath)
            self.setParam_('owner', self.datasetPath)

    self.setTaskid_()
    self.setParam_('taskId', self.cfg_params['taskId'])

    self.dataTiers = []

    ## now the application
    try:
        self.executable = cfg_params['CMSSW.executable']
    except KeyError:
        self.executable = 'cmsRun'
        self.setParam_('exe', self.executable)
        msg = "User executable not defined. Use cmsRun"
        log.debug(3,msg)
    else:
        self.setParam_('exe', self.executable)
        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
        msg = "Default executable cmsRun overridden. Switch to " + self.executable
        log.debug(3,msg)

    try:
        self.pset = cfg_params['CMSSW.pset']
    except KeyError:
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
    if self.pset.lower() != 'none':
        if not os.path.exists(self.pset):
            raise CrabException("User defined PSet file "+self.pset+" does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    # the fjr report is added by default
    self.output_file_sandbox = [self.fjrFileName]

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    try:
        outputSpec = cfg_params['CMSSW.output_file']
    except KeyError:
        outputSpec = ''
    if outputSpec != '':
        tmpOutFiles = outputSpec.split(',')
        log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
        for outName in tmpOutFiles:
            self.output_file.append(outName.strip())
    else:
        log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")

    # script_exe file as additional file in inputSandbox
    try:
        self.scriptExe = cfg_params['USER.script_exe']
    except KeyError:
        self.scriptExe = ''
    if self.scriptExe != '':
        if not os.path.isfile(self.scriptExe):
            msg ="ERROR. file "+self.scriptExe+" not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    # with neither dataset nor parameter set, a user script is mandatory
    if self.datasetPath is None and self.pset is None and self.scriptExe == '':
        msg ="Error. script_exe not defined"
        raise CrabException(msg)

    ## additional input files
    try:
        addFilesSpec = cfg_params['USER.additional_input_files']
    except KeyError:
        addFilesSpec = None
    if addFilesSpec is not None:
        for entry in addFilesSpec.split(','):
            entry = entry.strip()
            if not entry:
                # empty item (e.g. trailing comma): previously an
                # IndexError on entry[0]
                continue
            dirname = ''
            if not entry[0] == "/":
                dirname = "."
            matches = []
            if entry.find("*") > -1:
                matches = glob.glob(os.path.join(dirname, entry))
                if len(matches) == 0:
                    raise CrabException("No additional input file found with this pattern: "+entry)
            else:
                matches.append(entry)
            for path in matches:
                if not os.path.exists(path):
                    raise CrabException("Additional input file not found: "+path)
                self.additional_inbox_files.append(path.strip())
        common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))

    # files per job
    try:
        if (cfg_params['CMSSW.files_per_jobs']):
            raise CrabException("files_per_jobs no longer supported. Quitting.")
    except KeyError:
        pass

    ## Events per job
    self.eventsPerJob, self.selectEventsPerJob = optionalInt('CMSSW.events_per_job')
    if not self.selectEventsPerJob:
        self.eventsPerJob = -1

    ## number of jobs
    self.theNumberOfJobs, self.selectNumberOfJobs = optionalInt('CMSSW.number_of_jobs')
    if not self.selectNumberOfJobs:
        self.theNumberOfJobs = 0

    self.total_number_of_events, self.selectTotalNumberEvents = optionalInt('CMSSW.total_number_of_events')
    if not self.selectTotalNumberEvents:
        self.total_number_of_events = 0

    if self.pset is not None:
        if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
            msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
            raise CrabException(msg)
    else:
        if (self.selectNumberOfJobs == 0):
            msg = 'Must specify number_of_jobs.'
            raise CrabException(msg)

    ## source seed for pythia
    self.sourceSeed, found = optionalInt('CMSSW.pythia_seed')
    if not found:
        common.logger.debug(5,"No seed given")

    self.sourceSeedVtx, found = optionalInt('CMSSW.vtx_seed')
    if not found:
        common.logger.debug(5,"No vertex seed given")

    self.sourceSeedG4, found = optionalInt('CMSSW.g4_seed')
    if not found:
        common.logger.debug(5,"No g4 sim hits seed given")

    self.sourceSeedMix, found = optionalInt('CMSSW.mix_seed')
    if not found:
        common.logger.debug(5,"No mix seed given")

    self.firstRun, found = optionalInt('CMSSW.first_run')
    if not found:
        common.logger.debug(5,"No first run given")

    if self.pset is not None:
        import PsetManipulator as pp
        PsetEdit = pp.PsetManipulator(self.pset)

    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents=0 # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths={} # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination=[] # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end

    self.tgzNameWithPath = self.getTarBall(self.executable)

    ## Select Splitting
    if self.selectNoInput:
        if self.pset is None:
            self.jobSplittingForScript()
        else:
            self.jobSplittingNoInput()
    else:
        self.jobSplittingByBlocks(blockSites)

    # modify Pset
    if self.pset is not None:
        try:
            if (self.datasetPath): # standard job
                # allow to process a fraction of events in a file
                PsetEdit.inputModule("INPUTFILE")
                PsetEdit.maxEvent(0)
                PsetEdit.skipEvent(0)
            else: # pythia like job
                PsetEdit.maxEvent(self.eventsPerJob)
                if (self.firstRun):
                    PsetEdit.pythiaFirstRun(0) #First Run
                # NOTE(review): indentation was lost in the listing this
                # was recovered from; the vtx/g4/mix seeds are assumed to
                # be nested under the pythia seed -- confirm against the
                # repository copy.
                if (self.sourceSeed):
                    PsetEdit.pythiaSeed(0)
                    if (self.sourceSeedVtx):
                        PsetEdit.vtxSeed(0)
                    if (self.sourceSeedG4):
                        PsetEdit.g4Seed(0)
                    if (self.sourceSeedMix):
                        PsetEdit.mixSeed(0)
            # add FrameworkJobReport to parameter-set
            PsetEdit.addCrabFJR(self.fjrFileName)
            PsetEdit.psetWriter(self.configFilename())
        except Exception:
            msg='Error while manipuliating ParameterSet: exiting...'
            raise CrabException(msg)
318 gutsche 1.3
def DataDiscoveryAndLocation(self, cfg_params):
    """
    Query data discovery (DBS) and data location (DLS) for self.datasetPath.

    ARGUMENT: cfg_params - configuration passed through to DataDiscovery
                           and DataLocation
    SETS:     self.pubdata, self.filesbyblock, self.eventsbyblock,
              self.eventsbyfile, self.maxEvents
    RETURNS:  whatever dataloc.getSites() returns (used by the caller as
              the block -> sites mapping)
    RAISES:   CrabException when discovery or location fails
    """
    import sys
    import DataDiscovery
    import DataLocation
    common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")

    datasetPath=self.datasetPath

    ## Contact the DBS
    common.logger.message("Contacting Data Discovery Services ...")
    try:
        self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
        self.pubdata.fetchDBSInfo()
    except (DataDiscovery.NotExistingDatasetError,
            DataDiscovery.NoDataTierinProvenanceError,
            DataDiscovery.DataDiscoveryError):
        # all three failures are reported identically; sys.exc_info() is
        # used to fetch the exception without version-specific syntax
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
        raise CrabException(msg)

    self.filesbyblock=self.pubdata.getFiles()
    self.eventsbyblock=self.pubdata.getEventsPerBlock()
    self.eventsbyfile=self.pubdata.getEventsPerFile()

    ## get max number of events
    self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py

    ## Contact the DLS and build a list of sites hosting the fileblocks
    blockNames = self.filesbyblock.keys()
    try:
        dataloc=DataLocation.DataLocation(blockNames,cfg_params)
        dataloc.fetchDLSInfo()
    except DataLocation.DataLocationError:
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
        raise CrabException(msg)

    sites = dataloc.getSites()

    # screen output
    common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(blockNames)) + " blocks.\n")

    return sites
372 ewv 1.131
def setArgsList(self, argsList):
    """
    Store argsList on the instance as self.argsList (no copy is made).
    """
    setattr(self, 'argsList', argsList)
375    
def jobSplittingByBlocks(self, blockSites):
    """
    Perform job splitting. Jobs run over an integer number of files
    and no more than one block.
    ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
    REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
              self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
              self.maxEvents, self.filesbyblock, self.eventsbyblock, self.eventsbyfile,
              self.ncjobs, self.jobDestination
    SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
          self.total_number_of_jobs, self.ncjobs - Total # of jobs created
          self.list_of_args - one [file list string, max events, skip events]
                              entry (all strings) per job
    RAISES: CrabException when the splitting parameters are not sufficient
            to derive the total number of events or the events per job
    """

    # ---- Handle the possible job splitting configurations ---- #
    totalEventsRequested = None
    eventsPerJobRequested = None
    if (self.selectTotalNumberEvents):
        totalEventsRequested = self.total_number_of_events
    if (self.selectEventsPerJob):
        eventsPerJobRequested = self.eventsPerJob
        if (self.selectNumberOfJobs):
            totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob

    if totalEventsRequested is None:
        # previously an unbound local (NameError) further down
        raise CrabException('Cannot split by blocks: define total_number_of_events, or events_per_job together with number_of_jobs.')

    # If user requested all the events in the dataset
    if (totalEventsRequested == -1):
        eventsRemaining=self.maxEvents
    # If user requested more events than are in the dataset
    elif (totalEventsRequested > self.maxEvents):
        eventsRemaining = self.maxEvents
        # report the effective request: self.total_number_of_events is 0
        # when the total was derived from number_of_jobs * events_per_job
        common.logger.message("Requested "+str(totalEventsRequested)+ " events, but only "+str(self.maxEvents)+" events are available.")
    # If user requested less events than are in the dataset
    else:
        eventsRemaining = totalEventsRequested

    # If user requested more events per job than are in the dataset
    if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
        eventsPerJobRequested = self.maxEvents

    # For user info at end
    totalEventCount = 0

    if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
        if self.theNumberOfJobs == 0:
            raise CrabException('Cannot split by blocks: number_of_jobs must not be zero.')
        eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)

    if eventsPerJobRequested is None:
        # previously an unbound local (NameError) inside the file loop
        raise CrabException('Cannot split by blocks: define events_per_job, or number_of_jobs together with total_number_of_events.')

    if (self.selectNumberOfJobs):
        common.logger.message("May not create the exact number_of_jobs requested.")

    if ( self.ncjobs == 'all' ) :
        totalNumberOfJobs = 999999999
    else :
        totalNumberOfJobs = self.ncjobs

    blocks = list(blockSites.keys())
    blockCount = 0
    # Backup variable in case self.maxEvents counted events in a non-included block
    numBlocksInDataset = len(blocks)

    jobCount = 0
    list_of_lists = []

    # dictionary tracking which jobs belong to which block
    jobsOfBlock = {}

    # ---- Iterate over the blocks in the dataset until ---- #
    # ---- we've met the requested total # of events    ---- #
    while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
        block = blocks[blockCount]
        blockCount += 1
        if block not in jobsOfBlock:
            jobsOfBlock[block] = []

        # blocks without an event count are not split
        if block not in self.eventsbyblock:
            continue

        numEventsInBlock = self.eventsbyblock[block]
        common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))

        files = self.filesbyblock[block]
        numFilesInBlock = len(files)
        if (numFilesInBlock <= 0):
            continue
        fileCount = 0

        # ---- New block => New job ---- #
        parString = ""
        # counter for number of events in files currently worked on
        filesEventCount = 0
        # flag if next while loop should touch new file
        newFile = 1
        # job event counter
        jobSkipEventCount = 0

        # ---- Iterate over the files in the block until we've met the requested ---- #
        # ---- total # of events or we've gone over all the files in this block  ---- #
        while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
            fileName = files[fileCount]
            if newFile :
                try:
                    numEventsInFile = self.eventsbyfile[fileName]
                    common.logger.debug(6, "File "+str(fileName)+" has "+str(numEventsInFile)+" events")
                    # increase filesEventCount
                    filesEventCount += numEventsInFile
                    # Add file to current job; each entry is \"name\"\,
                    # (same characters as before, spelled with valid escapes)
                    parString += '\\"' + fileName + '\\"\\,'
                    newFile = 0
                except KeyError:
                    common.logger.message("File "+str(fileName)+" has unknown number of events: skipping")

            # if less events in file remain than eventsPerJobRequested
            if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
                # if last file in block
                if ( fileCount == numFilesInBlock-1 ) :
                    # end job using last file, use remaining events in block
                    # close job and touch new file
                    fullString = parString[:-2]   # drop the trailing \,
                    list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    # fill jobs of block dictionary
                    jobsOfBlock[block].append(jobCount+1)
                    # reset counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
                    eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
                    jobSkipEventCount = 0
                    # reset file
                    parString = ""
                    filesEventCount = 0
                    newFile = 1
                    fileCount += 1
                else :
                    # go to next file
                    newFile = 1
                    fileCount += 1
            # if events in file equal to eventsPerJobRequested
            elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
                # close job and touch new file
                fullString = parString[:-2]
                list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                self.jobDestination.append(blockSites[block])
                common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                jobsOfBlock[block].append(jobCount+1)
                # reset counter
                jobCount = jobCount + 1
                totalEventCount = totalEventCount + eventsPerJobRequested
                eventsRemaining = eventsRemaining - eventsPerJobRequested
                jobSkipEventCount = 0
                # reset file
                parString = ""
                filesEventCount = 0
                newFile = 1
                fileCount += 1

            # if more events in file remain than eventsPerJobRequested
            else :
                # close job but don't touch new file
                fullString = parString[:-2]
                list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                self.jobDestination.append(blockSites[block])
                common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                jobsOfBlock[block].append(jobCount+1)
                # increase counter
                jobCount = jobCount + 1
                totalEventCount = totalEventCount + eventsPerJobRequested
                eventsRemaining = eventsRemaining - eventsPerJobRequested
                # calculate skip events for last file
                # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
                jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[fileName])
                # remove all but the last file
                filesEventCount = self.eventsbyfile[fileName]
                parString = '\\"' + fileName + '\\"\\,'

    self.ncjobs = self.total_number_of_jobs = jobCount
    if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
        common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
    common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")

    # screen output
    screenOutput = "List of jobs and available destination sites:\n\n"

    # keep trace of block with no sites to print a warning at the end
    noSiteBlock = []   # job ranges of the blocks without sites
    bloskNoSite = []   # printed counters of the blocks without sites

    blockCounter = 0
    for block in blocks:
        if block in jobsOfBlock:
            blockCounter += 1
            jobRange = spanRanges(jobsOfBlock[block])
            # apply black list then white list once per block
            allowedSites = self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)
            screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,jobRange,','.join(allowedSites))
            if len(allowedSites) == 0:
                noSiteBlock.append( jobRange )
                bloskNoSite.append( blockCounter )

    common.logger.message(screenOutput)
    if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
        msg = 'WARNING: No sites are hosting any part of data for block:\n '
        virgola = ""
        if len(bloskNoSite) > 1:
            virgola = ","
        for block in bloskNoSite:
            msg += ' ' + str(block) + virgola
        msg += '\n Related jobs:\n '
        virgola = ""
        if len(noSiteBlock) > 1:
            virgola = ","
        for range_jobs in noSiteBlock:
            msg += str(range_jobs) + virgola
        msg += '\n will not be submitted and this block of data can not be analyzed!\n'
        common.logger.message(msg)

    self.list_of_args = list_of_lists
    return
592    
def jobSplittingNoInput(self):
    """
    Perform job splitting based on number of event per job
    (no input dataset).

    REQUIRES: self.selectEventsPerJob, self.selectNumberOfJobs,
              self.selectTotalNumberEvents, self.eventsPerJob,
              self.theNumberOfJobs, self.total_number_of_events,
              self.firstRun and the self.sourceSeed* values,
              self.jobDestination
    SETS: self.total_number_of_jobs, self.list_of_args (one list of
          seed/run argument strings per job), and, depending on which
          two parameters were given, self.total_number_of_events or
          self.eventsPerJob; appends one [""] per job to self.jobDestination
    RAISES: CrabException for a negative total number of events or a
            zero divisor (events_per_job / number_of_jobs)
    """
    common.logger.debug(5,'Splitting per events')

    if (self.selectEventsPerJob):
        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
    if (self.selectNumberOfJobs):
        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
    if (self.selectTotalNumberEvents):
        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')

    if (self.total_number_of_events < 0):
        msg='Cannot split jobs per Events with "-1" as total number of events'
        raise CrabException(msg)

    if (self.selectEventsPerJob):
        if (self.selectTotalNumberEvents):
            if self.eventsPerJob == 0:
                # previously an uncaught ZeroDivisionError
                raise CrabException('Cannot split jobs per Events with events_per_job equal to zero')
            self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
        elif (self.selectNumberOfJobs):
            self.total_number_of_jobs = self.theNumberOfJobs
            self.total_number_of_events = int(self.theNumberOfJobs*self.eventsPerJob)

    elif (self.selectNumberOfJobs):
        if self.theNumberOfJobs == 0:
            # previously an uncaught ZeroDivisionError
            raise CrabException('Cannot split jobs per Events with number_of_jobs equal to zero')
        self.total_number_of_jobs = self.theNumberOfJobs
        self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)

    common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs))

    # is there any remainder?
    eventsCovered = int(self.total_number_of_jobs)*self.eventsPerJob
    check = int(self.total_number_of_events) - eventsCovered

    common.logger.debug(5,'Check '+str(check))

    common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
    if check > 0:
        common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(eventsCovered))

    # argument is seed number.$i
    self.list_of_args = []
    for i in range(self.total_number_of_jobs):
        ## Since there is no input, any site is good
        self.jobDestination.append([""]) #must be empty to write correctly the xml
        suffix = str(i)   # job index is appended to every configured value
        args=[]
        if (self.firstRun):
            ## pythia first run
            args.append(str(self.firstRun)+suffix)
        # NOTE(review): indentation was lost in the listing this was
        # recovered from; the vtx/g4/mix seeds are assumed to be nested
        # under the pythia seed (the original closes the chain with two
        # 'pass' statements) -- confirm against the repository copy.
        if (self.sourceSeed):
            args.append(str(self.sourceSeed)+suffix)
            ## + vtx, G4 and Mix random seeds, in that order
            for extraSeed in (self.sourceSeedVtx, self.sourceSeedG4, self.sourceSeedMix):
                if (extraSeed):
                    args.append(str(extraSeed)+suffix)
        self.list_of_args.append(args)

    return
658    
659 spiga 1.42
def jobSplittingForScript(self):#CarlosDaniele
    """
    Split purely by number of jobs (user script, no dataset and no pset).

    Sets self.total_number_of_jobs to self.theNumberOfJobs, gives every
    job the single argument str(job index) in self.list_of_args and
    appends one [""] destination per job to self.jobDestination.
    """
    requested = self.theNumberOfJobs

    common.logger.debug(5,'Splitting per job')
    common.logger.message('Required '+str(requested)+' jobs in total ')

    self.total_number_of_jobs = requested

    common.logger.debug(5,'N jobs '+str(requested))
    common.logger.message(str(requested)+' jobs can be created')

    jobIndexes = range(requested)
    # Since there is no input, any site is good: empty destination per job
    self.jobDestination.extend([[""] for index in jobIndexes])
    # the only argument is the job index (no random seed)
    self.list_of_args = [[str(index)] for index in jobIndexes]
    return
682    
def split(self, jobParams):
    """
    Record the arguments and destination of every job in common.jobDB.

    jobParams is grown by one "" entry per job, then entries
    0 .. total_number_of_jobs-1 are overwritten with the matching item of
    self.list_of_args. The job DB is loaded first and saved at the end.
    """
    common.jobDB.load()

    jobTotal = self.total_number_of_jobs
    perJobArgs = self.list_of_args

    # create the empty structure
    jobParams.extend([""] * jobTotal)

    for index in range(jobTotal):
        jobArgs = perJobArgs[index]
        jobParams[index] = jobArgs
        common.jobDB.setArguments(index, jobArgs)
        destination = self.jobDestination[index]
        common.logger.debug(5,"Job "+str(index)+" Destination: "+str(destination))
        common.jobDB.setDestination(index, self.jobDestination[index])

    common.jobDB.save()
    return
703 ewv 1.131
def getJobTypeArguments(self, nj, sched):
    """
    Return the arguments stored in common.jobDB for job nj as one string,
    each argument followed by a single space. sched is not used.
    """
    pieces = [str(argument) + " " for argument in common.jobDB.arguments(nj)]
    return ''.join(pieces)
709 ewv 1.131
def numberOfJobs(self):
    """
    Return self.total_number_of_jobs as set by the job splitting methods.
    """
    return getattr(self, 'total_number_of_jobs')
713    
def getTarBall(self, exe):
    """
    Return the path of the tarball with lib and exe.

    The path (work-space tgz directory + 'share/' + self.tgz_name) is
    stored in self.tgzNameWithPath. When no file exists at that path,
    self.buildTar_(exe) is called first and the stripped path is returned.
    """
    # Marco. Let's start to use relative path for Boss XML files
    tarballPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
    self.tgzNameWithPath = tarballPath

    if not os.path.exists(tarballPath):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        # re-read the attribute after the build, as the original did
        return self.tgzNameWithPath.strip()

    # it exists already, just return it
    return self.tgzNameWithPath
731    
def buildTar_(self, executable):
    """
    Build the gzipped tarball with the user's private code
    (self.tgzNameWithPath) and the tarball with the monitoring helper
    scripts (self.MLtgzfile).

    Nothing is built when the SCRAM release top is empty or equal to the
    user's software area.

    executable -- name of the user executable, resolved through
                  self.scram.findFile_

    Raises CrabException when the executable is not found, when a tarball
    cannot be written, or when the code tarball is larger than
    self.MaxTarBallSize MB.
    """
    # NOTE(review): block nesting was reconstructed from a listing without
    # indentation -- confirm against the repository copy.
    import sys
    import tarfile

    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        return

    try: # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            if self.executable != '':
                exeWithPath = self.scram.findFile_(executable)
                if not exeWithPath:
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship
                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
                    path = swArea+'/'
                    # distinguish case when script is in user project area or given by full path somewhere else
                    if exeWithPath.find(path) >= 0:
                        exe = exeWithPath.replace(path, '')
                        tar.add(path+exe, exe)
                    else:
                        tar.add(exeWithPath, os.path.basename(executable))
                # otherwise the exe is from release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            libDir = 'lib'
            lib = swArea+'/' +libDir
            common.logger.debug(5,"lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib, libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module, moduleDir)

            ## Now check if any data dir(s) is present
            swAreaLen = len(swArea)
            for root, dirs, files in os.walk(swArea):
                if "data" in dirs:
                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
                    # archive name is the path relative to swArea
                    tar.add(root+"/data", root[swAreaLen:]+"/data")

            ## Add ProdAgent dir to tar
            paDir = 'ProdAgentApi'
            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
            if os.path.isdir(pa):
                tar.add(pa, paDir)

            ### FEDE FOR DBS PUBLICATION
            ## Add PRODCOMMON dir to tar
            prodcommonDir = 'ProdCommon'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
            if os.path.isdir(prodcommonPath):
                tar.add(prodcommonPath, prodcommonDir)

            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            # always release the archive handle, also on failure
            tar.close()
    except CrabException:
        # keep the specific message (e.g. executable not found)
        raise
    except Exception:
        raise CrabException('Could not create tar-ball: ' + str(sys.exc_info()[1]))

    ## check for tarball size
    tarballinfo = os.stat(self.tgzNameWithPath)
    if tarballinfo.st_size > self.MaxTarBallSize*1024*1024:
        raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')

    ## create tar-ball with ML stuff
    self.MLtgzfile = common.work_space.pathForTgz()+'share/MLfiles.tgz'
    try:
        tar = tarfile.open(self.MLtgzfile, "w:gz")
        try:
            path = os.environ['CRABDIR'] + '/python/'
            for fname in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
                tar.add(path+fname, fname)
            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
        finally:
            tar.close()
    except Exception:
        raise CrabException('Could not create ML files tar-ball: ' + str(sys.exc_info()[1]))

    return
828 ewv 1.131
def additionalInputFileTgz(self):
    """
    Put all additional files into a tar ball and return its name.

    Every entry of self.additional_inbox_files is stored under its last
    '/'-separated path component.  The archive is closed even when adding
    a file fails.
    """
    import tarfile
    tarName = common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
    tar = tarfile.open(tarName, "w:gz")
    try:
        for path in self.additional_inbox_files:
            tar.add(path, path.split('/')[-1])
        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
    finally:
        tar.close()
    return tarName
841    
def wsSetupEnvironment(self, nj):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    The returned shell text sets up the middleware-specific environment,
    creates the CMSSW project area for self.version, checks the number of
    wrapper arguments, defines the dataset variables and patches the
    parameter-set file of the job with the per-job arguments.
    """
    # NOTE(review): block nesting was reconstructed from a listing without
    # indentation -- confirm against the repository copy.

    # Prepare JobType-independent part
    txt = ''
    txt += 'echo ">>> setup environment"\n'
    txt += 'if [ $middleware == LCG ]; then \n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' if [ ! $? == 0 ] ;then\n'
    txt += ' echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
    txt += ' echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' exit 1\n'
    txt += ' fi\n'
    txt += ' echo ">>> Created working directory: $WORKING_DIR"\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo ">>> specific cmssw setup environment:"\n'
    txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
    txt += scram+' project CMSSW '+self.version+'\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
    txt += ' echo "JOB_EXIT_STATUS = 10034"\n'
    txt += ' echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    # report the release actually requested instead of a fixed version name
    txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW '+self.version+' not found on `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
    txt += ' echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += ' exit 1 \n'
    txt += 'fi \n'
    txt += 'cd '+self.version+'\n'
    ########## FEDE FOR DBS2 ######################
    txt += 'SOFTWARE_DIR=`pwd`\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    ###############################################
    ### needed grep for bug in scramv1 ###
    txt += scram+' runtime -sh\n'
    txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    txt += 'echo $PATH\n'

    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber)\n"
    txt += "\n"
    txt += "if [ $nargs -lt "+str(len(self.argsList[nj].split()))+" ]\n"
    txt += "then\n"
    txt += " echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
    txt += ' echo "JOB_EXIT_STATUS = 50113"\n'
    txt += ' echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
    txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
    txt += ' echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += " exit 1\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    ### FEDE FOR DBS OUTPUT PUBLICATION
    if self.datasetPath:
        txt += '\n'
        txt += 'DatasetPath='+self.datasetPath+'\n'

        datasetpath_split = self.datasetPath.split("/")

        txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
        txt += 'DataTier='+datasetpath_split[2]+'\n'
        txt += 'ApplicationFamily=cmsRun\n'
    else:
        txt += 'DatasetPath=MCDataTier\n'
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'

    if self.pset is not None: #CarlosDaniele
        pset = os.path.basename(job.configFilename())
        txt += '\n'
        txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
        if self.datasetPath: # standard job
            txt += 'InputFiles=${args[1]}\n'
            txt += 'MaxEvents=${args[2]}\n'
            txt += 'SkipEvents=${args[3]}\n'
            txt += 'echo "Inputfiles:<$InputFiles>"\n'
            txt += 'sed "s#\'INPUTFILE\'#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
            txt += 'echo "MaxEvents:<$MaxEvents>"\n'
            txt += 'sed "s#int32 input = 0#int32 input = $MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
            txt += 'echo "SkipEvents:<$SkipEvents>"\n'
            txt += 'sed "s#uint32 skipEvents = 0#uint32 skipEvents = $SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
        else: # pythia like job
            # position of the next seed-like value in the wrapper arguments
            seedIndex = 1
            if self.firstRun:
                txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
                txt += 'echo "FirstRun: <$FirstRun>"\n'
                txt += 'sed "s#uint32 firstRun = 0#uint32 firstRun = $FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                seedIndex = seedIndex+1

            if self.sourceSeed:
                txt += 'Seed=${args['+str(seedIndex)+']}\n'
                txt += 'sed "s#uint32 sourceSeed = 0#uint32 sourceSeed = $Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                seedIndex = seedIndex+1
                ## the following seeds are not always present
                if self.sourceSeedVtx:
                    txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "VtxSeed: <$VtxSeed>"\n'
                    txt += 'sed "s#uint32 VtxSmeared = 0#uint32 VtxSmeared = $VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
                if self.sourceSeedG4:
                    txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "G4Seed: <$G4Seed>"\n'
                    txt += 'sed "s#uint32 g4SimHits = 0#uint32 g4SimHits = $G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
                if self.sourceSeedMix:
                    txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "MixSeed: <$mixSeed>"\n'
                    txt += 'sed "s#uint32 mix = 0#uint32 mix = $mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
        txt += 'mv -f '+pset+' pset.cfg\n'

    if len(self.additional_inbox_files) > 0:
        txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
        txt += ' tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
        txt += 'fi\n'

    if self.pset is not None: #CarlosDaniele
        txt += '\n'
        txt += 'echo "***** cat pset.cfg *********"\n'
        txt += 'cat pset.cfg\n'
        txt += 'echo "****** end pset.cfg ********"\n'
        txt += '\n'
        ### FEDE FOR DBS OUTPUT PUBLICATION
        # NOTE(review): assumed to belong to the pset branch since it reads pset.cfg -- confirm
        txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
        txt += 'echo "PSETHASH = $PSETHASH" \n'
        ##############
        txt += '\n'
    return txt
1014    
def wsBuildExe(self, nj=0):
    """
    Put in the script the commands to build an executable
    or a library.

    When the tarball self.tgzNameWithPath exists on the submitting host,
    the returned shell text untars it from $RUNTIME_AREA and puts the
    ProdAgentApi and ProdCommon directories in PYTHONPATH.  Otherwise an
    empty string is returned.  nj is not used.
    """
    txt = ""

    if os.path.isfile(self.tgzNameWithPath):
        tgz = os.path.basename(self.tgzNameWithPath)
        txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+tgz+' :" \n'
        txt += 'tar xzvf $RUNTIME_AREA/'+tgz+'\n'
        txt += 'untar_status=$? \n'
        txt += 'if [ $untar_status -ne 0 ]; then \n'
        txt += ' echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
        txt += ' echo "JOB_EXIT_STATUS = $untar_status" \n'
        txt += ' echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' if [ $middleware == OSG ]; then \n'
        txt += ' cd $RUNTIME_AREA\n'
        txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
        txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
        txt += ' /bin/rm -rf $WORKING_DIR\n'
        txt += ' if [ -d $WORKING_DIR ] ;then\n'
        txt += ' echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
        txt += ' echo "JOB_EXIT_STATUS = 50999"\n'
        txt += ' echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
        txt += ' fi\n'
        txt += ' fi \n'
        txt += ' \n'
        txt += ' exit 1 \n'
        txt += 'else \n'
        txt += ' echo "Successful untar" \n'
        txt += 'fi \n'
        txt += '\n'
        txt += 'echo ">>> Include ProdAgentApi and PRODCOMMON in PYTHONPATH:"\n'
        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
        #### FEDE FOR DBS OUTPUT PUBLICATION
        txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
        txt += 'else\n'
        txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
        txt += 'fi\n'
        # print the value after the conditional so it is shown in both cases
        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
        txt += '\n'

    return txt
1063    
def modifySteeringCards(self, nj):
    """
    Modify the card provided by the user, writing a new card into the
    share dir.  This implementation performs no action and returns None.
    """
    return None
1069 ewv 1.131
def executableName(self):
    """
    Return the command that runs the job: "sh " when a user script
    (self.scriptExe) is set, self.executable otherwise.
    """
    if not self.scriptExe: #CarlosDaniele
        return self.executable
    return "sh "
1075 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string for the job executable.

    With a user script (self.scriptExe) the arguments are the script name
    followed by " $NJob".  Otherwise the arguments select pset.cfg and, for
    CMSSW releases 1_5 or later, also ask for the job report file
    self.fjrFileName through "-j".

    Raises CrabException when major and minor release numbers cannot be
    read from the version string returned by self.scram.getSWVersion().
    """
    if self.scriptExe: #CarlosDaniele
        return self.scriptExe + " $NJob"

    # if >= CMSSW_1_5_X, add -j crab_fjr.xml
    version_array = self.scram.getSWVersion().split('_')
    try:
        major = int(version_array[1])
        minor = int(version_array[2])
    except (IndexError, ValueError):
        msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
        raise CrabException(msg)

    # compare as a pair so that e.g. 2_0 counts as newer than 1_5
    if (major, minor) >= (1, 5):
        return " -j " + self.fjrFileName + " -p pset.cfg"
    return " -p pset.cfg"
1094 slacapra 1.1
def inputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL input sandbox.

    The list holds, in this order: the code tarball and the ML tarball
    (each only when the file exists), the job configuration file (only
    when self.pset is not None) and the tarball returned by
    additionalInputFileTgz().  nj is not used.
    """
    ## code
    sandbox = [tarball
               for tarball in (self.tgzNameWithPath, self.MLtgzfile)
               if os.path.isfile(tarball)]
    ## config
    if self.pset is not None:
        sandbox.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
    ## additional input files
    sandbox.append(self.additionalInputFileTgz())
    return sandbox
1114    
def outputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL output sandbox.

    Each user declared output file (self.output_file followed by
    self.output_file_sandbox) is tagged through numberFile_ with nj + 1.
    """
    declared = self.output_file + self.output_file_sandbox
    return [self.numberFile_(out, str(nj + 1)) for out in declared]
1126    
def prepareSteeringCards(self):
    """
    Make initial modifications of the user's steering card file.
    This implementation performs no action and returns None.
    """
    pass
1132    
def wsRenameOutput(self, nj):
    """
    Returns part of a job script which renames the produced files.

    For every file in self.output_file_sandbox and self.output_file the
    script moves the file to $RUNTIME_AREA under its numbered name (see
    numberFile_ with '$NJob') and links the original name to it.  A missing
    file sets exit_status=60302; for files of self.output_file the status
    is also printed and copied to output_exit_status.  With the condor_g
    scheduler a dummy output file is prepared on OSG.  nj is not used.
    """
    # NOTE(review): block nesting was reconstructed from a listing without
    # indentation -- confirm against the repository copy.
    txt = '\n'
    txt += 'echo ">>> directory content:"\n'
    txt += 'ls \n'
    txt += '\n'

    txt += 'output_exit_status=0\n'

    # (files, whether a missing file is propagated to output_exit_status)
    groups = ((self.output_file_sandbox, False), (self.output_file, True))
    for file_group, propagate_status in groups:
        for fileWithSuffix in file_group:
            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
            txt += '\n'
            txt += '# check output file\n'
            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
            txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
            txt += 'else\n'
            txt += ' exit_status=60302\n'
            txt += ' echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
            if propagate_status:
                txt += ' echo "JOB_EXIT_STATUS = $exit_status"\n'
                txt += ' output_exit_status=$exit_status\n'
            if common.scheduler.boss_scheduler_name == 'condor_g':
                txt += ' if [ $middleware == OSG ]; then \n'
                txt += ' echo "prepare dummy output file"\n'
                txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
                txt += ' fi \n'
            txt += 'fi\n'

    file_list = [self.numberFile_(name, '$NJob') for name in self.output_file]

    txt += 'file_list="'+' '.join(file_list)+'"\n'
    txt += 'cd $RUNTIME_AREA\n'
    txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    return txt
1192    
def numberFile_(self, file, txt):
    """
    Insert '_' + txt before the last extension of a file name.

    file -- file name, e.g. 'out.root'
    txt  -- tag to insert, e.g. '$NJob' or a job number as string

    Returns 'out_<txt>.root' for 'out.root'; a name without any '.' gets
    '_' + txt appended.  Only the last '.' counts, so 'a.tar.gz' becomes
    'a.tar_<txt>.gz'.
    """
    dot = file.rfind('.')
    if dot == -1:
        # no extension at all
        return file + '_' + txt
    return file[:dot] + '_' + txt + file[dot:]
1210    
def getRequirements(self, nj=[]):
    """
    Return job requirements to add to jdl files.

    The expression joins with ' && ' one Member(...) clause for
    self.version and one for self.executable_arch (each only when set) and
    the outbound-IP clause, which is always present.  Clauses are joined
    only between each other, so the result never starts with ' && '.

    nj is accepted for interface compatibility and is not used (the
    default list is never modified).
    """
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version +
                       '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')
    ## SL add requirement for OS version
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch +
                       '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')

    return ' && '.join(clauses)
1230 gutsche 1.3
def configFilename(self):
    """Return the config filename: the value of self.name() followed by '.cfg'."""
    base = self.name()
    return base + '.cfg'
1234    
def wsSetupCMSOSGEnvironment_(self):
    """
    Return the part of the job script which prepares the CMS software
    environment on OSG.

    The shell text exports SCRAM_ARCH (self.executable_arch) and sources
    $OSG_APP/cmssoft/cms/cmsset_default.sh with self.version; when that
    file is missing it reports exit code 10020, removes the working
    directory and exits.
    """
    arch = self.executable_arch
    pieces = [
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh ' + self.version + '\n',
        ' else\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        '\n',
        ' cd $RUNTIME_AREA\n',
        ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n',
        ' echo ">>> Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10017"\n',
        ' echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' fi\n',
        '\n',
        ' exit 1\n',
        ' fi\n',
        '\n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(pieces)
1271 ewv 1.131
1272 gutsche 1.3 ### OLI_DANIELE
def wsSetupCMSLCGEnvironment_(self):
    """
    Return the part of the job script which prepares the CMS software
    environment on LCG.

    The shell text exports SCRAM_ARCH and BUILD_ARCH
    (self.executable_arch) and sources $VO_CMS_SW_DIR/cmsset_default.sh,
    reporting exit codes 10031, 10020 or 10032 and exiting when the
    software dir is not set, the file is missing or empty, or sourcing
    fails.
    """
    arch = self.executable_arch
    pieces = [
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' export BUILD_ARCH=' + arch + '\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n',
        ' echo "JOB_EXIT_STATUS = 10031" \n',
        ' echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' echo "JOB_EXIT_STATUS = 10032"\n',
        ' echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
    ]
    return ''.join(pieces)
1311 gutsche 1.5
1312 ewv 1.131 ### FEDE FOR DBS OUTPUT PUBLICATION
def modifyReport(self, nj):
    """
    Insert the part of the script that modifies the FrameworkJob Report.

    When cfg_params['USER.publish_data'] converts to 1, the returned shell
    text runs ModifyJobReport.py from $SOFTWARE_DIR with the dataset name
    taken from cfg_params['USER.publish_data_name'] and, on success,
    replaces crab_fjr_$NJob.xml with the new report.  Otherwise (including
    a missing key) only a message that no publication is required is
    echoed.  nj is not used.
    """
    try:
        publish_data = int(self.cfg_params['USER.publish_data'])
    except KeyError:
        publish_data = 0

    if publish_data != 1:
        return 'echo "no data publication required"\n'

    processedDataset = self.cfg_params['USER.publish_data_name']
    # the same command line is first echoed, then executed
    command = '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH'

    pieces = [
        'echo ">>> Modify Job Report:" \n',
        ################ FEDE FOR DBS2 ################
        'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n',
        'if [ -z "$SE" ]; then\n',
        ' SE="" \n',
        'fi \n',
        'if [ -z "$SE_PATH" ]; then\n',
        ' SE_PATH="" \n',
        'fi \n',
        'echo "SE = $SE"\n',
        'echo "SE_PATH = $SE_PATH"\n',
        'ProcessedDataset=' + processedDataset + '\n',
        #### LFN=/store/user/<user>/processedDataset_PSETHASH
        'if [ "$SE_PATH" == "" ]; then\n',
        ' FOR_LFN=/copy_problems/ \n',
        'else \n',
        ' tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n',
        ##### FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
        ' FOR_LFN=/store$tmp \n',
        'fi \n',
        'echo "ProcessedDataset = $ProcessedDataset"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
        'echo "' + command + '"\n',
        command + '\n',
        'modifyReport_result=$?\n',
        'echo modifyReport_result = $modifyReport_result\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' exit_status=1\n',
        ' echo "ERROR: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n',
        'fi\n',
    ]
    return ''.join(pieces)
1368 fanzago 1.99
def cleanEnv(self):
    """
    Return the part of the job script which, on OSG, goes back to
    $RUNTIME_AREA and removes $WORKING_DIR, reporting exit code 60999
    when the directory is still present afterwards.
    """
    pieces = [
        'if [ $middleware == OSG ]; then\n',
        ' cd $RUNTIME_AREA\n',
        ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n',
        ' echo ">>> Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n',
        ' echo "JOB_EXIT_STATUS = 60999"\n',
        ' echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' fi\n',
        'fi\n',
        '\n',
    ]
    return ''.join(pieces)
1385 fanzago 1.93
def setParam_(self, param, value):
    """
    Record value under the key param in self._params.

    An existing entry with the same key is overwritten.
    """
    params = self._params
    params[param] = value
1388    
def getParams(self):
    """
    Return self._params, the container filled through setParam_.

    The object itself is returned, not a copy.
    """
    params = self._params
    return params
1391 gutsche 1.8
def setTaskid_(self):
    """
    Copy the 'taskId' entry of self.cfg_params into self._taskId.
    """
    task_id = self.cfg_params['taskId']
    self._taskId = task_id
1394 ewv 1.131
def getTaskid(self):
    """
    Return the task identifier stored in self._taskId.
    """
    task_id = self._taskId
    return task_id
1397 gutsche 1.35
def uniquelist(self, old):
    """
    Remove duplicates from a list.

    old -- iterable of hashable items.

    Returns a new list holding each distinct item once, in the order of
    its first appearance in old.  The input is not modified.
    """
    # A dictionary is used as the "already seen" set; the result is built
    # separately so that it is a real list with a predictable order
    # instead of whatever order the dictionary keys come out in.
    seen = {}
    unique = []
    for item in old:
        if item not in seen:
            seen[item] = 0
            unique.append(item)
    return unique
1406 mcinquil 1.121
1407    
def checkOut(self, limit):
    """
    Build the shell fragment that checks the dimension of the output files.

    The generated script sums the size (third column of ``ls -gGrta``) of
    every output file, except the ones whose name contains 'crab_fjr',
    plus the files matched as job stdout and stderr.  When the total is
    larger than the limit it goes through the files again, deletes each
    one that is bigger than the limit on its own or that brings the
    running total over the limit, and sets exit_status to 70000.

    limit -- maximum total size allowed; it is written into the script
             as it is and compared with the sizes reported by ls.

    Reads self.output_file and self.output_file_sandbox and uses
    self.numberFile_ to attach the $NJob tag to each file name.
    Returns the script text as a string.
    """
    # Job-numbered names of the files to check.
    numbered = []
    for fileOut in (self.output_file + self.output_file_sandbox):
        if fileOut.find('crab_fjr') == -1:
            numbered.append(self.numberFile_(fileOut, '$NJob'))
    # stdout and stderr are listed exactly once: adding them for every
    # output file counts their size several times and makes the clean-up
    # loop look again at files it has already deleted.
    allOutFiles = ' '.join(numbered + ['$stdoutFile', '$stderrFile'])

    txt = 'echo ">>> Starting output sandbox limit check :"\n'
    txt += 'stdoutFile=`ls | grep *stdout` \n'
    txt += 'stderrFile=`ls | grep *stderr` \n'
    txt += 'echo "OUTPUT files: ' + allOutFiles + '";\n'
    txt += 'ls -gGhrta;\n'

    # First pass: total size of the files that exist.
    txt += 'sum=0;\n'
    txt += 'for file in ' + allOutFiles + ' ; do\n'
    txt += ' if [ -e $file ]; then\n'
    txt += ' tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
    txt += ' sum=`expr $sum + $tt`\n'
    txt += ' else\n'
    txt += ' echo "WARNING: output file $file not found!"\n'
    txt += ' fi\n'
    txt += 'done\n'
    txt += 'echo "Total Output dimension: $sum";\n'
    txt += 'limit=' + str(limit) + ';\n'
    txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'

    # Second pass, only when over the limit: drop the offending files.
    txt += 'if [ $limit -lt $sum ]; then\n'
    txt += ' echo "WARNING: output files have to big size - something will be lost;"\n'
    txt += ' echo " checking the output file sizes..."\n'
    txt += ' tot=0;\n'
    txt += ' for filefile in ' + allOutFiles + ' ; do\n'
    txt += ' dimFile=`ls -gGrta $filefile | awk \'{ print $3 }\';`\n'
    # The running total grows by the size of the current file ($dimFile),
    # not by $tt, which still holds the last size seen in the first pass.
    txt += ' tot=`expr $tot + $dimFile`;\n'
    txt += ' if [ $limit -lt $dimFile ]; then\n'
    txt += ' echo "deleting file: $filefile";\n'
    txt += ' rm -f $filefile\n'
    txt += ' elif [ $limit -lt $tot ]; then\n'
    txt += ' echo "deleting file: $filefile";\n'
    txt += ' rm -f $filefile\n'
    txt += ' else\n'
    txt += ' echo "saving file: $filefile"\n'
    txt += ' fi\n'
    txt += ' done\n'

    txt += ' ls -agGhrt;\n'
    txt += ' echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
    txt += ' echo "JOB_EXIT_STATUS = 70000";\n'
    txt += ' exit_status=70000;\n'
    # 'else' gets its own line instead of being glued to the next echo.
    txt += 'else\n'
    txt += ' echo "Total Output dimension $sum is fine.";\n'
    txt += 'fi\n'
    txt += 'echo "Ending output sandbox limit check"\n'
    return txt