ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.148
Committed: Mon Dec 10 18:28:58 2007 UTC (17 years, 4 months ago) by fanzago
Content type: text/x-python
Branch: MAIN
Changes since 1.147: +14 -9 lines
Log Message:
fixed problem with output copy and check output sandbox size

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5 fanzago 1.115 from BlackWhiteListParser import BlackWhiteListParser
6 slacapra 1.1 import common
7     import Scram
8    
9 slacapra 1.105 import os, string, glob
10 slacapra 1.1
11     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs):
    """
    Configure a CMSSW job type from the user configuration and perform
    data discovery, tarball creation, job splitting and ParameterSet
    editing.

    ARGUMENTS:
      cfg_params: mapping of 'SECTION.key' strings to the values given by
                  the user (e.g. 'CMSSW.datasetpath', 'USER.script_exe')
      ncjobs:     number of jobs requested to be created, or the string
                  'all' (see jobSplittingByBlocks)
    RAISES: CrabException for a missing/invalid mandatory parameter, an
            unsupported version/architecture pair, a missing input file,
            or a failure while editing the ParameterSet.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug(3,'CMSSW::__init__')

    self.argsList = []

    self._params = {}
    self.cfg_params = cfg_params
    # init BlackWhiteListParser
    self.blackWhiteListParser = BlackWhiteListParser(cfg_params)

    try:
        self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
    except KeyError:
        self.MaxTarBallSize = 9.5

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    log = common.logger

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.additional_tgz_name = 'additional.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''  #scrip use case Da
    self.datasetPath = ''  #scrip use case Da

    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()

    #
    # Try to block creation in case of arch/version mismatch
    #

    a = string.split(self.version, "_")

    if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
        msg = "Warning: You are using %s version of CMSSW with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
        common.logger.message(msg)
    if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
        msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
        raise CrabException(msg)

    common.taskDB.setDict('codeVersion',self.version)
    self.setParam_('application', self.version)

    ### collect Data cards

    try:
        tmp = cfg_params['CMSSW.datasetpath']
        log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
        if string.lower(tmp)=='none':
            # no input dataset: generation-like or script-only job
            self.datasetPath = None
            self.selectNoInput = 1
        else:
            self.datasetPath = tmp
            self.selectNoInput = 0
    except KeyError:
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)

    # ML monitoring
    # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
    if not self.datasetPath:
        self.setParam_('dataset', 'None')
        self.setParam_('owner', 'None')
    else:
        try:
            datasetpath_split = self.datasetPath.split("/")
            # standard style
            self.setParam_('datasetFull', self.datasetPath)
            self.setParam_('dataset', datasetpath_split[1])
            self.setParam_('owner', datasetpath_split[2])
        except IndexError:
            # path has fewer than three '/'-separated fields:
            # fall back to the raw path for both monitoring parameters
            self.setParam_('dataset', self.datasetPath)
            self.setParam_('owner', self.datasetPath)

    self.setTaskid_()
    self.setParam_('taskId', self.cfg_params['taskId'])

    self.dataTiers = []

    ## now the application
    try:
        self.executable = cfg_params['CMSSW.executable']
        self.setParam_('exe', self.executable)
        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
        msg = "Default executable cmsRun overridden. Switch to " + self.executable
        log.debug(3,msg)
    except KeyError:
        self.executable = 'cmsRun'
        self.setParam_('exe', self.executable)
        msg = "User executable not defined. Use cmsRun"
        log.debug(3,msg)

    try:
        self.pset = cfg_params['CMSSW.pset']
        log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
        if self.pset.lower() != 'none' :
            if (not os.path.exists(self.pset)):
                raise CrabException("User defined PSet file "+self.pset+" does not exist")
        else:
            # 'none' means: no ParameterSet, run the user script only
            self.pset = None
    except KeyError:
        raise CrabException("PSet file missing. Cannot run cmsRun ")

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    try:
        self.output_file = []
        tmp = cfg_params['CMSSW.output_file']
        if tmp != '':
            tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
            log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
            for tmp in tmpOutFiles:
                tmp=string.strip(tmp)
                self.output_file.append(tmp)
        else:
            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
    except KeyError:
        log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")

    # script_exe file as additional file in inputSandbox
    try:
        self.scriptExe = cfg_params['USER.script_exe']
        if self.scriptExe != '':
            if not os.path.isfile(self.scriptExe):
                msg ="ERROR. file "+self.scriptExe+" not found"
                raise CrabException(msg)
            self.additional_inbox_files.append(string.strip(self.scriptExe))
    except KeyError:
        self.scriptExe = ''

    #CarlosDaniele
    if self.datasetPath is None and self.pset is None and self.scriptExe == '' :
        msg ="Error. script_exe not defined"
        raise CrabException(msg)

    ## additional input files
    try:
        tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
        for tmp in tmpAddFiles:
            tmp = string.strip(tmp)
            if not tmp:
                # empty entry (empty value, trailing or doubled comma):
                # nothing to ship, and tmp[0] below would raise IndexError
                continue
            dirname = ''
            if not tmp[0]=="/": dirname = "."
            files = []
            if string.find(tmp,"*")>-1:
                files = glob.glob(os.path.join(dirname, tmp))
                if len(files)==0:
                    raise CrabException("No additional input file found with this pattern: "+tmp)
            else:
                files.append(tmp)
            for fname in files:
                if not os.path.exists(fname):
                    raise CrabException("Additional input file not found: "+fname)
                self.additional_inbox_files.append(string.strip(fname))
        common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
    except KeyError:
        pass

    # files per job
    try:
        if (cfg_params['CMSSW.files_per_jobs']):
            raise CrabException("files_per_jobs no longer supported. Quitting.")
    except KeyError:
        pass

    ## Events per job
    try:
        self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
        self.selectEventsPerJob = 1
    except KeyError:
        self.eventsPerJob = -1
        self.selectEventsPerJob = 0

    ## number of jobs
    try:
        self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
        self.selectNumberOfJobs = 1
    except KeyError:
        self.theNumberOfJobs = 0
        self.selectNumberOfJobs = 0

    try:
        self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
        self.selectTotalNumberEvents = 1
    except KeyError:
        self.total_number_of_events = 0
        self.selectTotalNumberEvents = 0

    if self.pset is not None: #CarlosDaniele
        if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
            msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
            raise CrabException(msg)
    else:
        if (self.selectNumberOfJobs == 0):
            msg = 'Must specify number_of_jobs.'
            raise CrabException(msg)

    ## source seed for pythia
    try:
        self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
    except KeyError:
        self.sourceSeed = None
        common.logger.debug(5,"No seed given")

    try:
        self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
    except KeyError:
        self.sourceSeedVtx = None
        common.logger.debug(5,"No vertex seed given")

    try:
        self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
    except KeyError:
        self.sourceSeedG4 = None
        common.logger.debug(5,"No g4 sim hits seed given")

    try:
        self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
    except KeyError:
        self.sourceSeedMix = None
        common.logger.debug(5,"No mix seed given")

    try:
        self.firstRun = int(cfg_params['CMSSW.first_run'])
    except KeyError:
        self.firstRun = None
        common.logger.debug(5,"No first run given")
    if self.pset is not None: #CarlosDaniele
        import PsetManipulator as pp
        PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset

    # Copy/return

    try:
        self.copy_data = int(cfg_params['USER.copy_data'])
    except KeyError:
        self.copy_data = 0
    try:
        self.return_data = int(cfg_params['USER.return_data'])
    except KeyError:
        self.return_data = 0

    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents=0  # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination=[]  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end

    self.tgzNameWithPath = self.getTarBall(self.executable)

    ## Select Splitting
    if self.selectNoInput:
        if self.pset is None: #CarlosDaniele
            self.jobSplittingForScript()
        else:
            self.jobSplittingNoInput()
    else:
        self.jobSplittingByBlocks(blockSites)

    # modify Pset
    if self.pset is not None: #CarlosDaniele
        try:
            if (self.datasetPath): # standard job
                # allow to process a fraction of events in a file
                PsetEdit.inputModule("INPUTFILE")
                PsetEdit.maxEvent(0)
                PsetEdit.skipEvent(0)
            else:  # pythia like job
                PsetEdit.maxEvent(self.eventsPerJob)
                if (self.firstRun):
                    PsetEdit.pythiaFirstRun(0)  #First Run
                # NOTE(review): the vtx/G4/mix checks are nested under the
                # pythia seed check; nesting was reconstructed from an
                # indentation-less listing -- confirm against the repository.
                if (self.sourceSeed) :
                    PsetEdit.pythiaSeed(0)
                    if (self.sourceSeedVtx) :
                        PsetEdit.vtxSeed(0)
                    if (self.sourceSeedG4) :
                        PsetEdit.g4Seed(0)
                    if (self.sourceSeedMix) :
                        PsetEdit.mixSeed(0)
            # add FrameworkJobReport to parameter-set
            PsetEdit.addCrabFJR(self.fjrFileName)
            PsetEdit.psetWriter(self.configFilename())
        except (KeyboardInterrupt, SystemExit):
            # never convert a user interrupt / interpreter exit into a CrabException
            raise
        except Exception:
            import sys
            # keep the original message and append the real cause so the
            # failure can be diagnosed
            msg='Error while manipuliating ParameterSet: exiting...'
            msg += '\n' + str(sys.exc_info()[1])
            raise CrabException(msg)
329 gutsche 1.3
330 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
331    
332 slacapra 1.86 import DataDiscovery
333     import DataLocation
334 gutsche 1.3 common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
335    
336     datasetPath=self.datasetPath
337    
338 slacapra 1.1 ## Contact the DBS
339 gutsche 1.92 common.logger.message("Contacting Data Discovery Services ...")
340 slacapra 1.1 try:
341 gutsche 1.66
342 slacapra 1.137 self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
343 slacapra 1.1 self.pubdata.fetchDBSInfo()
344    
345 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
346 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
347     raise CrabException(msg)
348 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
349 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
350     raise CrabException(msg)
351 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
352 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
353 slacapra 1.1 raise CrabException(msg)
354    
355 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
356 mkirn 1.37 self.eventsbyblock=self.pubdata.getEventsPerBlock()
357     self.eventsbyfile=self.pubdata.getEventsPerFile()
358 gutsche 1.3
359 slacapra 1.1 ## get max number of events
360 ewv 1.131 self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py
361 slacapra 1.1
362     ## Contact the DLS and build a list of sites hosting the fileblocks
363     try:
364 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
365 gutsche 1.6 dataloc.fetchDLSInfo()
366 slacapra 1.41 except DataLocation.DataLocationError , ex:
367 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
368     raise CrabException(msg)
369 ewv 1.131
370 slacapra 1.1
371 gutsche 1.35 sites = dataloc.getSites()
372     allSites = []
373     listSites = sites.values()
374 slacapra 1.63 for listSite in listSites:
375     for oneSite in listSite:
376 gutsche 1.35 allSites.append(oneSite)
377     allSites = self.uniquelist(allSites)
378 gutsche 1.3
379 gutsche 1.92 # screen output
380     common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
381    
382 gutsche 1.35 return sites
383 ewv 1.131
def setArgsList(self, argsList):
    """Store *argsList* on the instance as self.argsList."""
    self.argsList = argsList
386    
def jobSplittingByBlocks(self, blockSites):
    """
    Perform job splitting. Jobs run over an integer number of files
    and no more than one block.
    ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
    REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
              self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
              self.maxEvents, self.filesbyblock, self.eventsbyblock, self.eventsbyfile,
              self.ncjobs
    SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
          self.total_number_of_jobs - Total # of jobs (also copied to self.ncjobs)
          self.list_of_args - per job: [file list string, max events, skip events]
    """

    # ---- Handle the possible job splitting configurations ---- #
    if (self.selectTotalNumberEvents):
        totalEventsRequested = self.total_number_of_events
    if (self.selectEventsPerJob):
        eventsPerJobRequested = self.eventsPerJob
        if (self.selectNumberOfJobs):
            totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob

    # If user requested all the events in the dataset
    if (totalEventsRequested == -1):
        eventsRemaining=self.maxEvents
    # If user requested more events than are in the dataset
    elif (totalEventsRequested > self.maxEvents):
        eventsRemaining = self.maxEvents
        # report the value actually compared: self.total_number_of_events is 0
        # when the request was given as events_per_job + number_of_jobs
        common.logger.message("Requested "+str(totalEventsRequested)+ " events, but only "+str(self.maxEvents)+" events are available.")
    # If user requested less events than are in the dataset
    else:
        eventsRemaining = totalEventsRequested

    # If user requested more events per job than are in the dataset
    if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
        eventsPerJobRequested = self.maxEvents

    # For user info at end
    totalEventCount = 0

    if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
        eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)

    if (self.selectNumberOfJobs):
        common.logger.message("May not create the exact number_of_jobs requested.")

    if ( self.ncjobs == 'all' ) :
        totalNumberOfJobs = 999999999
    else :
        totalNumberOfJobs = self.ncjobs

    blocks = blockSites.keys()
    blockCount = 0
    # Backup variable in case self.maxEvents counted events in a non-included block
    numBlocksInDataset = len(blocks)

    jobCount = 0
    list_of_lists = []

    # dictionary tracking which jobs belong to which block
    jobsOfBlock = {}

    # ---- Iterate over the blocks in the dataset until ---- #
    # ---- we've met the requested total # of events    ---- #
    while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
        block = blocks[blockCount]
        blockCount += 1
        if block not in jobsOfBlock:
            jobsOfBlock[block] = []

        # blocks without an event count are skipped entirely
        if block in self.eventsbyblock:
            numEventsInBlock = self.eventsbyblock[block]
            common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))

            files = self.filesbyblock[block]
            numFilesInBlock = len(files)
            if (numFilesInBlock <= 0):
                continue
            fileCount = 0

            # ---- New block => New job ---- #
            parString = ""
            # counter for number of events in files currently worked on
            filesEventCount = 0
            # flag if next while loop should touch new file
            newFile = 1
            # job event counter
            jobSkipEventCount = 0

            # ---- Iterate over the files in the block until we've met the requested ---- #
            # ---- total # of events or we've gone over all the files in this block  ---- #
            while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
                fileName = files[fileCount]
                if newFile :
                    try:
                        numEventsInFile = self.eventsbyfile[fileName]
                        common.logger.debug(6, "File "+str(fileName)+" has "+str(numEventsInFile)+" events")
                        # increase filesEventCount
                        filesEventCount += numEventsInFile
                        # Add file to current job
                        parString += '\\\"' + fileName + '\\\"\,'
                        newFile = 0
                    except KeyError:
                        common.logger.message("File "+str(fileName)+" has unknown number of events: skipping")

                # if less events in file remain than eventsPerJobRequested
                if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
                    # if last file in block
                    if ( fileCount == numFilesInBlock-1 ) :
                        # end job using last file, use remaining events in block
                        # close job and touch new file
                        # ([:-2] drops the trailing backslash-comma separator)
                        fullString = parString[:-2]
                        list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
                        self.jobDestination.append(blockSites[block])
                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                        # fill jobs of block dictionary
                        jobsOfBlock[block].append(jobCount+1)
                        # reset counter
                        jobCount = jobCount + 1
                        totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
                        eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
                        jobSkipEventCount = 0
                        # reset file
                        parString = ""
                        filesEventCount = 0
                        newFile = 1
                        fileCount += 1
                    else :
                        # go to next file
                        newFile = 1
                        fileCount += 1
                # if events in file equal to eventsPerJobRequested
                elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
                    # close job and touch new file
                    fullString = parString[:-2]
                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    jobsOfBlock[block].append(jobCount+1)
                    # reset counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    jobSkipEventCount = 0
                    # reset file
                    parString = ""
                    filesEventCount = 0
                    newFile = 1
                    fileCount += 1

                # if more events in file remain than eventsPerJobRequested
                else :
                    # close job but don't touch new file
                    fullString = parString[:-2]
                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    jobsOfBlock[block].append(jobCount+1)
                    # increase counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    # calculate skip events for last file
                    # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
                    jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[fileName])
                    # remove all but the last file
                    filesEventCount = self.eventsbyfile[fileName]
                    parString = ""
                    parString += '\\\"' + fileName + '\\\"\,'
                pass # END if
            pass # END while (iterate over files in the block)
    pass # END while (iterate over blocks in the dataset)
    self.ncjobs = self.total_number_of_jobs = jobCount
    if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
        common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
    common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")

    # screen output
    screenOutput = "List of jobs and available destination sites:\n\n"

    # keep trace of block with no sites to print a warning at the end
    noSiteBlock = []   # job ranges of the blocks left without any site
    bloskNoSite = []   # display indices of those blocks

    blockCounter = 0
    for block in blocks:
        if block in jobsOfBlock:
            blockCounter += 1
            screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
            if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
                noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
                bloskNoSite.append( blockCounter )

    common.logger.message(screenOutput)
    if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
        msg = 'WARNING: No sites are hosting any part of data for block:\n '
        # separator ("virgola" = comma) is only used when several items are listed
        virgola = ""
        if len(bloskNoSite) > 1:
            virgola = ","
        for block in bloskNoSite:
            msg += ' ' + str(block) + virgola
        msg += '\n Related jobs:\n '
        virgola = ""
        if len(noSiteBlock) > 1:
            virgola = ","
        for range_jobs in noSiteBlock:
            msg += str(range_jobs) + virgola
        msg += '\n will not be submitted and this block of data can not be analyzed!\n'
        common.logger.message(msg)

    self.list_of_args = list_of_lists
    return
603    
def jobSplittingNoInput(self):
    """
    Perform job splitting based on number of event per job
    (jobs without an input dataset).

    REQUIRES: self.selectEventsPerJob, self.selectNumberOfJobs,
              self.selectTotalNumberEvents, self.eventsPerJob,
              self.theNumberOfJobs, self.total_number_of_events,
              self.firstRun and the self.sourceSeed* values
    SETS: self.total_number_of_jobs, self.list_of_args; may also set
          self.total_number_of_events or self.eventsPerJob; appends one
          [""] destination per job to self.jobDestination
    RAISES: CrabException if the total number of events is negative or
            the requested split would divide by zero
    """
    common.logger.debug(5,'Splitting per events')

    if (self.selectEventsPerJob):
        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
    if (self.selectNumberOfJobs):
        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
    if (self.selectTotalNumberEvents):
        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')

    if (self.total_number_of_events < 0):
        msg='Cannot split jobs per Events with "-1" as total number of events'
        raise CrabException(msg)

    if (self.selectEventsPerJob):
        if (self.selectTotalNumberEvents):
            if self.eventsPerJob == 0:
                # would otherwise surface as a bare ZeroDivisionError
                raise CrabException('Cannot split jobs per Events with events_per_job equal to 0')
            self.total_number_of_jobs = self.total_number_of_events // self.eventsPerJob
        elif(self.selectNumberOfJobs) :
            self.total_number_of_jobs =self.theNumberOfJobs
            self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)

    elif (self.selectNumberOfJobs) :
        self.total_number_of_jobs = self.theNumberOfJobs
        if self.total_number_of_jobs == 0:
            # would otherwise surface as a bare ZeroDivisionError
            raise CrabException('Cannot split jobs per Events with number_of_jobs equal to 0')
        self.eventsPerJob = self.total_number_of_events // self.total_number_of_jobs

    common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs))

    # is there any remainder?
    check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)

    common.logger.debug(5,'Check '+str(check))

    common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
    if check > 0:
        common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))

    # argument is seed number.$i
    self.list_of_args = []
    for i in range(self.total_number_of_jobs):
        ## Since there is no input, any site is good
        self.jobDestination.append([""]) #must be empty to write correctly the xml
        args=[]
        if (self.firstRun):
            ## pythia first run
            args.append(str(self.firstRun)+str(i))
        # NOTE(review): vtx/G4/mix seeds are nested under the pythia seed
        # check; nesting was reconstructed from an indentation-less
        # listing -- confirm against the repository.
        if (self.sourceSeed):
            args.append(str(self.sourceSeed)+str(i))
            if (self.sourceSeedVtx):
                ## + vtx random seed
                args.append(str(self.sourceSeedVtx)+str(i))
            if (self.sourceSeedG4):
                ## + G4 random seed
                args.append(str(self.sourceSeedG4)+str(i))
            if (self.sourceSeedMix):
                ## + Mix random seed
                args.append(str(self.sourceSeedMix)+str(i))
        self.list_of_args.append(args)

    return
669    
670 spiga 1.42
def jobSplittingForScript(self):#CarlosDaniele
    """
    Perform job splitting based on number of job: one job per requested
    job, each receiving its own index as the only argument.

    SETS: self.total_number_of_jobs, self.list_of_args; appends one [""]
          destination per job to self.jobDestination
    """
    common.logger.debug(5,'Splitting per job')
    common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')

    nJobs = self.theNumberOfJobs
    self.total_number_of_jobs = nJobs

    common.logger.debug(5,'N jobs '+str(nJobs))

    common.logger.message(str(nJobs)+' jobs can be created')

    # argument is seed number.$i
    self.list_of_args = []
    jobIndices = range(nJobs)
    ## Since there is no input, any site is good: empty destination per job
    self.jobDestination.extend([[""] for jobIndex in jobIndices])
    ## no random seed: the job index is the only argument
    self.list_of_args.extend([[str(jobIndex)] for jobIndex in jobIndices])
    return
693    
def split(self, jobParams):
    """
    Copy the per-job arguments and destinations computed by the job
    splitting step into *jobParams* and into common.jobDB.

    ARGUMENT: jobParams: list, extended in place by one slot per job; the
              first self.total_number_of_jobs entries are then overwritten
              with the job arguments
    """
    common.jobDB.load()
    jobTotal = self.total_number_of_jobs
    perJobArgs = self.list_of_args
    # create the empty structure
    jobParams.extend([""] * jobTotal)

    for jobIndex in range(jobTotal):
        jobParams[jobIndex] = perJobArgs[jobIndex]
        common.jobDB.setArguments(jobIndex, jobParams[jobIndex])
        destination = self.jobDestination[jobIndex]
        common.logger.debug(5,"Job "+str(jobIndex)+" Destination: "+str(destination))
        common.jobDB.setDestination(jobIndex, destination)

    common.jobDB.save()
    return
714 ewv 1.131
def getJobTypeArguments(self, nj, sched):
    """
    Return the arguments stored in common.jobDB for job *nj* as a single
    string, each argument followed by one space. *sched* is not used.
    """
    pieces = [str(argument) + " " for argument in common.jobDB.arguments(nj)]
    return ''.join(pieces)
720 ewv 1.131
def numberOfJobs(self):
    """Return self.total_number_of_jobs as set by the job splitting step."""
    jobTotal = self.total_number_of_jobs
    return jobTotal
724    
def getTarBall(self, exe):
    """
    Return the path of the tar ball holding the user libraries and
    executable, building it first when it is not on disk yet.
    """
    # Marco. Let's start to use relative path for Boss XML files
    self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name

    if not os.path.exists(self.tgzNameWithPath):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return self.tgzNameWithPath.strip()

    # it already exists: just return it
    return self.tgzNameWithPath
742    
def buildTar_(self, executable):
    """
    Build the input-sandbox tar balls.

    Two gzipped tar files are written below common.work_space.pathForTgz():
    self.tgzNameWithPath with the private user code (executable, lib,
    module, data directories, ProdAgentApi, ProdCommon) and
    self.MLtgzfile with the monitoring helper scripts.

    Nothing is built when the user working area is the release area itself
    (or no release top is known).

    Raises CrabException when the user executable cannot be found, when a
    tar ball cannot be written, or when the user tar ball exceeds
    self.MaxTarBallSize (in MB).
    """
    import tarfile

    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        return

    try: # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            if (self.executable != ''):
                exeWithPath = self.scram.findFile_(executable)
                if ( not exeWithPath ):
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship
                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
                    path = swArea+'/'
                    # distinguish case when script is in user project area or given by full path somewhere else
                    if exeWithPath.find(path) >= 0 :
                        exe = exeWithPath.replace(path,'')
                        tar.add(path+exe,exe)
                    else :
                        tar.add(exeWithPath,os.path.basename(executable))
                # otherwise the exe is from the release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            libDir = 'lib'
            lib = swArea+'/' +libDir
            common.logger.debug(5,"lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib,libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module,moduleDir)

            ## Now check if any data dir(s) is present
            swAreaLen=len(swArea)
            for root, dirs, files in os.walk(swArea):
                if "data" in dirs:
                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
                    tar.add(root+"/data",root[swAreaLen:]+"/data")

            ## Add ProdAgent dir to tar
            paDir = 'ProdAgentApi'
            pa = os.environ['CRABDIR'] + '/' + paDir
            if os.path.isdir(pa):
                tar.add(pa,paDir)

            ### FEDE FOR DBS PUBLICATION
            ## Add PRODCOMMON dir to tar
            prodcommonDir = 'ProdCommon'
            prodcommonPath = os.environ['CRABDIR'] + '/' + prodcommonDir
            if os.path.isdir(prodcommonPath):
                tar.add(prodcommonPath,prodcommonDir)

            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            # always release the file handle, even when packing fails
            tar.close()
    except CrabException:
        # keep the specific diagnostic (e.g. executable not found)
        raise
    except Exception:
        raise CrabException('Could not create tar-ball')

    ## check for tarball size
    tarballinfo = os.stat(self.tgzNameWithPath)
    if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
        raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')

    ## create tar-ball with ML stuff
    self.MLtgzfile = common.work_space.pathForTgz()+'share/MLfiles.tgz'
    try:
        tar = tarfile.open(self.MLtgzfile, "w:gz")
        try:
            path=os.environ['CRABDIR'] + '/python/'
            for mlFile in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
                tar.add(path+mlFile,mlFile)
            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
        finally:
            tar.close()
    except Exception:
        raise CrabException('Could not create ML files tar-ball')

    return
839 ewv 1.131
def additionalInputFileTgz(self):
    """
    Put all additional files into a tar ball and return its name.

    Every entry of self.additional_inbox_files is stored under its last
    '/'-separated path component, so the archive is flat.  The archive is
    closed even when adding a file fails.
    """
    import tarfile
    tarName = common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
    tar = tarfile.open(tarName, "w:gz")
    try:
        for inFile in self.additional_inbox_files:
            tar.add(inFile, inFile.split('/')[-1])
        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
    finally:
        # do not leak the file handle when a file cannot be added
        tar.close()
    return tarName
852    
def wsSetupEnvironment(self, nj):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    The generated shell text sets up the middleware specific CMS
    environment, creates the CMSSW project area for self.version, checks
    the number of wrapper arguments, exports the dataset related variables
    and, when a parameter set is used, patches it with the per-job
    arguments and renames it to pset.cfg.
    """
    # Prepare JobType-independent part
    txt = ''
    txt += 'echo ">>> setup environment"\n'
    txt += 'if [ $middleware == LCG ]; then \n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' if [ ! $? == 0 ] ;then\n'
    txt += ' echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
    txt += ' echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' exit 1\n'
    txt += ' fi\n'
    txt += ' echo ">>> Created working directory: $WORKING_DIR"\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo ">>> specific cmssw setup environment:"\n'
    txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
    txt += scram+' project CMSSW '+self.version+'\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
    txt += ' echo "JOB_EXIT_STATUS = 10034"\n'
    txt += ' echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    # report the release actually requested instead of a hard-coded one
    txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW '+self.version+' not found on `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
    txt += ' echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += ' exit 1 \n'
    txt += 'fi \n'
    txt += 'cd '+self.version+'\n'
    ########## FEDE FOR DBS2 ######################
    txt += 'SOFTWARE_DIR=`pwd`\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    ###############################################
    ### needed grep for bug in scramv1 ###
    txt += scram+' runtime -sh\n'
    txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    txt += 'echo $PATH\n'

    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber)\n"
    txt += "\n"
    txt += "if [ $nargs -lt "+str(len(self.argsList[nj].split()))+" ]\n"
    txt += "then\n"
    txt += " echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
    txt += ' echo "JOB_EXIT_STATUS = 50113"\n'
    txt += ' echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
    txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
    txt += ' echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += " exit 1\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    ### FEDE FOR DBS OUTPUT PUBLICATION
    if (self.datasetPath):
        txt += '\n'
        txt += 'DatasetPath='+self.datasetPath+'\n'

        datasetpath_split = self.datasetPath.split("/")

        txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
        txt += 'DataTier='+datasetpath_split[2]+'\n'
        txt += 'ApplicationFamily=cmsRun\n'

    else:
        txt += 'DatasetPath=MCDataTier\n'
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'

    if self.pset is not None: #CarlosDaniele
        pset = os.path.basename(job.configFilename())
        txt += '\n'
        txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
        if (self.datasetPath): # standard job
            txt += 'InputFiles=${args[1]}\n'
            txt += 'MaxEvents=${args[2]}\n'
            txt += 'SkipEvents=${args[3]}\n'
            txt += 'echo "Inputfiles:<$InputFiles>"\n'
            txt += 'sed "s#\'INPUTFILE\'#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
            txt += 'echo "MaxEvents:<$MaxEvents>"\n'
            txt += 'sed "s#int32 input = 0#int32 input = $MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
            txt += 'echo "SkipEvents:<$SkipEvents>"\n'
            txt += 'sed "s#uint32 skipEvents = 0#uint32 skipEvents = $SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
        else: # pythia like job
            # the position of each seed among the wrapper arguments depends
            # on which of the preceding optional arguments are present
            seedIndex=1
            if (self.firstRun):
                txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
                txt += 'echo "FirstRun: <$FirstRun>"\n'
                txt += 'sed "s#uint32 firstRun = 0#uint32 firstRun = $FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                seedIndex=seedIndex+1

            if (self.sourceSeed):
                txt += 'Seed=${args['+str(seedIndex)+']}\n'
                txt += 'sed "s#uint32 sourceSeed = 0#uint32 sourceSeed = $Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                seedIndex=seedIndex+1
                ## the following seeds are not always present
                if (self.sourceSeedVtx):
                    txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "VtxSeed: <$VtxSeed>"\n'
                    txt += 'sed "s#uint32 VtxSmeared = 0#uint32 VtxSmeared = $VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
                if (self.sourceSeedG4):
                    txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "G4Seed: <$G4Seed>"\n'
                    txt += 'sed "s#uint32 g4SimHits = 0#uint32 g4SimHits = $G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
                if (self.sourceSeedMix):
                    txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "MixSeed: <$mixSeed>"\n'
                    txt += 'sed "s#uint32 mix = 0#uint32 mix = $mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
        txt += 'mv -f '+pset+' pset.cfg\n'

    if len(self.additional_inbox_files) > 0:
        txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
        txt += ' tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
        txt += 'fi\n'

    if self.pset is not None: #CarlosDaniele
        txt += '\n'
        txt += 'echo "***** cat pset.cfg *********"\n'
        txt += 'cat pset.cfg\n'
        txt += 'echo "****** end pset.cfg ********"\n'
        txt += '\n'
        ### FEDE FOR DBS OUTPUT PUBLICATION
        txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
        txt += 'echo "PSETHASH = $PSETHASH" \n'
        ##############
        txt += '\n'
    return txt
1025    
def wsBuildExe(self, nj=0):
    """
    Put in the script the commands to build an executable
    or a library.

    Returns an empty string when the user tar ball
    (self.tgzNameWithPath) does not exist; otherwise the shell text that
    untars it on the worker node and prepends the shipped ProdAgentApi and
    ProdCommon directories to PYTHONPATH.
    """

    txt = ""

    if os.path.isfile(self.tgzNameWithPath):
        tgzBaseName = os.path.basename(self.tgzNameWithPath)
        txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+tgzBaseName+' :" \n'
        txt += 'tar xzvf $RUNTIME_AREA/'+tgzBaseName+'\n'
        txt += 'untar_status=$? \n'
        txt += 'if [ $untar_status -ne 0 ]; then \n'
        txt += ' echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
        txt += ' echo "JOB_EXIT_STATUS = $untar_status" \n'
        txt += ' echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' if [ $middleware == OSG ]; then \n'
        txt += ' cd $RUNTIME_AREA\n'
        txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
        txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
        txt += ' /bin/rm -rf $WORKING_DIR\n'
        txt += ' if [ -d $WORKING_DIR ] ;then\n'
        txt += ' echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
        txt += ' echo "JOB_EXIT_STATUS = 50999"\n'
        txt += ' echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
        txt += ' fi\n'
        txt += ' fi \n'
        txt += ' \n'
        txt += ' exit 1 \n'
        txt += 'else \n'
        txt += ' echo "Successful untar" \n'
        txt += 'fi \n'
        txt += '\n'
        txt += 'echo ">>> Include ProdAgentApi and PRODCOMMON in PYTHONPATH:"\n'
        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
        #### FEDE FOR DBS OUTPUT PUBLICATION
        txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
        txt += 'else\n'
        txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
        txt += 'fi\n'
        # log the resulting value after the if/else so that it is printed
        # in both cases, not only when PYTHONPATH was already set
        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
        txt += '\n'

    return txt
1074    
def modifySteeringCards(self, nj):
    """
    Hook for modifying the card provided by the user and writing a new
    card into the share dir.  This job type does nothing here.
    """
    return None
1080 ewv 1.131
def executableName(self):
    """
    Return the command used to start the job: "sh " when a user script
    (self.scriptExe) is configured, otherwise self.executable.
    """
    if not self.scriptExe: #CarlosDaniele
        return self.executable
    return "sh "
1086 slacapra 1.1
def executableArgs(self):
    """
    Return the command line arguments of the job executable.

    With a user script the arguments are the script name and "$NJob".
    Otherwise the parameter set is passed with "-p pset.cfg"; for releases
    >= CMSSW_1_5_X the framework job report file is requested with
    "-j <self.fjrFileName>" as well.

    Raises CrabException when the release string returned by scram does
    not contain numeric major and minor fields.
    """
    if self.scriptExe:#CarlosDaniele
        return self.scriptExe + " $NJob"

    # if >= CMSSW_1_5_X, add -j crab_fjr.xml
    version_array = self.scram.getSWVersion().split('_')
    try:
        major = int(version_array[1])
        minor = int(version_array[2])
    except (IndexError, ValueError):
        msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
        raise CrabException(msg)

    # Compare (major, minor) as a pair: testing the two fields separately
    # would reject any later major release with a minor number below 5
    # (e.g. CMSSW_2_0_0).
    if (major, minor) >= (1, 5):
        return " -j " + self.fjrFileName + " -p pset.cfg"
    return " -p pset.cfg"
1105 slacapra 1.1
def inputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL input sandbox: the code
    and monitoring tar balls that exist on disk, the job configuration
    file (when a parameter set is used) and the tar ball with the
    additional input files.
    """
    ## code
    sandbox = [tarball for tarball in (self.tgzNameWithPath, self.MLtgzfile)
               if os.path.isfile(tarball)]
    ## config
    if self.pset is not None:
        sandbox.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
    ## additional input files
    sandbox.append(self.additionalInputFileTgz())
    return sandbox
1125    
def outputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL output sandbox.

    Every user declared output file is numbered with nj + 1.
    """
    ## User Declared output files
    declared = self.output_file + self.output_file_sandbox
    return [self.numberFile_(name, str(nj + 1)) for name in declared]
1137    
def prepareSteeringCards(self):
    """
    Hook for the initial modifications of the user's steering card file.
    Nothing needs to be done for this job type.
    """
    return None
1143    
def wsRenameOutput(self, nj):
    """
    Returns part of a job script which renames the produced files.

    Each declared output file gets the job number appended (see
    numberFile_) and a symbolic link with the original name is left in
    $RUNTIME_AREA.  Sandbox files are always moved to $RUNTIME_AREA; the
    other output files stay in the current directory when self.copy_data
    is 1.  The numbered names of self.output_file are exported to the
    script as the shell variable file_list.
    """

    txt = '\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    txt += 'ls \n'
    txt += '\n'

    txt += 'output_exit_status=0\n'

    for fileWithSuffix in (self.output_file_sandbox):
        output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
        txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
        txt += ' ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
        txt += 'else\n'
        txt += ' exit_status=60302\n'
        txt += ' echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
        if common.scheduler.boss_scheduler_name == 'condor_g':
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' fi \n'
        txt += 'fi\n'

    for fileWithSuffix in (self.output_file):
        output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
        if (self.copy_data == 1): # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
            txt += ' mv '+fileWithSuffix+' '+output_file_num+'\n'
            txt += ' ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
        else:
            txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
        txt += 'else\n'
        txt += ' exit_status=60302\n'
        txt += ' echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
        txt += ' echo "JOB_EXIT_STATUS = $exit_status"\n'
        txt += ' output_exit_status=$exit_status\n'
        if common.scheduler.boss_scheduler_name == 'condor_g':
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' fi \n'
        txt += 'fi\n'

    file_list = [self.numberFile_(fileWithSuffix, '$NJob')
                 for fileWithSuffix in self.output_file]

    txt += 'file_list="'+' '.join(file_list)+'"\n'
    # Append (not assign): a plain assignment here would throw away all
    # the rename commands generated above.
    txt += '\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    txt += 'ls \n'
    txt += '\n'
    txt += 'cd $RUNTIME_AREA\n'
    txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    return txt
1209    
def numberFile_(self, file, txt):
    """
    Insert '_' + txt before the last extension of a file name.

    "out.root" becomes "out_<txt>.root" and "a.b.root" becomes
    "a.b_<txt>.root"; a name without any '.' simply gets "_<txt>"
    appended.
    """
    dot = file.rfind(".")
    if dot < 0:
        # no extension at all
        return file + '_' + txt
    # file[dot:] still carries the leading '.' of the extension
    return file[:dot] + '_' + txt + file[dot:]
1227    
def getRequirements(self, nj=[]):
    """
    Return job requirements to add to jdl files.

    The expression asks for the software tag of the CMSSW release
    (self.version) and of the architecture (self.executable_arch), when
    they are set, and always for outbound connectivity.  The clauses are
    joined with ' && ', so that a missing release or architecture never
    leaves a dangling operator at the beginning of the expression.

    nj is accepted for interface compatibility and is not used.
    """
    runtimeEnv = '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + runtimeEnv)
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + runtimeEnv)
    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')
    return ' && '.join(clauses)
1247 gutsche 1.3
def configFilename(self):
    """ return the config filename: the job type name plus '.cfg' """
    baseName = self.name()
    return baseName + '.cfg'
1251    
def wsSetupCMSOSGEnvironment_(self):
    """
    Returns the part of a job script which prepares the CMS execution
    environment on an OSG worker node: it exports SCRAM_ARCH and sources
    $OSG_APP/cmssoft/cms/cmsset_default.sh for self.version, or cleans up
    the working directory and exits when that file is missing.
    """
    arch = self.executable_arch
    setupScript = '$OSG_APP/cmssoft/cms/cmsset_default.sh'
    pieces = [
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f ' + setupScript + ' ] ;then\n',
        ' # Use ' + setupScript + ' to setup cms software\n',
        ' source ' + setupScript + ' ' + self.version + '\n',
        ' else\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR ' + setupScript + ' file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        '\n',
        ' cd $RUNTIME_AREA\n',
        ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n',
        ' echo ">>> Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after ' + setupScript + ' file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10017"\n',
        ' echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' fi\n',
        '\n',
        ' exit 1\n',
        ' fi\n',
        '\n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(pieces)
1288 ewv 1.131
1289 gutsche 1.3 ### OLI_DANIELE
def wsSetupCMSLCGEnvironment_(self):
    """
    Returns the part of a job script which prepares the CMS execution
    environment on an LCG worker node: it exports SCRAM_ARCH and
    BUILD_ARCH and sources $VO_CMS_SW_DIR/cmsset_default.sh, exiting with
    a dedicated job exit code when the software area or the setup file is
    missing or cannot be sourced.
    """
    arch = self.executable_arch
    pieces = (
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' export BUILD_ARCH=' + arch + '\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n',
        ' echo "JOB_EXIT_STATUS = 10031" \n',
        ' echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' echo "JOB_EXIT_STATUS = 10032"\n',
        ' echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
    )
    return ''.join(pieces)
1328 gutsche 1.5
1329 ewv 1.131 ### FEDE FOR DBS OUTPUT PUBLICATION
def modifyReport(self, nj):
    """
    Return the part of the job script that modifies the FrameworkJob
    Report.

    When the configuration parameter USER.publish_data is 1 the script
    runs ModifyJobReport.py on crab_fjr_$NJob.xml, using
    USER.publish_data_name as processed dataset name; otherwise it only
    prints that no publication is required.
    """
    try:
        publish_data = int(self.cfg_params['USER.publish_data'])
    except KeyError:
        publish_data = 0

    if publish_data != 1:
        return 'echo "no data publication required"\n'

    modifier = '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py'
    # the very same command line is first echoed, then executed
    command = modifier + ' crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH'

    head = [
        'echo ">>> Modify Job Report:" \n',
        ################ FEDE FOR DBS2 ################
        'chmod a+x ' + modifier + '\n',
        'if [ -z "$SE" ]; then\n',
        ' SE="" \n',
        'fi \n',
        'if [ -z "$SE_PATH" ]; then\n',
        ' SE_PATH="" \n',
        'fi \n',
        'echo "SE = $SE"\n',
        'echo "SE_PATH = $SE_PATH"\n',
    ]

    processedDataset = self.cfg_params['USER.publish_data_name']

    tail = [
        'ProcessedDataset=' + processedDataset + '\n',
        #### LFN=/store/user/<user>/processedDataset_PSETHASH
        'if [ "$SE_PATH" == "" ]; then\n',
        #### FEDE: added slash in LFN ##############
        ' FOR_LFN=/copy_problems/ \n',
        'else \n',
        ' tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n',
        ##### FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
        ' FOR_LFN=/store$tmp \n',
        'fi \n',
        'echo "ProcessedDataset = $ProcessedDataset"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
        'echo "' + command + '"\n',
        command + '\n',
        'modifyReport_result=$?\n',
        'echo modifyReport_result = $modifyReport_result\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' exit_status=1\n',
        ' echo "ERROR: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n',
        'fi\n',
    ]
    return ''.join(head + tail)
1385 fanzago 1.99
def cleanEnv(self):
    """
    Return the part of the job script which, on OSG, goes back to
    $RUNTIME_AREA and removes the working directory, reporting exit code
    60999 when the directory cannot be deleted.
    """
    pieces = (
        'if [ $middleware == OSG ]; then\n',
        ' cd $RUNTIME_AREA\n',
        ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n',
        ' echo ">>> Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n',
        ' echo "JOB_EXIT_STATUS = 60999"\n',
        ' echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' fi\n',
        'fi\n',
        '\n',
    )
    return ''.join(pieces)
1402 fanzago 1.93
def setParam_(self, param, value):
    """
    Store value under the key param in the parameter dictionary of this
    job type, replacing any value already stored for that key.
    """
    self._params.update({param: value})
1405    
def getParams(self):
    """
    Return the parameter dictionary filled through setParam_.

    The dictionary object itself is returned, not a copy.
    """
    collected = self._params
    return collected
1408 gutsche 1.8
def setTaskid_(self):
    """
    Read the 'taskId' entry of the configuration parameters and keep it
    as the task id of this job type.
    """
    task_id = self.cfg_params['taskId']
    self._taskId = task_id
1411 ewv 1.131
def getTaskid(self):
    """
    Return the task id previously stored by setTaskid_.
    """
    task_id = self._taskId
    return task_id
1414 gutsche 1.35
def uniquelist(self, old):
    """
    remove duplicates from a list

    Returns a new list holding each element of old once, in the order of
    its first occurrence. The elements must be hashable. The input is
    not modified.
    """
    # A dict is used as the "already seen" marker so that the lookup is
    # constant time; the result is accumulated separately so that the
    # order is deterministic and the return value is always a real list
    # (dict.keys() has arbitrary order on Python 2 and is a view, not a
    # list, on Python 3).
    seen = {}
    unique = []
    for e in old:
        if e not in seen:
            seen[e] = True
            unique.append(e)
    return unique
1423 mcinquil 1.121
1424    
def checkOut(self, limit):
    """
    check the dimension of the output files

    Builds and returns the shell fragment that checks the size of the
    output sandbox. The generated script sums the sizes of the selected
    output files plus the job stdout and stderr files. If the sum is
    larger than limit, it walks the same list again, deletes every file
    that is larger than limit on its own or that pushes the running
    total over limit, and sets exit_status to 70000.

    Which files are selected depends on self.return_data: when it is 1
    the entries of self.output_file and self.output_file_sandbox are
    used, otherwise only those of self.output_file_sandbox (and each of
    those names is also echoed by the script). Every name is passed
    through self.numberFile_ together with '$NJob'.

    limit is written into the script through str() and compared with
    the sizes printed by ls.
    """
    if self.return_data == 1:
        checkedFiles = self.output_file + self.output_file_sandbox
        echoNames = False
    else:
        checkedFiles = self.output_file_sandbox
        echoNames = True

    txt = 'echo ">>> Starting output sandbox limit check :"\n'
    txt += 'stdoutFile=`ls | grep *stdout` \n'
    txt += 'stderrFile=`ls | grep *stderr` \n'

    allOutFiles = ""
    for fileOut in checkedFiles:
        if echoNames:
            txt += 'echo " '+fileOut+'";\n'
        allOutFiles += " " + self.numberFile_(fileOut, '$NJob')
    # stdout and stderr are listed exactly once, after the output files.
    # Listing them once per output file would count their size several
    # times and make the deletion loop visit them again after removal.
    allOutFiles += " $stdoutFile $stderrFile"

    txt += 'echo "OUTPUT files: '+allOutFiles+'";\n'
    txt += 'ls -gGhrta;\n'
    # first pass: total size of the files that exist
    txt += 'sum=0;\n'
    txt += 'for file in '+allOutFiles+' ; do\n'
    txt += ' if [ -e $file ]; then\n'
    txt += ' tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
    txt += ' sum=`expr $sum + $tt`\n'
    txt += ' else\n'
    txt += ' echo "WARNING: output file $file not found!"\n'
    txt += ' fi\n'
    txt += 'done\n'
    txt += 'echo "Total Output dimension: $sum";\n'
    txt += 'limit='+str(limit)+';\n'
    txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
    txt += 'if [ $limit -lt $sum ]; then\n'
    txt += ' echo "WARNING: output files have to big size - something will be lost;"\n'
    txt += ' echo " checking the output file sizes..."\n'
    # second pass: drop the files that do not fit within the limit
    txt += ' tot=0;\n'
    txt += ' for filefile in '+allOutFiles+' ; do\n'
    # a missing file was already reported by the first pass; skipping it
    # keeps expr and the numeric tests below from seeing an empty size
    txt += ' [ -e $filefile ] || continue\n'
    txt += ' dimFile=`ls -gGrta $filefile | awk \'{ print $3 }\';`\n'
    # the running total must grow by the size of the current file
    # ($dimFile), not by $tt left over from the first pass
    txt += ' tot=`expr $tot + $dimFile`;\n'
    txt += ' if [ $limit -lt $dimFile ]; then\n'
    txt += ' echo "deleting file: $filefile";\n'
    txt += ' rm -f $filefile\n'
    txt += ' elif [ $limit -lt $tot ]; then\n'
    txt += ' echo "deleting file: $filefile";\n'
    txt += ' rm -f $filefile\n'
    txt += ' else\n'
    txt += ' echo "saving file: $filefile"\n'
    txt += ' fi\n'
    txt += ' done\n'

    txt += ' ls -agGhrt;\n'
    txt += ' echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
    txt += ' echo "JOB_EXIT_STATUS = 70000";\n'
    txt += ' exit_status=70000;\n'
    # no newline after 'else': the echo below continues on the same
    # shell line, which is still a valid else branch
    txt += 'else'
    txt += ' echo "Total Output dimension $sum is fine.";\n'
    txt += 'fi\n'
    txt += 'echo "Ending output sandbox limit check"\n'
    return txt