ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.131
Committed: Fri Oct 19 08:51:49 2007 UTC (17 years, 6 months ago) by ewv
Content type: text/x-python
Branch: MAIN
Changes since 1.130: +116 -123 lines
Log Message:
Remove EdmConfigToPython dependency

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5 fanzago 1.115 from BlackWhiteListParser import BlackWhiteListParser
6 slacapra 1.1 import common
7     import Scram
8    
9 slacapra 1.105 import os, string, glob
10 slacapra 1.1
11     class Cmssw(JobType):
12 gutsche 1.38 def __init__(self, cfg_params, ncjobs):
# Configure the CMSSW job type from the user configuration and prepare its jobs.
#   cfg_params -- mapping of 'SECTION.key' entries; optional keys are probed with
#                 try/except KeyError and replaced by the defaults set below.
#   ncjobs     -- number of jobs requested to be created (limits the splitting).
# Raises CrabException for an unsupported version/architecture pair, a missing
# datasetpath or pset entry, missing script/input files, and an inconsistent
# splitting request.  Only KeyError is handled around the int()/float()
# conversions, so a malformed numeric entry propagates as ValueError.
# Work done here besides storing attributes: the code version is recorded in
# common.taskDB, data discovery runs when a dataset is given, getTarBall() is
# called, one of the jobSplitting* methods is run and, when a parameter set is in
# use, the edited parameter set is written with PsetEdit.psetWriter().
# NOTE(review): this listing has lost its indentation; confirm block nesting
# against the repository copy before changing any logic.
13 slacapra 1.1 JobType.__init__(self, 'CMSSW')
14     common.logger.debug(3,'CMSSW::__init__')
15    
16 gutsche 1.3 self._params = {}
17     self.cfg_params = cfg_params
18 gutsche 1.38
19 fanzago 1.115 # init BlackWhiteListParser
20     self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
21    
# Tarball size limit: 'EDG.maxtarballsize' as a float, 9.5 when the key is absent
# (the unit is not stated in this method).
22 gutsche 1.72 try:
23     self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24     except KeyError:
25 slacapra 1.86 self.MaxTarBallSize = 9.5
26 gutsche 1.72
27 gutsche 1.44 # number of jobs requested to be created, limit obj splitting
28 gutsche 1.38 self.ncjobs = ncjobs
29    
30 slacapra 1.1 log = common.logger
31 ewv 1.131
32 slacapra 1.1 self.scram = Scram.Scram(cfg_params)
33     self.additional_inbox_files = []
34     self.scriptExe = ''
35     self.executable = ''
36 slacapra 1.71 self.executable_arch = self.scram.getArch()
37 slacapra 1.1 self.tgz_name = 'default.tgz'
38 slacapra 1.97 self.additional_tgz_name = 'additional.tgz'
39 corvo 1.56 self.scriptName = 'CMSSW.sh'
40 ewv 1.131 self.pset = '' #scrip use case Da
41 spiga 1.42 self.datasetPath = '' #scrip use case Da
42 gutsche 1.3
43 gutsche 1.50 # set FJR file name
44     self.fjrFileName = 'crab_fjr.xml'
45    
46 slacapra 1.1 self.version = self.scram.getSWVersion()
47 ewv 1.131
48 spiga 1.114 #
49     # Try to block creation in case of arch/version mismatch
50     #
51    
# The version string is split on "_" and fields 1 and 2 are compared as integers:
# 1_x with x < 5 is rejected when the architecture string starts with 'slc4',
# 1_x with x >= 5 when it starts with 'slc3'.
52     a = string.split(self.version, "_")
53    
54     if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
55     msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
56     raise CrabException(msg)
57     if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58     msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
59     raise CrabException(msg)
60 ewv 1.131
61 slacapra 1.55 common.taskDB.setDict('codeVersion',self.version)
62 gutsche 1.5 self.setParam_('application', self.version)
63 slacapra 1.47
64 slacapra 1.1 ### collect Data cards
65 gutsche 1.66
66     ## get DBS mode
67     try:
68 slacapra 1.86 self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
69 gutsche 1.66 except KeyError:
70 slacapra 1.86 self.use_dbs_1 = 0
71 ewv 1.131
# 'CMSSW.datasetpath' is mandatory; the value 'none' (any letter case) selects the
# no-input mode (datasetPath = None, selectNoInput = 1).
72 slacapra 1.1 try:
73 slacapra 1.9 tmp = cfg_params['CMSSW.datasetpath']
74     log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
75     if string.lower(tmp)=='none':
76     self.datasetPath = None
77 slacapra 1.21 self.selectNoInput = 1
78 slacapra 1.9 else:
79     self.datasetPath = tmp
80 slacapra 1.21 self.selectNoInput = 0
81 slacapra 1.1 except KeyError:
82 ewv 1.131 msg = "Error: datasetpath not defined "
83 slacapra 1.1 raise CrabException(msg)
84 gutsche 1.5
85     # ML monitoring
86     # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
87 slacapra 1.9 if not self.datasetPath:
88     self.setParam_('dataset', 'None')
89     self.setParam_('owner', 'None')
90     else:
91 gutsche 1.92 try:
92     datasetpath_split = self.datasetPath.split("/")
93     # standard style
94 mcinquil 1.120 self.setParam_('datasetFull', self.datasetPath)
95 gutsche 1.92 if self.use_dbs_1 == 1 :
96     self.setParam_('dataset', datasetpath_split[1])
97     self.setParam_('owner', datasetpath_split[-1])
98     else:
99     self.setParam_('dataset', datasetpath_split[1])
100     self.setParam_('owner', datasetpath_split[2])
# NOTE(review): bare except -- any failure above (not only a short path) falls back
# to using the whole dataset path for both 'dataset' and 'owner'.
101     except:
102     self.setParam_('dataset', self.datasetPath)
103     self.setParam_('owner', self.datasetPath)
104 ewv 1.131
105 gutsche 1.8 self.setTaskid_()
106     self.setParam_('taskId', self.cfg_params['taskId'])
107 gutsche 1.5
108 slacapra 1.1 self.dataTiers = []
109    
110     ## now the application
111     try:
112     self.executable = cfg_params['CMSSW.executable']
113 gutsche 1.5 self.setParam_('exe', self.executable)
114 slacapra 1.1 log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
115     msg = "Default executable cmsRun overridden. Switch to " + self.executable
116     log.debug(3,msg)
117     except KeyError:
118     self.executable = 'cmsRun'
119 gutsche 1.5 self.setParam_('exe', self.executable)
120 slacapra 1.1 msg = "User executable not defined. Use cmsRun"
121     log.debug(3,msg)
122     pass
123    
# 'CMSSW.pset' is mandatory; 'none' (any letter case) means "no parameter set"
# (self.pset = None), any other value must name an existing file.
124     try:
125     self.pset = cfg_params['CMSSW.pset']
126     log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
127 ewv 1.131 if self.pset.lower() != 'none' :
128 spiga 1.42 if (not os.path.exists(self.pset)):
129     raise CrabException("User defined PSet file "+self.pset+" does not exist")
130     else:
131     self.pset = None
132 slacapra 1.1 except KeyError:
133     raise CrabException("PSet file missing. Cannot run cmsRun ")
134    
135     # output files
136 slacapra 1.53 ## stuff which must be returned always via sandbox
137     self.output_file_sandbox = []
138    
139     # add fjr report by default via sandbox
140     self.output_file_sandbox.append(self.fjrFileName)
141    
142     # other output files to be returned via sandbox or copied to SE
# 'CMSSW.output_file' is a comma-separated list; each entry is stripped of
# surrounding whitespace before being stored.
143 slacapra 1.1 try:
144     self.output_file = []
145     tmp = cfg_params['CMSSW.output_file']
146     if tmp != '':
147     tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
148     log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
149     for tmp in tmpOutFiles:
150     tmp=string.strip(tmp)
151     self.output_file.append(tmp)
152     pass
153     else:
154 gutsche 1.92 log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
155 slacapra 1.1 pass
156     pass
157     except KeyError:
158 gutsche 1.92 log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
159 slacapra 1.1 pass
160    
161     # script_exe file as additional file in inputSandbox
162     try:
163 slacapra 1.10 self.scriptExe = cfg_params['USER.script_exe']
164     if self.scriptExe != '':
165     if not os.path.isfile(self.scriptExe):
166 slacapra 1.64 msg ="ERROR. file "+self.scriptExe+" not found"
167 slacapra 1.10 raise CrabException(msg)
168 spiga 1.42 self.additional_inbox_files.append(string.strip(self.scriptExe))
169 slacapra 1.1 except KeyError:
170 spiga 1.42 self.scriptExe = ''
171 slacapra 1.70
172 spiga 1.42 #CarlosDaniele
# With neither a dataset nor a parameter set, a user script is the only thing left to run.
173     if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
174 slacapra 1.70 msg ="Error. script_exe not defined"
175 spiga 1.42 raise CrabException(msg)
176    
177 slacapra 1.1 ## additional input files
# Comma-separated entries; an entry containing "*" is expanded with glob (relative
# to "." unless it starts with "/") and must match at least one file.
# NOTE(review): an empty entry (e.g. a trailing comma) would raise IndexError at tmp[0].
178     try:
179 slacapra 1.29 tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
180 slacapra 1.70 for tmp in tmpAddFiles:
181     tmp = string.strip(tmp)
182     dirname = ''
183     if not tmp[0]=="/": dirname = "."
184 corvo 1.85 files = []
185     if string.find(tmp,"*")>-1:
186     files = glob.glob(os.path.join(dirname, tmp))
187     if len(files)==0:
188     raise CrabException("No additional input file found with this pattern: "+tmp)
189     else:
190     files.append(tmp)
191 slacapra 1.70 for file in files:
192     if not os.path.exists(file):
193     raise CrabException("Additional input file not found: "+file)
194 slacapra 1.45 pass
195 slacapra 1.105 # fname = string.split(file, '/')[-1]
196     # storedFile = common.work_space.pathForTgz()+'share/'+fname
197     # shutil.copyfile(file, storedFile)
198     self.additional_inbox_files.append(string.strip(file))
199 slacapra 1.1 pass
200     pass
201 slacapra 1.70 common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
202 slacapra 1.1 except KeyError:
203     pass
204    
205 slacapra 1.9 # files per job
206 slacapra 1.1 try:
207 gutsche 1.35 if (cfg_params['CMSSW.files_per_jobs']):
208     raise CrabException("files_per_jobs no longer supported. Quitting.")
209 gutsche 1.3 except KeyError:
210 gutsche 1.35 pass
211 gutsche 1.3
# Each of the three splitting options sets a value plus a 0/1 "select" flag; the
# flags are summed further down to validate the combination.
212 slacapra 1.9 ## Events per job
213 gutsche 1.3 try:
214 slacapra 1.10 self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
215 slacapra 1.9 self.selectEventsPerJob = 1
216 gutsche 1.3 except KeyError:
217 slacapra 1.9 self.eventsPerJob = -1
218     self.selectEventsPerJob = 0
219 ewv 1.131
220 slacapra 1.22 ## number of jobs
221     try:
222     self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
223     self.selectNumberOfJobs = 1
224     except KeyError:
225     self.theNumberOfJobs = 0
226     self.selectNumberOfJobs = 0
227 slacapra 1.10
228 gutsche 1.35 try:
229     self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
230     self.selectTotalNumberEvents = 1
231     except KeyError:
232     self.total_number_of_events = 0
233     self.selectTotalNumberEvents = 0
234    
# With a parameter set exactly two of the three options must be given; without
# one (script-only task) number_of_jobs is required.
235 ewv 1.131 if self.pset != None: #CarlosDaniele
236 spiga 1.42 if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
237     msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
238     raise CrabException(msg)
239     else:
240     if (self.selectNumberOfJobs == 0):
241     msg = 'Must specify number_of_jobs.'
242     raise CrabException(msg)
243 gutsche 1.35
# Optional seeds and first run number: each is None when its key is absent.
244 slacapra 1.22 ## source seed for pythia
245     try:
246     self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
247     except KeyError:
248 slacapra 1.23 self.sourceSeed = None
249     common.logger.debug(5,"No seed given")
250 slacapra 1.22
251 slacapra 1.28 try:
252     self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
253     except KeyError:
254     self.sourceSeedVtx = None
255     common.logger.debug(5,"No vertex seed given")
256 slacapra 1.90
257     try:
258     self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
259     except KeyError:
260     self.sourceSeedG4 = None
261     common.logger.debug(5,"No g4 sim hits seed given")
262    
263     try:
264     self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
265     except KeyError:
266     self.sourceSeedMix = None
267     common.logger.debug(5,"No mix seed given")
268    
269 spiga 1.57 try:
270     self.firstRun = int(cfg_params['CMSSW.first_run'])
271     except KeyError:
272     self.firstRun = None
273     common.logger.debug(5,"No first run given")
# PsetManipulator is imported only when a parameter set is in use.
274 spiga 1.42 if self.pset != None: #CarlosDaniele
275 ewv 1.131 import PsetManipulator as pp
276 slacapra 1.97 PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
277 gutsche 1.3
278 slacapra 1.1 #DBSDLS-start
279 ewv 1.131 ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
280 slacapra 1.1 self.maxEvents=0 # max events available ( --> check the requested nb. of evts in Creator.py)
281     self.DBSPaths={} # all dbs paths requested ( --> input to the site local discovery script)
282 gutsche 1.35 self.jobDestination=[] # Site destination(s) for each job (list of lists)
283 slacapra 1.1 ## Perform the data location and discovery (based on DBS/DLS)
284 slacapra 1.9 ## SL: Don't if NONE is specified as input (pythia use case)
285 gutsche 1.35 blockSites = {}
286 slacapra 1.9 if self.datasetPath:
287 gutsche 1.35 blockSites = self.DataDiscoveryAndLocation(cfg_params)
288 ewv 1.131 #DBSDLS-end
289 slacapra 1.1
290     self.tgzNameWithPath = self.getTarBall(self.executable)
291 ewv 1.131
# No input dataset: script-only splitting when there is no parameter set, otherwise
# event-based splitting.  With a dataset: splitting by block.
292 slacapra 1.9 ## Select Splitting
293 ewv 1.131 if self.selectNoInput:
294 spiga 1.42 if self.pset == None: #CarlosDaniele
295     self.jobSplittingForScript()
296     else:
297     self.jobSplittingNoInput()
298 gutsche 1.92 else:
299 corvo 1.56 self.jobSplittingByBlocks(blockSites)
300 gutsche 1.5
# NOTE(review): the seed/first-run/skip/max-event editors below are called with the
# literal 0 (and "INPUTFILE") rather than the configured values -- presumably
# placeholders that are filled per job; confirm in PsetManipulator.  The nesting
# of the seed conditionals cannot be read from this listing.
301 slacapra 1.22 # modify Pset
302 spiga 1.42 if self.pset != None: #CarlosDaniele
303 slacapra 1.86 try:
304     if (self.datasetPath): # standard job
305     # allow to processa a fraction of events in a file
306 ewv 1.131 PsetEdit.inputModule("INPUTFILE")
307     PsetEdit.maxEvent(0)
308     PsetEdit.skipEvent(0)
309 slacapra 1.86 else: # pythia like job
310 slacapra 1.90 PsetEdit.maxEvent(self.eventsPerJob)
311 slacapra 1.86 if (self.firstRun):
312 ewv 1.131 PsetEdit.pythiaFirstRun(0) #First Run
313 slacapra 1.86 if (self.sourceSeed) :
314 ewv 1.131 PsetEdit.pythiaSeed(0)
315 slacapra 1.86 if (self.sourceSeedVtx) :
316 ewv 1.131 PsetEdit.vtxSeed(0)
317 slacapra 1.90 if (self.sourceSeedG4) :
318 ewv 1.131 PsetEdit.g4Seed(0)
319 slacapra 1.90 if (self.sourceSeedMix) :
320 ewv 1.131 PsetEdit.mixSeed(0)
321 slacapra 1.86 # add FrameworkJobReport to parameter-set
322 slacapra 1.90 PsetEdit.addCrabFJR(self.fjrFileName)
323     PsetEdit.psetWriter(self.configFilename())
# NOTE(review): bare except -- every error raised in the try body is replaced by
# the generic message below, so the original cause is lost.
324 slacapra 1.86 except:
325     msg='Error while manipuliating ParameterSet: exiting...'
326     raise CrabException(msg)
327 gutsche 1.3
def DataDiscoveryAndLocation(self, cfg_params):
    """
    Look up self.datasetPath in the data discovery service, then ask the data
    location service which sites host the blocks that were found.

    ARGUMENT: cfg_params: user configuration, handed unchanged to the
              discovery and location helpers
    SETS: self.pubdata, self.filesbyblock, self.eventsbyblock,
          self.eventsbyfile, self.maxEvents
    RETURNS: the object returned by dataloc.getSites() (its values() are
             iterated here as per-block site lists)
    RAISES: CrabException when discovery or location reports an error
    """
    import sys
    import DataDiscovery
    import DataDiscovery_DBS2
    import DataLocation

    common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
    dataset = self.datasetPath

    ## Contact the DBS
    common.logger.message("Contacting Data Discovery Services ...")
    try:
        if self.use_dbs_1 == 1:
            self.pubdata = DataDiscovery.DataDiscovery(dataset, cfg_params)
        else:
            self.pubdata = DataDiscovery_DBS2.DataDiscovery_DBS2(dataset, cfg_params)
        self.pubdata.fetchDBSInfo()
    except (DataDiscovery.NotExistingDatasetError,
            DataDiscovery.NoDataTierinProvenanceError,
            DataDiscovery.DataDiscoveryError,
            DataDiscovery_DBS2.NotExistingDatasetError_DBS2,
            DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2,
            DataDiscovery_DBS2.DataDiscoveryError_DBS2):
        # Every discovery failure is reported the same way; sys.exc_info()
        # gives the active exception without version-specific binding syntax.
        failure = sys.exc_info()[1]
        raise CrabException('ERROR ***: failed Data Discovery in DBS : %s'%failure.getErrorMessage())

    # Keep the per-block and per-file bookkeeping for the splitting step.
    self.filesbyblock = self.pubdata.getFiles()
    self.eventsbyblock = self.pubdata.getEventsPerBlock()
    self.eventsbyfile = self.pubdata.getEventsPerFile()

    ## self.maxEvents used in Creator.py
    self.maxEvents = self.pubdata.getMaxEvents()

    ## Contact the DLS and build a list of sites hosting the fileblocks
    try:
        dataloc = DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
        dataloc.fetchDLSInfo()
    except DataLocation.DataLocationError:
        failure = sys.exc_info()[1]
        raise CrabException('ERROR ***: failed Data Location in DLS \n %s '%failure.getErrorMessage())

    sites = dataloc.getSites()

    # Flatten the per-block site lists; the de-duplicated list is computed
    # as in the original but is not used afterwards.
    allSites = []
    for hostList in sites.values():
        allSites.extend(hostList)
    allSites = self.uniquelist(allSites)

    # screen output
    nBlocks = len(self.filesbyblock.keys())
    common.logger.message("Requested dataset: " + dataset + " has " + str(self.maxEvents) + " events in " + str(nBlocks) + " blocks.\n")

    return sites
394 ewv 1.131
395 gutsche 1.35 def jobSplittingByBlocks(self, blockSites):
396 slacapra 1.9 """
397 gutsche 1.35 Perform job splitting. Jobs run over an integer number of files
398     and no more than one block.
399     ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
400     REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberofJobs,
401     self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
402     self.maxEvents, self.filesbyblock
403     SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
404     self.total_number_of_jobs - Total # of jobs
405     self.list_of_args - File(s) job will run on (a list of lists)
406     """
# Also reads self.eventsbyblock, self.eventsbyfile, self.ncjobs and
# self.blackWhiteListParser, and overwrites self.ncjobs with the number of jobs
# actually created.  Each entry of self.list_of_args is
# [file list string, max events as string ("-1" = rest of block), skip events as string].
# NOTE(review): indentation is not preserved in this listing; confirm the nesting
# of the conditionals below against the repository copy before changing any logic.
407    
408     # ---- Handle the possible job splitting configurations ---- #
# totalEventsRequested and eventsPerJobRequested are bound only inside these conditionals.
409     if (self.selectTotalNumberEvents):
410     totalEventsRequested = self.total_number_of_events
411     if (self.selectEventsPerJob):
412     eventsPerJobRequested = self.eventsPerJob
413     if (self.selectNumberOfJobs):
414     totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
415    
416     # If user requested all the events in the dataset
417     if (totalEventsRequested == -1):
418     eventsRemaining=self.maxEvents
419     # If user requested more events than are in the dataset
420     elif (totalEventsRequested > self.maxEvents):
421     eventsRemaining = self.maxEvents
# NOTE(review): the message reports self.total_number_of_events, not
# totalEventsRequested; the two differ when the total was derived from
# number_of_jobs * events_per_job.
422     common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
423     # If user requested less events than are in the dataset
424     else:
425     eventsRemaining = totalEventsRequested
426 slacapra 1.22
427 slacapra 1.41 # If user requested more events per job than are in the dataset
428     if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
429     eventsPerJobRequested = self.maxEvents
430    
431 gutsche 1.35 # For user info at end
432     totalEventCount = 0
433 gutsche 1.3
# Total and job count given: events per job is the truncated quotient.
434 gutsche 1.35 if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
435     eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
436 slacapra 1.22
437 gutsche 1.35 if (self.selectNumberOfJobs):
438     common.logger.message("May not create the exact number_of_jobs requested.")
439 slacapra 1.23
# The string 'all' is replaced by a large numeric cap on the number of jobs.
440 gutsche 1.38 if ( self.ncjobs == 'all' ) :
441     totalNumberOfJobs = 999999999
442     else :
443     totalNumberOfJobs = self.ncjobs
444 ewv 1.131
445 gutsche 1.38
446 gutsche 1.35 blocks = blockSites.keys()
447     blockCount = 0
448     # Backup variable in case self.maxEvents counted events in a non-included block
449     numBlocksInDataset = len(blocks)
450 gutsche 1.3
451 gutsche 1.35 jobCount = 0
452     list_of_lists = []
453 gutsche 1.3
454 gutsche 1.92 # list tracking which jobs are in which jobs belong to which block
455     jobsOfBlock = {}
456    
457 gutsche 1.35 # ---- Iterate over the blocks in the dataset until ---- #
458     # ---- we've met the requested total # of events ---- #
459 gutsche 1.38 while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
460 gutsche 1.35 block = blocks[blockCount]
461 gutsche 1.44 blockCount += 1
462 gutsche 1.104 if block not in jobsOfBlock.keys() :
463     jobsOfBlock[block] = []
464 ewv 1.131
465 gutsche 1.68 if self.eventsbyblock.has_key(block) :
466     numEventsInBlock = self.eventsbyblock[block]
467     common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
468 ewv 1.131
469 gutsche 1.68 files = self.filesbyblock[block]
470     numFilesInBlock = len(files)
471     if (numFilesInBlock <= 0):
472     continue
473     fileCount = 0
474    
475     # ---- New block => New job ---- #
476 ewv 1.131 parString = ""
477 gutsche 1.68 # counter for number of events in files currently worked on
478     filesEventCount = 0
479     # flag if next while loop should touch new file
480     newFile = 1
481     # job event counter
482     jobSkipEventCount = 0
483 ewv 1.131
484 gutsche 1.68 # ---- Iterate over the files in the block until we've met the requested ---- #
485     # ---- total # of events or we've gone over all the files in this block ---- #
486     while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
487     file = files[fileCount]
488     if newFile :
489     try:
490     numEventsInFile = self.eventsbyfile[file]
491     common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
492     # increase filesEventCount
493     filesEventCount += numEventsInFile
494     # Add file to current job
# Each file is appended as \"<file>\" followed by the two characters \, ;
# parString[:-2] further down drops that trailing separator.
495     parString += '\\\"' + file + '\\\"\,'
496     newFile = 0
497     except KeyError:
498     common.logger.message("File "+str(file)+" has unknown number of events: skipping")
499 ewv 1.131
500 gutsche 1.38
501 gutsche 1.68 # if less events in file remain than eventsPerJobRequested
502     if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
503     # if last file in block
504     if ( fileCount == numFilesInBlock-1 ) :
505     # end job using last file, use remaining events in block
506     # close job and touch new file
507     fullString = parString[:-2]
508     list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
509     common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
510     self.jobDestination.append(blockSites[block])
511     common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
512 gutsche 1.92 # fill jobs of block dictionary
513 gutsche 1.104 jobsOfBlock[block].append(jobCount+1)
514 gutsche 1.68 # reset counter
515     jobCount = jobCount + 1
516     totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
517     eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
518     jobSkipEventCount = 0
519     # reset file
520 ewv 1.131 parString = ""
521 gutsche 1.68 filesEventCount = 0
522     newFile = 1
523     fileCount += 1
524     else :
525     # go to next file
526     newFile = 1
527     fileCount += 1
528     # if events in file equal to eventsPerJobRequested
529     elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
530 gutsche 1.38 # close job and touch new file
531     fullString = parString[:-2]
532 gutsche 1.68 list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
533     common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
534 gutsche 1.38 self.jobDestination.append(blockSites[block])
535     common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
536 gutsche 1.104 jobsOfBlock[block].append(jobCount+1)
537 gutsche 1.38 # reset counter
538     jobCount = jobCount + 1
539 gutsche 1.68 totalEventCount = totalEventCount + eventsPerJobRequested
540     eventsRemaining = eventsRemaining - eventsPerJobRequested
541 gutsche 1.38 jobSkipEventCount = 0
542     # reset file
543 ewv 1.131 parString = ""
544 gutsche 1.38 filesEventCount = 0
545     newFile = 1
546     fileCount += 1
547 ewv 1.131
548 gutsche 1.68 # if more events in file remain than eventsPerJobRequested
549 gutsche 1.38 else :
550 gutsche 1.68 # close job but don't touch new file
551     fullString = parString[:-2]
552     list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
553     common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
554     self.jobDestination.append(blockSites[block])
555     common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
556 gutsche 1.104 jobsOfBlock[block].append(jobCount+1)
557 gutsche 1.68 # increase counter
558     jobCount = jobCount + 1
559     totalEventCount = totalEventCount + eventsPerJobRequested
560     eventsRemaining = eventsRemaining - eventsPerJobRequested
561     # calculate skip events for last file
562     # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
563     jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
564     # remove all but the last file
565     filesEventCount = self.eventsbyfile[file]
566 ewv 1.131 parString = ""
567 gutsche 1.68 parString += '\\\"' + file + '\\\"\,'
568     pass # END if
569     pass # END while (iterate over files in the block)
570 gutsche 1.35 pass # END while (iterate over blocks in the dataset)
# Both the requested-jobs limit and the job total are set to the number of jobs created.
571 slacapra 1.41 self.ncjobs = self.total_number_of_jobs = jobCount
572 gutsche 1.38 if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
573 gutsche 1.35 common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
574 gutsche 1.92 common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
575 ewv 1.131
576 gutsche 1.92 # screen output
577     screenOutput = "List of jobs and available destination sites:\n\n"
578    
579 mcinquil 1.124 # keep trace of block with no sites to print a warning at the end
580     noSiteBlock = []
581     bloskNoSite = []
582    
# The black list filter followed by the white list filter is evaluated twice per
# block below: once for the printed site list and once for the emptiness check.
583 gutsche 1.92 blockCounter = 0
584 gutsche 1.104 for block in blocks:
585     if block in jobsOfBlock.keys() :
586     blockCounter += 1
587 fanzago 1.115 screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
588 mcinquil 1.124 if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
589 ewv 1.131 noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
590 mcinquil 1.124 bloskNoSite.append( blockCounter )
591 ewv 1.131
592 mcinquil 1.124 common.logger.message(screenOutput)
# "virgola" (Italian for comma) is the separator appended after every listed entry
# when more than one is listed, including after the last one.
593 fanzago 1.127 if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
594 mcinquil 1.126 msg = 'WARNING: No sites are hosting any part of data for block:\n '
595     virgola = ""
596     if len(bloskNoSite) > 1:
597     virgola = ","
598     for block in bloskNoSite:
599     msg += ' ' + str(block) + virgola
600     msg += '\n Related jobs:\n '
601     virgola = ""
602     if len(noSiteBlock) > 1:
603     virgola = ","
604     for range_jobs in noSiteBlock:
605     msg += str(range_jobs) + virgola
606     msg += '\n will not be submitted and this block of data can not be analyzed!\n'
607     common.logger.message(msg)
608 gutsche 1.92
609 slacapra 1.9 self.list_of_args = list_of_lists
610     return
611    
612 slacapra 1.21 def jobSplittingNoInput(self):
613 slacapra 1.9 """
614     Perform job splitting based on number of event per job
615     """
# Reads the three splitting options and their select flags, self.firstRun and the
# four seed attributes; sets self.total_number_of_jobs, self.list_of_args and,
# depending on the options given, self.total_number_of_events or self.eventsPerJob.
# One [""] destination is appended to self.jobDestination per job.
# Raises CrabException when total_number_of_events is negative.
# NOTE(review): indentation is not preserved in this listing; confirm nesting
# against the repository copy before changing any logic.
616     common.logger.debug(5,'Splitting per events')
617 fanzago 1.130
618 ewv 1.131 if (self.selectEventsPerJob):
619 fanzago 1.130 common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
620     if (self.selectNumberOfJobs):
621     common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
622     if (self.selectTotalNumberEvents):
623     common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
624 slacapra 1.9
625 slacapra 1.10 if (self.total_number_of_events < 0):
626     msg='Cannot split jobs per Events with "-1" as total number of events'
627     raise CrabException(msg)
628    
# The missing third quantity is derived from the two that were configured; the
# divisions are truncated with int().
# NOTE(review): eventsPerJob == 0 or a job count of 0 would raise ZeroDivisionError here.
629 slacapra 1.22 if (self.selectEventsPerJob):
630 spiga 1.65 if (self.selectTotalNumberEvents):
631     self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
632 ewv 1.131 elif(self.selectNumberOfJobs) :
633 spiga 1.65 self.total_number_of_jobs =self.theNumberOfJobs
634 ewv 1.131 self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
635 spiga 1.65
636 slacapra 1.22 elif (self.selectNumberOfJobs) :
637     self.total_number_of_jobs = self.theNumberOfJobs
638     self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
639 ewv 1.131
640 slacapra 1.9 common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs))
641    
642     # is there any remainder?
643     check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)
644    
645     common.logger.debug(5,'Check '+str(check))
646    
647 gutsche 1.35 common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
648 slacapra 1.9 if check > 0:
649 gutsche 1.35 common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
650 slacapra 1.9
651 slacapra 1.10 # argument is seed number.$i
# Per-job arguments are built by string concatenation of the configured value and
# the job index (e.g. value 12 and index 3 give "123"), not by numeric addition.
652 slacapra 1.9 self.list_of_args = []
653     for i in range(self.total_number_of_jobs):
654 gutsche 1.35 ## Since there is no input, any site is good
655 slacapra 1.86 # self.jobDestination.append(["Any"])
656 ewv 1.131 self.jobDestination.append([""]) #must be empty to write correctly the xml
657 slacapra 1.90 args=[]
658 spiga 1.57 if (self.firstRun):
659     ## pythia first run
660 slacapra 1.86 #self.list_of_args.append([(str(self.firstRun)+str(i))])
661 slacapra 1.90 args.append(str(self.firstRun)+str(i))
662 spiga 1.57 else:
663     ## no first run
664 slacapra 1.86 #self.list_of_args.append([str(i)])
665 slacapra 1.90 args.append(str(i))
# NOTE(review): the two consecutive `pass` statements after the mix seed suggest
# that the vtx/G4/mix checks are nested inside the sourceSeed check -- confirm.
666 slacapra 1.23 if (self.sourceSeed):
667 slacapra 1.90 args.append(str(self.sourceSeed)+str(i))
668 slacapra 1.28 if (self.sourceSeedVtx):
669 slacapra 1.90 ## + vtx random seed
670     args.append(str(self.sourceSeedVtx)+str(i))
671     if (self.sourceSeedG4):
672     ## + G4 random seed
673     args.append(str(self.sourceSeedG4)+str(i))
674 ewv 1.131 if (self.sourceSeedMix):
675 slacapra 1.90 ## + Mix random seed
676     args.append(str(self.sourceSeedMix)+str(i))
677     pass
678     pass
679     self.list_of_args.append(args)
680     pass
681 ewv 1.131
682 slacapra 1.90 # print self.list_of_args
683 gutsche 1.3
684     return
685    
686 spiga 1.42
def jobSplittingForScript(self):#CarlosDaniele
    """
    Split a script-only task into the requested number of jobs.

    REQUIRES: self.theNumberOfJobs
    SETS: self.total_number_of_jobs, self.list_of_args (one [str(index)]
          entry per job); appends one [""] destination per job to
          self.jobDestination
    """
    log = common.logger
    log.debug(5,'Splitting per job')
    log.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')

    self.total_number_of_jobs = self.theNumberOfJobs
    jobTotal = self.total_number_of_jobs

    log.debug(5,'N jobs '+str(jobTotal))
    log.message(str(jobTotal)+' jobs can be created')

    # The only argument of each job is its own index; with no input data the
    # destination entry is left empty.
    self.list_of_args = []
    for jobIndex in range(jobTotal):
        self.jobDestination.append([""])
        self.list_of_args.append([str(jobIndex)])
    return
709    
def split(self, jobParams):
    """
    Store the arguments and destination of every job in common.jobDB.

    ARGUMENT: jobParams: list that is grown by one slot per job; the first
              self.total_number_of_jobs slots are then set to the matching
              entries of self.list_of_args
    REQUIRES: self.total_number_of_jobs, self.list_of_args, self.jobDestination
    """
    jobDB = common.jobDB
    jobDB.load()

    jobTotal = self.total_number_of_jobs
    argsPerJob = self.list_of_args

    # create the empty structure
    jobParams.extend([""] * jobTotal)

    for index in range(jobTotal):
        jobParams[index] = argsPerJob[index]
        jobDB.setArguments(index, jobParams[index])
        common.logger.debug(5,"Job "+str(index)+" Destination: "+str(self.jobDestination[index]))
        jobDB.setDestination(index, self.jobDestination[index])

    jobDB.save()
    return
730 ewv 1.131
def getJobTypeArguments(self, nj, sched):
    """
    Return the arguments stored in common.jobDB for job 'nj' as one string,
    each argument followed by a single blank. 'sched' is not used.
    """
    pieces = [str(arg) + " " for arg in common.jobDB.arguments(nj)]
    return "".join(pieces)
736 ewv 1.131
def numberOfJobs(self):
    """
    Return the value of self.total_number_of_jobs.
    """
    total = self.total_number_of_jobs
    return total
740    
def getTarBall(self, exe):
    """
    Return the path of the tarball holding the user libraries and executable.

    The path is <pathForTgz()>share/<self.tgz_name> and is also stored in
    self.tgzNameWithPath. If no file exists at that path, buildTar_(exe) is
    called before returning.

    Note: buildTar_ can return without creating the file, so the returned
    path is not guaranteed to exist.
    """
    # Marco. Let's start to use relative path for Boss XML files
    self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name

    # if it exist, just return it
    if os.path.exists(self.tgzNameWithPath):
        return self.tgzNameWithPath

    # Prepare a tar gzipped file with user binaries.
    self.buildTar_(exe)

    # str.strip() replaces the deprecated string.strip() module function.
    return self.tgzNameWithPath.strip()
758    
def buildTar_(self, executable):
    """
    Build the tarballs shipped with the job.

    Does nothing when the SCRAM release top is empty or equal to the user
    SCRAM area. Otherwise two gzipped tar files are written:

    * self.tgzNameWithPath, holding the user executable (only when it is
      not found under the release top), the 'lib' and 'module' directories
      of the user area, every 'data' directory found below the user area,
      and the ProdAgentApi and ProdCommon directories found under $CRABDIR;
    * self.MLtgzfile (<pathForTgz()>share/MLfiles.tgz), holding a fixed list
      of python files taken from $CRABDIR/python/.

    Raises CrabException if the user executable cannot be found, if either
    tarball cannot be created, or if the first tarball is larger than
    self.MaxTarBallSize MB.
    """
    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        return

    import tarfile
    try: # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            if (self.executable != ''):
                exeWithPath = self.scram.findFile_(executable)
                if ( not exeWithPath ):
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship
                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
                    path = swArea+'/'
                    # distinguish case when script is in user project area or given by full path somewhere else
                    if exeWithPath.find(path) >= 0 :
                        exe = exeWithPath.replace(path, '')
                        tar.add(path+exe,exe)
                    else :
                        tar.add(exeWithPath,os.path.basename(executable))
                # otherwise the exe is from release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            libDir = 'lib'
            lib = swArea+'/' +libDir
            common.logger.debug(5,"lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib,libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module,moduleDir)

            ## Now check if any data dir(s) is present
            swAreaLen=len(swArea)
            for root, dirs, files in os.walk(swArea):
                if "data" in dirs:
                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
                    # archive name is the path relative to the user area
                    tar.add(root+"/data",root[swAreaLen:]+"/data")

            ## Add ProdAgent dir to tar
            paDir = 'ProdAgentApi'
            pa = os.environ['CRABDIR'] + '/' + paDir
            if os.path.isdir(pa):
                tar.add(pa,paDir)

            ### FEDE FOR DBS PUBLICATION
            ## Add PRODCOMMON dir to tar
            prodcommonDir = 'ProdCommon'
            prodcommonPath = os.environ['CRABDIR'] + '/' + prodcommonDir
            if os.path.isdir(prodcommonPath):
                tar.add(prodcommonPath,prodcommonDir)

            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            # always release the file handle, also when adding a member fails
            tar.close()
    except CrabException:
        # keep the specific message (e.g. executable not found)
        raise
    except Exception:
        raise CrabException('Could not create tar-ball')

    ## check for tarball size
    tarballinfo = os.stat(self.tgzNameWithPath)
    if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
        raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')

    ## create tar-ball with ML stuff
    self.MLtgzfile = common.work_space.pathForTgz()+'share/MLfiles.tgz'
    try:
        tar = tarfile.open(self.MLtgzfile, "w:gz")
        try:
            path=os.environ['CRABDIR'] + '/python/'
            for mlFile in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
                tar.add(path+mlFile,mlFile)
            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
        finally:
            tar.close()
    except Exception:
        raise CrabException('Could not create ML files tar-ball')

    return
855 ewv 1.131
def additionalInputFileTgz(self):
    """
    Put all additional files into a tar ball and return its name.

    The tarball is written to <pathForTgz()>share/<self.additional_tgz_name>.
    Every entry of self.additional_inbox_files is stored under the last
    '/'-separated component of its path.
    """
    import tarfile
    tarName= common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
    tar = tarfile.open(tarName, "w:gz")
    try:
        for inboxFile in self.additional_inbox_files:
            # str.split replaces the deprecated string.split module function
            tar.add(inboxFile,inboxFile.split('/')[-1])
        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
    finally:
        # release the file handle even if one of the files cannot be added
        tar.close()
    return tarName
868    
def wsSetupEnvironment(self, nj):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    The returned shell text, in order:
      * sets up the CMS environment for the LCG or OSG middleware (on OSG a
        temporary working directory is created first);
      * creates the CMSSW project area for self.version and evaluates the
        scram runtime environment;
      * checks that the wrapper received at least two arguments;
      * defines the dataset related shell variables;
      * if self.pset is not None, copies the job configuration file and
        patches it with sed using the job arguments (input files / events
        for dataset jobs, first run and random seeds otherwise);
      * unpacks the additional input tarball when additional files exist;
      * if self.pset is not None, prints pset.cfg and computes PSETHASH.
    """
    # Prepare JobType-independent part
    txt = ''

    ## OLI_Daniele at this level middleware already known

    txt += 'if [ $middleware == LCG ]; then \n'
    txt += ' echo "### First set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
    txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
    txt += ' export BUILD_ARCH='+self.executable_arch+'\n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' echo "Created working directory: $WORKING_DIR"\n'
    txt += ' if [ ! -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
    txt += ' echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' exit 1\n'
    txt += ' fi\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    txt += ' echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
    txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
    txt += scram+' project CMSSW '+self.version+'\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
    txt += ' echo "JOB_EXIT_STATUS = 10034"\n'
    txt += ' echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    ## OLI_Daniele
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' echo "Remove working directory: $WORKING_DIR"\n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    # report the release that was actually requested (was a hard-coded CMSSW_0_6_1)
    txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW '+self.version+' not found on `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
    txt += ' echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += ' exit 1 \n'
    txt += 'fi \n'
    txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
    txt += 'cd '+self.version+'\n'
    ########## FEDE FOR DBS2 ######################
    txt += 'SOFTWARE_DIR=`pwd`\n'
    txt += 'echo SOFTWARE_DIR=$SOFTWARE_DIR \n'
    ###############################################
    ### needed grep for bug in scramv1 ###
    txt += scram+' runtime -sh\n'
    txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    txt += 'echo $PATH\n'

    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber)\n"
    txt += "\n"
    txt += "if [ $nargs -lt 2 ]\n"
    txt += "then\n"
    txt += " echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
    txt += ' echo "JOB_EXIT_STATUS = 50113"\n'
    txt += ' echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    ## OLI_Daniele
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' echo "Remove working directory: $WORKING_DIR"\n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
    txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
    txt += ' echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += " exit 1\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    ### FEDE FOR DBS OUTPUT PUBLICATION
    if (self.datasetPath):
        txt += '\n'
        txt += 'DatasetPath='+self.datasetPath+'\n'

        # the path is split on '/'; fields 1 and 2 are used as primary
        # dataset and data tier
        datasetpath_split = self.datasetPath.split("/")

        txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
        txt += 'DataTier='+datasetpath_split[2]+'\n'
        txt += 'ApplicationFamily=cmsRun\n'

    else:
        txt += 'DatasetPath=MCDataTier\n'
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'

    if self.pset is not None: #CarlosDaniele
        pset = os.path.basename(job.configFilename())
        txt += '\n'
        txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
        if (self.datasetPath): # standard job
            txt += 'InputFiles=${args[1]}\n'
            txt += 'MaxEvents=${args[2]}\n'
            txt += 'SkipEvents=${args[3]}\n'
            txt += 'echo "Inputfiles:<$InputFiles>"\n'
            txt += 'sed "s#\'INPUTFILE\'#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
            txt += 'echo "MaxEvents:<$MaxEvents>"\n'
            txt += 'sed "s#int32 input = 0#int32 input = $MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
            txt += 'echo "SkipEvents:<$SkipEvents>"\n'
            txt += 'sed "s#uint32 skipEvents = 0#uint32 skipEvents = $SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
        else: # pythia like job
            # seedIndex tracks the position of the next optional argument
            seedIndex=1
            if (self.firstRun):
                txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
                txt += 'echo "FirstRun: <$FirstRun>"\n'
                txt += 'sed "s#uint32 firstRun = 0#uint32 firstRun = $FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                seedIndex=seedIndex+1

            if (self.sourceSeed):
                txt += 'Seed=${args['+str(seedIndex)+']}\n'
                txt += 'sed "s#uint32 sourceSeed = 0#uint32 sourceSeed = $Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                seedIndex=seedIndex+1
                ## the following seeds are not always present
                if (self.sourceSeedVtx):
                    txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "VtxSeed: <$VtxSeed>"\n'
                    txt += 'sed "s#uint32 VtxSmeared = 0#uint32 VtxSmeared = $VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
                if (self.sourceSeedG4):
                    txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "G4Seed: <$G4Seed>"\n'
                    txt += 'sed "s#uint32 g4SimHits = 0#uint32 g4SimHits = $G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
                if (self.sourceSeedMix):
                    txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
                    txt += 'echo "MixSeed: <$mixSeed>"\n'
                    txt += 'sed "s#uint32 mix = 0#uint32 mix = $mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
                    seedIndex += 1
        txt += 'mv -f '+pset+' pset.cfg\n'

    if len(self.additional_inbox_files) > 0:
        txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
        txt += ' tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
        txt += 'fi\n'

    if self.pset is not None: #CarlosDaniele
        txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'

        txt += '\n'
        txt += 'echo "***** cat pset.cfg *********"\n'
        txt += 'cat pset.cfg\n'
        txt += 'echo "****** end pset.cfg ********"\n'
        txt += '\n'
        ### FEDE FOR DBS OUTPUT PUBLICATION
        txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
        txt += 'echo "PSETHASH = $PSETHASH" \n'
        ##############
        txt += '\n'
    return txt
1070    
def wsBuildExe(self, nj=0):
    """
    Put in the script the commands to build an executable
    or a library.

    If self.tgzNameWithPath is an existing file, the returned shell text
    untars $RUNTIME_AREA/<basename of that file>, aborts the job when the
    untar fails (removing the OSG working directory first), and prepends
    $SOFTWARE_DIR/ProdAgentApi and $SOFTWARE_DIR/ProdCommon to PYTHONPATH.
    Otherwise an empty string is returned. 'nj' is not used.
    """

    txt = ""

    if os.path.isfile(self.tgzNameWithPath):
        tgzBaseName = os.path.basename(self.tgzNameWithPath)
        txt += 'echo "tar xzvf $RUNTIME_AREA/'+tgzBaseName+'"\n'
        txt += 'tar xzvf $RUNTIME_AREA/'+tgzBaseName+'\n'
        txt += 'untar_status=$? \n'
        txt += 'if [ $untar_status -ne 0 ]; then \n'
        txt += ' echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
        txt += ' echo "JOB_EXIT_STATUS = $untar_status" \n'
        txt += ' echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' if [ $middleware == OSG ]; then \n'
        txt += ' echo "Remove working directory: $WORKING_DIR"\n'
        txt += ' cd $RUNTIME_AREA\n'
        txt += ' /bin/rm -rf $WORKING_DIR\n'
        txt += ' if [ -d $WORKING_DIR ] ;then\n'
        txt += ' echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
        txt += ' echo "JOB_EXIT_STATUS = 50999"\n'
        txt += ' echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
        txt += ' rm -f $RUNTIME_AREA/$repo \n'
        txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
        txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' fi\n'
        txt += ' fi \n'
        txt += ' \n'
        txt += ' exit 1 \n'
        txt += 'else \n'
        txt += ' echo "Successful untar" \n'
        txt += 'fi \n'
        txt += '\n'
        txt += 'echo "Include ProdAgentApi and PRODCOMMON in PYTHONPATH"\n'
        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
        #### FEDE FOR DBS OUTPUT PUBLICATION
        txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
        txt += 'else\n'
        txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
        txt += 'fi\n'
        # Report the final value after the if/else is closed, so that it is
        # printed in both branches (it used to sit inside the else branch).
        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
        txt += '\n'

    return txt
1125    
def modifySteeringCards(self, nj):
    """
    Modify the card provided by the user, writing a new card into the
    share dir. This implementation performs no action and returns None.
    """
    return None
1131 ewv 1.131
def executableName(self):
    """
    Return the command used to start the job: "sh " when self.scriptExe is
    set (the script is then passed through executableArgs), otherwise
    self.executable.
    """
    if not self.scriptExe:
        return self.executable
    return "sh "
1137 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string passed to the job executable.

    When self.scriptExe is set the result is '<scriptExe> $NJob'.
    Otherwise the result is ' -e -p pset.cfg' for CMSSW releases from
    1_5_X onwards and ' -p pset.cfg' for older ones. The release is read
    from self.scram.getSWVersion(), expected as '<name>_<major>_<minor>...'.

    Raises CrabException when major/minor cannot be parsed as integers.
    """
    if self.scriptExe:#CarlosDaniele
        return self.scriptExe + " $NJob"

    # if >= CMSSW_1_5_X, add -e
    version_array = self.scram.getSWVersion().split('_')
    try:
        major = int(version_array[1])
        minor = int(version_array[2])
    except (IndexError, ValueError):
        msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
        raise CrabException(msg)

    # Compare (major, minor) as a pair: testing the two numbers separately
    # would wrongly classify e.g. 2_0_X as older than 1_5_X.
    if (major, minor) >= (1, 5):
        return " -e -p pset.cfg"
    return " -p pset.cfg"
1156 slacapra 1.1
def inputSandbox(self, nj):
    """
    Build the list of files for the JDL input sandbox.

    The list holds, in this order: self.tgzNameWithPath and self.MLtgzfile
    (each only if it is an existing file), the job configuration file under
    <pathForTgz()>job/ (only if self.pset is not None), and the tarball
    returned by additionalInputFileTgz(). 'nj' is not used.
    """
    ## code
    tarballs = [self.tgzNameWithPath, self.MLtgzfile]
    inp_box = [tgzPath for tgzPath in tarballs if os.path.isfile(tgzPath)]

    ## config
    if self.pset is not None:
        cfgPath = common.work_space.pathForTgz() + 'job/' + self.configFilename()
        inp_box.append(cfgPath)

    ## additional input files
    inp_box.append(self.additionalInputFileTgz())
    return inp_box
1176    
def outputSandbox(self, nj):
    """
    Build the list of files for the JDL output sandbox.

    Every name in self.output_file followed by self.output_file_sandbox is
    passed through numberFile_ with the suffix str(nj + 1).
    """
    ## User Declared output files
    declared = self.output_file + self.output_file_sandbox
    return [self.numberFile_(outName, str(nj + 1)) for outName in declared]
1188    
def prepareSteeringCards(self):
    """
    Hook for initial modifications of the user's steering card file.
    This implementation performs no action and returns None.
    """
    return None
1194    
def wsRenameOutput(self, nj):
    """
    Returns part of a job script which renames the produced files.

    For every file in self.output_file_sandbox and self.output_file the
    shell text moves the file to $RUNTIME_AREA and copies it to its
    numbered name (see numberFile_, suffix '$NJob'). When a file is missing
    the script sets exit_status=60302; for files in self.output_file it
    also echoes JOB_EXIT_STATUS and sets output_exit_status. With the
    'condor_g' scheduler a dummy output file is written on OSG.
    Finally the shell variable file_list is set to the numbered names of
    self.output_file and the script changes to $RUNTIME_AREA.
    'nj' is not used.
    """

    txt = '\n'
    txt += '# directory content\n'
    txt += 'ls \n'

    txt += 'output_exit_status=0\n'

    for fileWithSuffix in (self.output_file_sandbox):
        output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
        txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA\n'
        txt += ' cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
        txt += 'else\n'
        txt += ' exit_status=60302\n'
        txt += ' echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
        if common.scheduler.boss_scheduler_name == 'condor_g':
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' fi \n'
        txt += 'fi\n'

    # numbered names of the regular output files, reused for file_list below
    file_list = []
    for fileWithSuffix in (self.output_file):
        output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
        file_list.append(output_file_num)
        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
        txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA\n'
        txt += ' cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
        txt += 'else\n'
        txt += ' exit_status=60302\n'
        txt += ' echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
        txt += ' echo "JOB_EXIT_STATUS = $exit_status"\n'
        txt += ' output_exit_status=$exit_status\n'
        if common.scheduler.boss_scheduler_name == 'condor_g':
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' fi \n'
        txt += 'fi\n'

    # str.join replaces the deprecated string.join module function
    txt += 'file_list="'+' '.join(file_list)+'"\n'
    txt += 'cd $RUNTIME_AREA\n'
    return txt
1248    
def numberFile_(self, file, txt):
    """
    Insert '_<txt>' before the last extension of a file name.

    'out.root' with txt '3' gives 'out_3.root'; a name without any '.'
    simply gets '_<txt>' appended.
    """
    # str.split replaces the deprecated string.split module function
    pieces = file.split(".")
    if len(pieces) > 1:
        # everything before the last '.' is the name, the rest the extension
        name = ".".join(pieces[:-1])
        return name + '_' + txt + "." + pieces[-1]
    return pieces[0] + '_' + txt
1266    
def getRequirements(self, nj=None):
    """
    Return job requirements to add to jdl files.

    The result is an ' && '-joined expression requiring the
    'VO-cms-<self.version>' tag (if self.version is set), the
    'VO-cms-<self.executable_arch>' tag (if self.executable_arch is set)
    and other.GlueHostNetworkAdapterOutboundIP. 'nj' is not used.
    """
    clauses = []
    for tag in (self.version, self.executable_arch):
        if tag:
            clauses.append('Member("VO-cms-' + tag +
                           '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')

    # Joining the collected clauses avoids a dangling leading ' && ' when
    # self.version is empty.
    return ' && '.join(clauses)
1286 gutsche 1.3
def configFilename(self):
    """ return the config filename: the value of self.name() plus '.cfg' """
    base = self.name()
    return base + '.cfg'
1290    
1291     ### OLI_DANIELE
def wsSetupCMSOSGEnvironment_(self):
    """
    Returns the part of a job script which prepares the CMS execution
    environment on OSG.

    The shell text exports SCRAM_ARCH=<self.executable_arch> and sources
    cmsset_default.sh (with self.version as argument) from
    $GRID3_APP_DIR/cmssoft or, failing that, from $OSG_APP/cmssoft/cms.
    If neither file exists the script reports error 10020, removes
    $WORKING_DIR (reporting 10017 if that fails) and exits with status 1.
    """
    txt = '\n'
    txt += ' echo "### SETUP CMS OSG ENVIRONMENT ###"\n'
    txt += ' if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
    txt += ' # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
    txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
    txt += ' source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
    txt += ' elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
    txt += ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
    txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
    txt += ' source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
    txt += ' else\n'
    txt += ' echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10020"\n'
    txt += ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    # No 'exit 1' here: exiting at this point made the working-directory
    # cleanup below unreachable. The script exits after the cleanup.
    txt += '\n'
    txt += ' echo "Remove working directory: $WORKING_DIR"\n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10017"\n'
    txt += ' echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' rm -f $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
    txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += '\n'
    txt += ' exit 1\n'
    txt += ' fi\n'
    txt += '\n'
    txt += ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
    txt += ' echo " END SETUP CMS OSG ENVIRONMENT "\n'

    return txt
1337 ewv 1.131
1338 gutsche 1.3 ### OLI_DANIELE
def wsSetupCMSLCGEnvironment_(self):
    """
    Returns the part of a job script which prepares the CMS execution
    environment on LCG.

    The shell text checks that $VO_CMS_SW_DIR is set (error 10031), that
    $VO_CMS_SW_DIR/cmsset_default.sh is a non-empty file (error 10020) and
    that sourcing it succeeds (error 10032); every failure exits with
    status 1.
    """
    script_lines = (
        ' \n',
        ' echo " ### SETUP CMS LCG ENVIRONMENT ### "\n',
        # software directory must be defined on the worker node
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n',
        ' echo "JOB_EXIT_STATUS = 10031" \n',
        ' echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        # setup script must exist and be non-empty
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        # sourcing itself must succeed
        ' if [ $result -ne 0 ]; then\n',
        ' echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' echo "JOB_EXIT_STATUS = 10032"\n',
        ' echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
        ' echo "### END SETUP CMS LCG ENVIRONMENT ###"\n',
    )
    return ''.join(script_lines)
1385 gutsche 1.5
1386 ewv 1.131 ### FEDE FOR DBS OUTPUT PUBLICATION
def modifyReport(self, nj):
    """
    Return the shell fragment that rewrites the FrameworkJobReport for
    DBS publication.

    nj is accepted for interface compatibility and is not used here.

    When USER.publish_data is absent or different from 1, the fragment
    only echoes that no publication is required.  Otherwise
    USER.publish_data_name is read from cfg_params (a missing key raises
    KeyError) and the fragment runs ModifyJobReport.py on the job report.
    """
    try:
        wants_publication = int(self.cfg_params['USER.publish_data']) == 1
    except KeyError:
        wants_publication = False

    if not wants_publication:
        return 'echo "no data publication required"\n'

    dataset_name = self.cfg_params['USER.publish_data_name']

    # The same command line is first echoed into the job log and then run.
    modify_cmd = ('$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py'
                  ' crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset'
                  ' $DataTier $ProcessedDataset $ApplicationFamily'
                  ' $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH')

    pieces = (
        'echo "Modify Job Report" \n',
        'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n',
        # make sure SE and SE_PATH are at least defined (possibly empty)
        'if [ -z "$SE" ]; then\n',
        ' SE="" \n',
        'fi \n',
        'if [ -z "$SE_PATH" ]; then\n',
        ' SE_PATH="" \n',
        'fi \n',
        'echo "SE = $SE"\n',
        'echo "SE_PATH = $SE_PATH"\n',
        'ProcessedDataset=' + dataset_name + '\n',
        # LFN prefix: whatever follows 'store' in SE_PATH ('store' is
        # hardcoded), or a placeholder when SE_PATH is empty
        'if [ "$SE_PATH" == "" ]; then\n',
        ' FOR_LFN=/copy_problems/ \n',
        'else \n',
        ' tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n',
        ' FOR_LFN=/store$tmp \n',
        'fi \n',
        'echo "ProcessedDataset = $ProcessedDataset"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
        'echo "' + modify_cmd + '"\n',
        modify_cmd + '\n',
        'modifyReport_result=$?\n',
        'echo modifyReport_result = $modifyReport_result\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' exit_status=1\n',
        ' echo "ERROR: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n',
        'fi\n',
    )
    return ''.join(pieces)
1454 fanzago 1.99
def cleanEnv(self):
    """
    Return the shell fragment that removes $WORKING_DIR when the
    middleware is OSG, reporting exit code 60999 to the status file
    if the directory still exists after the removal.
    """
    fragment = (
        'if [ $middleware == OSG ]; then\n',
        ' cd $RUNTIME_AREA\n',
        ' echo "Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        # directory still present after rm: report the failure
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n',
        ' echo "JOB_EXIT_STATUS = 60999"\n',
        ' echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' fi\n',
        'fi\n',
        '\n',
    )
    return ''.join(fragment)
1474 fanzago 1.93
def setParam_(self, param, value):
    """Store value under the key param in the job-type parameter dictionary."""
    self._params.update({param: value})
1477    
def getParams(self):
    """Return the job-type parameter dictionary itself (not a copy)."""
    params = self._params
    return params
1480 gutsche 1.8
def setTaskid_(self):
    """Cache cfg_params['taskId'] on the instance; raises KeyError if unset."""
    task_id = self.cfg_params['taskId']
    self._taskId = task_id
1483 ewv 1.131
def getTaskid(self):
    """Return the task id cached by setTaskid_ (AttributeError if never set)."""
    return getattr(self, '_taskId')
1486 gutsche 1.35
def uniquelist(self, old):
    """
    Remove duplicates from a list.

    old is any iterable of hashable items.  The result is always a new
    list that keeps each distinct item once, in order of first
    appearance, so the output is deterministic and supports indexing
    and concatenation on every Python version (a bare dict.keys() is
    arbitrarily ordered on Python 2 and is not a list on Python 3).
    """
    seen = {}
    unique = []
    for item in old:
        if item not in seen:
            seen[item] = 0
            unique.append(item)
    return unique
1495 mcinquil 1.121
1496    
def checkOut(self, limit):
    """
    Return the shell fragment that checks the total size of the output
    files against the output sandbox limit.

    limit is written verbatim into the script as the shell variable
    $limit and compared with the summed third column of `ls -gG`
    (presumably the size in bytes -- confirm against the caller).

    The file list is built from self.output_file and
    self.output_file_sandbox, skipping every name containing 'crab_fjr',
    with each name passed through self.numberFile_(name, '$NJob').

    When the sum exceeds the limit, the generated script finds the first
    file that makes the running total cross the limit, removes that file
    and all files listed after it, and sets exit_status=70000.
    """
    numbered = [self.numberFile_(name, '$NJob')
                for name in (self.output_file + self.output_file_sandbox)
                if name.find('crab_fjr') == -1]
    # every entry is preceded by a blank, as in the original concatenation
    allOutFiles = ''.join([' ' + name for name in numbered])

    banner = 'echo "*****************************************"\n'

    script = [
        banner,
        'echo "** Starting output sandbox limit check **"\n',
        banner,
        'echo "OUTPUT files: ' + allOutFiles + '";\n',
        'ls -gGhrta;\n',
        # first pass: total size of the output files that exist
        'sum=0;\n',
        'for file in ' + allOutFiles + ' ; do\n',
        ' if [ -e $file ]; then\n',
        ' tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n',
        ' sum=`expr $sum + $tt`\n',
        ' else\n',
        ' echo "WARNING: output file $file not found!"\n',
        ' fi\n',
        'done\n',
        'echo "Total Output dimension: $sum";\n',
        'limit=' + str(limit) + ';\n',
        'echo "OUTPUT FILES LIMIT SET TO: $limit";\n',
        'if [ $limit -lt $sum ]; then\n',
        ' echo "WARNING: output files have to big size - something will be lost;"\n',
        ' echo " checking the output file sizes..."\n',
        # second pass: find the first file that pushes the running total
        # over the limit; missing files are skipped so expr never sees
        # an empty operand
        ' tot=0;\n',
        ' for file2 in ' + allOutFiles + ' ; do\n',
        ' if [ -e $file2 ]; then\n',
        ' tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n',
        ' tot=`expr $tot + $tt`;\n',
        ' if [ $limit -lt $tot ]; then\n',
        ' tot=`expr $tot - $tt`;\n',
        # must be the current loop variable, not the stale $file left
        # over from the first pass
        ' fileLast=$file2;\n',
        ' break;\n',
        ' fi\n',
        ' fi\n',
        ' done\n',
        ' echo "Dimension calculated: $tot"; echo "First file to exclude: $fileLast";\n',
        # third pass: drop $fileLast and every file listed after it
        ' flag=0;\n',
        ' for filess in ' + allOutFiles + ' ; do\n',
        ' if [ $fileLast = $filess ]; then\n',
        ' flag=1;\n',
        ' fi\n',
        ' if [ $flag -eq 1 ]; then\n',
        ' rm -f $filess;\n',
        ' fi\n',
        ' done\n',
        ' ls -agGhrt;\n',
        ' echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n',
        ' echo "JOB_EXIT_STATUS = 70000";\n',
        ' exit_status=70000;\n',
        # no newline after 'else': the echo shares its line, which is
        # valid shell and matches the script produced so far
        'else',
        ' echo "Total Output dimension $sum is fine.";\n',
        'fi\n',
        banner,
        'echo "*** Ending output sandbox limit check ***"\n',
        banner,
    ]
    return ''.join(script)