ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.136
Committed: Wed Nov 14 17:53:21 2007 UTC (17 years, 5 months ago) by fanzago
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_2_0_2_pre5
Changes since 1.135: +2 -0 lines
Log Message:
added echos

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5 fanzago 1.115 from BlackWhiteListParser import BlackWhiteListParser
6 slacapra 1.1 import common
7     import Scram
8    
9 slacapra 1.105 import os, string, glob
10 slacapra 1.1
11     class Cmssw(JobType):
12 gutsche 1.38 def __init__(self, cfg_params, ncjobs):
13 slacapra 1.1 JobType.__init__(self, 'CMSSW')
14     common.logger.debug(3,'CMSSW::__init__')
15    
16 gutsche 1.3 self._params = {}
17     self.cfg_params = cfg_params
18 gutsche 1.38
19 fanzago 1.115 # init BlackWhiteListParser
20     self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
21    
22 gutsche 1.72 try:
23     self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24     except KeyError:
25 slacapra 1.86 self.MaxTarBallSize = 9.5
26 gutsche 1.72
27 gutsche 1.44 # number of jobs requested to be created, limit obj splitting
28 gutsche 1.38 self.ncjobs = ncjobs
29    
30 slacapra 1.1 log = common.logger
31 ewv 1.131
32 slacapra 1.1 self.scram = Scram.Scram(cfg_params)
33     self.additional_inbox_files = []
34     self.scriptExe = ''
35     self.executable = ''
36 slacapra 1.71 self.executable_arch = self.scram.getArch()
37 slacapra 1.1 self.tgz_name = 'default.tgz'
38 slacapra 1.97 self.additional_tgz_name = 'additional.tgz'
39 corvo 1.56 self.scriptName = 'CMSSW.sh'
40 ewv 1.131 self.pset = '' #scrip use case Da
41 spiga 1.42 self.datasetPath = '' #scrip use case Da
42 gutsche 1.3
43 gutsche 1.50 # set FJR file name
44     self.fjrFileName = 'crab_fjr.xml'
45    
46 slacapra 1.1 self.version = self.scram.getSWVersion()
47 ewv 1.131
48 spiga 1.114 #
49     # Try to block creation in case of arch/version mismatch
50     #
51    
52     a = string.split(self.version, "_")
53    
54     if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
55 fanzago 1.134 msg = "Warning: You are using %s version of CMSSW with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
56     common.logger.message(msg)
57 spiga 1.114 if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58     msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
59     raise CrabException(msg)
60 ewv 1.131
61 slacapra 1.55 common.taskDB.setDict('codeVersion',self.version)
62 gutsche 1.5 self.setParam_('application', self.version)
63 slacapra 1.47
64 slacapra 1.1 ### collect Data cards
65 gutsche 1.66
66     ## get DBS mode
67     try:
68 slacapra 1.86 self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
69 gutsche 1.66 except KeyError:
70 slacapra 1.86 self.use_dbs_1 = 0
71 ewv 1.131
72 slacapra 1.1 try:
73 slacapra 1.9 tmp = cfg_params['CMSSW.datasetpath']
74     log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
75     if string.lower(tmp)=='none':
76     self.datasetPath = None
77 slacapra 1.21 self.selectNoInput = 1
78 slacapra 1.9 else:
79     self.datasetPath = tmp
80 slacapra 1.21 self.selectNoInput = 0
81 slacapra 1.1 except KeyError:
82 ewv 1.131 msg = "Error: datasetpath not defined "
83 slacapra 1.1 raise CrabException(msg)
84 gutsche 1.5
85     # ML monitoring
86     # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
87 slacapra 1.9 if not self.datasetPath:
88     self.setParam_('dataset', 'None')
89     self.setParam_('owner', 'None')
90     else:
91 gutsche 1.92 try:
92     datasetpath_split = self.datasetPath.split("/")
93     # standard style
94 mcinquil 1.120 self.setParam_('datasetFull', self.datasetPath)
95 gutsche 1.92 if self.use_dbs_1 == 1 :
96     self.setParam_('dataset', datasetpath_split[1])
97     self.setParam_('owner', datasetpath_split[-1])
98     else:
99     self.setParam_('dataset', datasetpath_split[1])
100     self.setParam_('owner', datasetpath_split[2])
101     except:
102     self.setParam_('dataset', self.datasetPath)
103     self.setParam_('owner', self.datasetPath)
104 ewv 1.131
105 gutsche 1.8 self.setTaskid_()
106     self.setParam_('taskId', self.cfg_params['taskId'])
107 gutsche 1.5
108 slacapra 1.1 self.dataTiers = []
109    
110     ## now the application
111     try:
112     self.executable = cfg_params['CMSSW.executable']
113 gutsche 1.5 self.setParam_('exe', self.executable)
114 slacapra 1.1 log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
115     msg = "Default executable cmsRun overridden. Switch to " + self.executable
116     log.debug(3,msg)
117     except KeyError:
118     self.executable = 'cmsRun'
119 gutsche 1.5 self.setParam_('exe', self.executable)
120 slacapra 1.1 msg = "User executable not defined. Use cmsRun"
121     log.debug(3,msg)
122     pass
123    
124     try:
125     self.pset = cfg_params['CMSSW.pset']
126     log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
127 ewv 1.131 if self.pset.lower() != 'none' :
128 spiga 1.42 if (not os.path.exists(self.pset)):
129     raise CrabException("User defined PSet file "+self.pset+" does not exist")
130     else:
131     self.pset = None
132 slacapra 1.1 except KeyError:
133     raise CrabException("PSet file missing. Cannot run cmsRun ")
134    
135     # output files
136 slacapra 1.53 ## stuff which must be returned always via sandbox
137     self.output_file_sandbox = []
138    
139     # add fjr report by default via sandbox
140     self.output_file_sandbox.append(self.fjrFileName)
141    
142     # other output files to be returned via sandbox or copied to SE
143 slacapra 1.1 try:
144     self.output_file = []
145     tmp = cfg_params['CMSSW.output_file']
146     if tmp != '':
147     tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
148     log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
149     for tmp in tmpOutFiles:
150     tmp=string.strip(tmp)
151     self.output_file.append(tmp)
152     pass
153     else:
154 gutsche 1.92 log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
155 slacapra 1.1 pass
156     pass
157     except KeyError:
158 gutsche 1.92 log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
159 slacapra 1.1 pass
160    
161     # script_exe file as additional file in inputSandbox
162     try:
163 slacapra 1.10 self.scriptExe = cfg_params['USER.script_exe']
164     if self.scriptExe != '':
165     if not os.path.isfile(self.scriptExe):
166 slacapra 1.64 msg ="ERROR. file "+self.scriptExe+" not found"
167 slacapra 1.10 raise CrabException(msg)
168 spiga 1.42 self.additional_inbox_files.append(string.strip(self.scriptExe))
169 slacapra 1.1 except KeyError:
170 spiga 1.42 self.scriptExe = ''
171 slacapra 1.70
172 spiga 1.42 #CarlosDaniele
173     if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
174 slacapra 1.70 msg ="Error. script_exe not defined"
175 spiga 1.42 raise CrabException(msg)
176    
177 slacapra 1.1 ## additional input files
178     try:
179 slacapra 1.29 tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
180 slacapra 1.70 for tmp in tmpAddFiles:
181     tmp = string.strip(tmp)
182     dirname = ''
183     if not tmp[0]=="/": dirname = "."
184 corvo 1.85 files = []
185     if string.find(tmp,"*")>-1:
186     files = glob.glob(os.path.join(dirname, tmp))
187     if len(files)==0:
188     raise CrabException("No additional input file found with this pattern: "+tmp)
189     else:
190     files.append(tmp)
191 slacapra 1.70 for file in files:
192     if not os.path.exists(file):
193     raise CrabException("Additional input file not found: "+file)
194 slacapra 1.45 pass
195 slacapra 1.105 # fname = string.split(file, '/')[-1]
196     # storedFile = common.work_space.pathForTgz()+'share/'+fname
197     # shutil.copyfile(file, storedFile)
198     self.additional_inbox_files.append(string.strip(file))
199 slacapra 1.1 pass
200     pass
201 slacapra 1.70 common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
202 slacapra 1.1 except KeyError:
203     pass
204    
205 slacapra 1.9 # files per job
206 slacapra 1.1 try:
207 gutsche 1.35 if (cfg_params['CMSSW.files_per_jobs']):
208     raise CrabException("files_per_jobs no longer supported. Quitting.")
209 gutsche 1.3 except KeyError:
210 gutsche 1.35 pass
211 gutsche 1.3
212 slacapra 1.9 ## Events per job
213 gutsche 1.3 try:
214 slacapra 1.10 self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
215 slacapra 1.9 self.selectEventsPerJob = 1
216 gutsche 1.3 except KeyError:
217 slacapra 1.9 self.eventsPerJob = -1
218     self.selectEventsPerJob = 0
219 ewv 1.131
220 slacapra 1.22 ## number of jobs
221     try:
222     self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
223     self.selectNumberOfJobs = 1
224     except KeyError:
225     self.theNumberOfJobs = 0
226     self.selectNumberOfJobs = 0
227 slacapra 1.10
228 gutsche 1.35 try:
229     self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
230     self.selectTotalNumberEvents = 1
231     except KeyError:
232     self.total_number_of_events = 0
233     self.selectTotalNumberEvents = 0
234    
235 ewv 1.131 if self.pset != None: #CarlosDaniele
236 spiga 1.42 if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
237     msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
238     raise CrabException(msg)
239     else:
240     if (self.selectNumberOfJobs == 0):
241     msg = 'Must specify number_of_jobs.'
242     raise CrabException(msg)
243 gutsche 1.35
244 slacapra 1.22 ## source seed for pythia
245     try:
246     self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
247     except KeyError:
248 slacapra 1.23 self.sourceSeed = None
249     common.logger.debug(5,"No seed given")
250 slacapra 1.22
251 slacapra 1.28 try:
252     self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
253     except KeyError:
254     self.sourceSeedVtx = None
255     common.logger.debug(5,"No vertex seed given")
256 slacapra 1.90
257     try:
258     self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
259     except KeyError:
260     self.sourceSeedG4 = None
261     common.logger.debug(5,"No g4 sim hits seed given")
262    
263     try:
264     self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
265     except KeyError:
266     self.sourceSeedMix = None
267     common.logger.debug(5,"No mix seed given")
268    
269 spiga 1.57 try:
270     self.firstRun = int(cfg_params['CMSSW.first_run'])
271     except KeyError:
272     self.firstRun = None
273     common.logger.debug(5,"No first run given")
274 spiga 1.42 if self.pset != None: #CarlosDaniele
275 ewv 1.131 import PsetManipulator as pp
276 slacapra 1.97 PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
277 gutsche 1.3
278 slacapra 1.1 #DBSDLS-start
279 ewv 1.131 ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
280 slacapra 1.1 self.maxEvents=0 # max events available ( --> check the requested nb. of evts in Creator.py)
281     self.DBSPaths={} # all dbs paths requested ( --> input to the site local discovery script)
282 gutsche 1.35 self.jobDestination=[] # Site destination(s) for each job (list of lists)
283 slacapra 1.1 ## Perform the data location and discovery (based on DBS/DLS)
284 slacapra 1.9 ## SL: Don't if NONE is specified as input (pythia use case)
285 gutsche 1.35 blockSites = {}
286 slacapra 1.9 if self.datasetPath:
287 gutsche 1.35 blockSites = self.DataDiscoveryAndLocation(cfg_params)
288 ewv 1.131 #DBSDLS-end
289 slacapra 1.1
290     self.tgzNameWithPath = self.getTarBall(self.executable)
291 ewv 1.131
292 slacapra 1.9 ## Select Splitting
293 ewv 1.131 if self.selectNoInput:
294 spiga 1.42 if self.pset == None: #CarlosDaniele
295     self.jobSplittingForScript()
296     else:
297     self.jobSplittingNoInput()
298 gutsche 1.92 else:
299 corvo 1.56 self.jobSplittingByBlocks(blockSites)
300 gutsche 1.5
301 slacapra 1.22 # modify Pset
302 spiga 1.42 if self.pset != None: #CarlosDaniele
303 slacapra 1.86 try:
304     if (self.datasetPath): # standard job
305     # allow to processa a fraction of events in a file
306 ewv 1.131 PsetEdit.inputModule("INPUTFILE")
307     PsetEdit.maxEvent(0)
308     PsetEdit.skipEvent(0)
309 slacapra 1.86 else: # pythia like job
310 slacapra 1.90 PsetEdit.maxEvent(self.eventsPerJob)
311 slacapra 1.86 if (self.firstRun):
312 ewv 1.131 PsetEdit.pythiaFirstRun(0) #First Run
313 slacapra 1.86 if (self.sourceSeed) :
314 ewv 1.131 PsetEdit.pythiaSeed(0)
315 slacapra 1.86 if (self.sourceSeedVtx) :
316 ewv 1.131 PsetEdit.vtxSeed(0)
317 slacapra 1.90 if (self.sourceSeedG4) :
318 ewv 1.131 PsetEdit.g4Seed(0)
319 slacapra 1.90 if (self.sourceSeedMix) :
320 ewv 1.131 PsetEdit.mixSeed(0)
321 slacapra 1.86 # add FrameworkJobReport to parameter-set
322 slacapra 1.90 PsetEdit.addCrabFJR(self.fjrFileName)
323     PsetEdit.psetWriter(self.configFilename())
324 slacapra 1.86 except:
325     msg='Error while manipuliating ParameterSet: exiting...'
326     raise CrabException(msg)
327 gutsche 1.3
328 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
329    
330 slacapra 1.86 import DataDiscovery
331     import DataDiscovery_DBS2
332     import DataLocation
333 gutsche 1.3 common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
334    
335     datasetPath=self.datasetPath
336    
337 slacapra 1.1 ## Contact the DBS
338 gutsche 1.92 common.logger.message("Contacting Data Discovery Services ...")
339 slacapra 1.1 try:
340 gutsche 1.66
341 slacapra 1.86 if self.use_dbs_1 == 1 :
342     self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
343     else :
344 corvo 1.85 self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
345 slacapra 1.1 self.pubdata.fetchDBSInfo()
346    
347 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
348 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
349     raise CrabException(msg)
350 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
351 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
352     raise CrabException(msg)
353 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
354 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
355 slacapra 1.1 raise CrabException(msg)
356 gutsche 1.67 except DataDiscovery_DBS2.NotExistingDatasetError_DBS2, ex :
357     msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
358     raise CrabException(msg)
359     except DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2, ex :
360     msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
361     raise CrabException(msg)
362     except DataDiscovery_DBS2.DataDiscoveryError_DBS2, ex:
363     msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
364     raise CrabException(msg)
365 slacapra 1.1
366 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
367 mkirn 1.37 self.eventsbyblock=self.pubdata.getEventsPerBlock()
368     self.eventsbyfile=self.pubdata.getEventsPerFile()
369 gutsche 1.3
370 slacapra 1.1 ## get max number of events
371 ewv 1.131 self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py
372 slacapra 1.1
373     ## Contact the DLS and build a list of sites hosting the fileblocks
374     try:
375 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
376 gutsche 1.6 dataloc.fetchDLSInfo()
377 slacapra 1.41 except DataLocation.DataLocationError , ex:
378 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
379     raise CrabException(msg)
380 ewv 1.131
381 slacapra 1.1
382 gutsche 1.35 sites = dataloc.getSites()
383     allSites = []
384     listSites = sites.values()
385 slacapra 1.63 for listSite in listSites:
386     for oneSite in listSite:
387 gutsche 1.35 allSites.append(oneSite)
388     allSites = self.uniquelist(allSites)
389 gutsche 1.3
390 gutsche 1.92 # screen output
391     common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
392    
393 gutsche 1.35 return sites
394 ewv 1.131
395 gutsche 1.35 def jobSplittingByBlocks(self, blockSites):
396 slacapra 1.9 """
397 gutsche 1.35 Perform job splitting. Jobs run over an integer number of files
398     and no more than one block.
399     ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
400     REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberofJobs,
401     self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
402     self.maxEvents, self.filesbyblock
403     SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
404     self.total_number_of_jobs - Total # of jobs
405     self.list_of_args - File(s) job will run on (a list of lists)
406     """
407    
408     # ---- Handle the possible job splitting configurations ---- #
409     if (self.selectTotalNumberEvents):
410     totalEventsRequested = self.total_number_of_events
411     if (self.selectEventsPerJob):
412     eventsPerJobRequested = self.eventsPerJob
413     if (self.selectNumberOfJobs):
414     totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
415    
416     # If user requested all the events in the dataset
417     if (totalEventsRequested == -1):
418     eventsRemaining=self.maxEvents
419     # If user requested more events than are in the dataset
420     elif (totalEventsRequested > self.maxEvents):
421     eventsRemaining = self.maxEvents
422     common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
423     # If user requested less events than are in the dataset
424     else:
425     eventsRemaining = totalEventsRequested
426 slacapra 1.22
427 slacapra 1.41 # If user requested more events per job than are in the dataset
428     if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
429     eventsPerJobRequested = self.maxEvents
430    
431 gutsche 1.35 # For user info at end
432     totalEventCount = 0
433 gutsche 1.3
434 gutsche 1.35 if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
435     eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
436 slacapra 1.22
437 gutsche 1.35 if (self.selectNumberOfJobs):
438     common.logger.message("May not create the exact number_of_jobs requested.")
439 slacapra 1.23
440 gutsche 1.38 if ( self.ncjobs == 'all' ) :
441     totalNumberOfJobs = 999999999
442     else :
443     totalNumberOfJobs = self.ncjobs
444 ewv 1.131
445 gutsche 1.38
446 gutsche 1.35 blocks = blockSites.keys()
447     blockCount = 0
448     # Backup variable in case self.maxEvents counted events in a non-included block
449     numBlocksInDataset = len(blocks)
450 gutsche 1.3
451 gutsche 1.35 jobCount = 0
452     list_of_lists = []
453 gutsche 1.3
454 gutsche 1.92 # list tracking which jobs are in which jobs belong to which block
455     jobsOfBlock = {}
456    
457 gutsche 1.35 # ---- Iterate over the blocks in the dataset until ---- #
458     # ---- we've met the requested total # of events ---- #
459 gutsche 1.38 while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
460 gutsche 1.35 block = blocks[blockCount]
461 gutsche 1.44 blockCount += 1
462 gutsche 1.104 if block not in jobsOfBlock.keys() :
463     jobsOfBlock[block] = []
464 ewv 1.131
465 gutsche 1.68 if self.eventsbyblock.has_key(block) :
466     numEventsInBlock = self.eventsbyblock[block]
467     common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
468 ewv 1.131
469 gutsche 1.68 files = self.filesbyblock[block]
470     numFilesInBlock = len(files)
471     if (numFilesInBlock <= 0):
472     continue
473     fileCount = 0
474    
475     # ---- New block => New job ---- #
476 ewv 1.131 parString = ""
477 gutsche 1.68 # counter for number of events in files currently worked on
478     filesEventCount = 0
479     # flag if next while loop should touch new file
480     newFile = 1
481     # job event counter
482     jobSkipEventCount = 0
483 ewv 1.131
484 gutsche 1.68 # ---- Iterate over the files in the block until we've met the requested ---- #
485     # ---- total # of events or we've gone over all the files in this block ---- #
486     while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
487     file = files[fileCount]
488     if newFile :
489     try:
490     numEventsInFile = self.eventsbyfile[file]
491     common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
492     # increase filesEventCount
493     filesEventCount += numEventsInFile
494     # Add file to current job
495     parString += '\\\"' + file + '\\\"\,'
496     newFile = 0
497     except KeyError:
498     common.logger.message("File "+str(file)+" has unknown number of events: skipping")
499 ewv 1.131
500 gutsche 1.38
501 gutsche 1.68 # if less events in file remain than eventsPerJobRequested
502     if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
503     # if last file in block
504     if ( fileCount == numFilesInBlock-1 ) :
505     # end job using last file, use remaining events in block
506     # close job and touch new file
507     fullString = parString[:-2]
508     list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
509     common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
510     self.jobDestination.append(blockSites[block])
511     common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
512 gutsche 1.92 # fill jobs of block dictionary
513 gutsche 1.104 jobsOfBlock[block].append(jobCount+1)
514 gutsche 1.68 # reset counter
515     jobCount = jobCount + 1
516     totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
517     eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
518     jobSkipEventCount = 0
519     # reset file
520 ewv 1.131 parString = ""
521 gutsche 1.68 filesEventCount = 0
522     newFile = 1
523     fileCount += 1
524     else :
525     # go to next file
526     newFile = 1
527     fileCount += 1
528     # if events in file equal to eventsPerJobRequested
529     elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
530 gutsche 1.38 # close job and touch new file
531     fullString = parString[:-2]
532 gutsche 1.68 list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
533     common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
534 gutsche 1.38 self.jobDestination.append(blockSites[block])
535     common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
536 gutsche 1.104 jobsOfBlock[block].append(jobCount+1)
537 gutsche 1.38 # reset counter
538     jobCount = jobCount + 1
539 gutsche 1.68 totalEventCount = totalEventCount + eventsPerJobRequested
540     eventsRemaining = eventsRemaining - eventsPerJobRequested
541 gutsche 1.38 jobSkipEventCount = 0
542     # reset file
543 ewv 1.131 parString = ""
544 gutsche 1.38 filesEventCount = 0
545     newFile = 1
546     fileCount += 1
547 ewv 1.131
548 gutsche 1.68 # if more events in file remain than eventsPerJobRequested
549 gutsche 1.38 else :
550 gutsche 1.68 # close job but don't touch new file
551     fullString = parString[:-2]
552     list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
553     common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
554     self.jobDestination.append(blockSites[block])
555     common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
556 gutsche 1.104 jobsOfBlock[block].append(jobCount+1)
557 gutsche 1.68 # increase counter
558     jobCount = jobCount + 1
559     totalEventCount = totalEventCount + eventsPerJobRequested
560     eventsRemaining = eventsRemaining - eventsPerJobRequested
561     # calculate skip events for last file
562     # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
563     jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
564     # remove all but the last file
565     filesEventCount = self.eventsbyfile[file]
566 ewv 1.131 parString = ""
567 gutsche 1.68 parString += '\\\"' + file + '\\\"\,'
568     pass # END if
569     pass # END while (iterate over files in the block)
570 gutsche 1.35 pass # END while (iterate over blocks in the dataset)
571 slacapra 1.41 self.ncjobs = self.total_number_of_jobs = jobCount
572 gutsche 1.38 if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
573 gutsche 1.35 common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
574 gutsche 1.92 common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
575 ewv 1.131
576 gutsche 1.92 # screen output
577     screenOutput = "List of jobs and available destination sites:\n\n"
578    
579 mcinquil 1.124 # keep trace of block with no sites to print a warning at the end
580     noSiteBlock = []
581     bloskNoSite = []
582    
583 gutsche 1.92 blockCounter = 0
584 gutsche 1.104 for block in blocks:
585     if block in jobsOfBlock.keys() :
586     blockCounter += 1
587 fanzago 1.115 screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
588 mcinquil 1.124 if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
589 ewv 1.131 noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
590 mcinquil 1.124 bloskNoSite.append( blockCounter )
591 ewv 1.131
592 mcinquil 1.124 common.logger.message(screenOutput)
593 fanzago 1.127 if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
594 mcinquil 1.126 msg = 'WARNING: No sites are hosting any part of data for block:\n '
595     virgola = ""
596     if len(bloskNoSite) > 1:
597     virgola = ","
598     for block in bloskNoSite:
599     msg += ' ' + str(block) + virgola
600     msg += '\n Related jobs:\n '
601     virgola = ""
602     if len(noSiteBlock) > 1:
603     virgola = ","
604     for range_jobs in noSiteBlock:
605     msg += str(range_jobs) + virgola
606     msg += '\n will not be submitted and this block of data can not be analyzed!\n'
607     common.logger.message(msg)
608 gutsche 1.92
609 slacapra 1.9 self.list_of_args = list_of_lists
610     return
611    
612 slacapra 1.21 def jobSplittingNoInput(self):
613 slacapra 1.9 """
614     Perform job splitting based on number of event per job
615     """
616     common.logger.debug(5,'Splitting per events')
617 fanzago 1.130
618 ewv 1.131 if (self.selectEventsPerJob):
619 fanzago 1.130 common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
620     if (self.selectNumberOfJobs):
621     common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
622     if (self.selectTotalNumberEvents):
623     common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
624 slacapra 1.9
625 slacapra 1.10 if (self.total_number_of_events < 0):
626     msg='Cannot split jobs per Events with "-1" as total number of events'
627     raise CrabException(msg)
628    
629 slacapra 1.22 if (self.selectEventsPerJob):
630 spiga 1.65 if (self.selectTotalNumberEvents):
631     self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
632 ewv 1.131 elif(self.selectNumberOfJobs) :
633 spiga 1.65 self.total_number_of_jobs =self.theNumberOfJobs
634 ewv 1.131 self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
635 spiga 1.65
636 slacapra 1.22 elif (self.selectNumberOfJobs) :
637     self.total_number_of_jobs = self.theNumberOfJobs
638     self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
639 ewv 1.131
640 slacapra 1.9 common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs))
641    
642     # is there any remainder?
643     check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)
644    
645     common.logger.debug(5,'Check '+str(check))
646    
647 gutsche 1.35 common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
648 slacapra 1.9 if check > 0:
649 gutsche 1.35 common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
650 slacapra 1.9
651 slacapra 1.10 # argument is seed number.$i
652 slacapra 1.9 self.list_of_args = []
653     for i in range(self.total_number_of_jobs):
654 gutsche 1.35 ## Since there is no input, any site is good
655 slacapra 1.86 # self.jobDestination.append(["Any"])
656 ewv 1.131 self.jobDestination.append([""]) #must be empty to write correctly the xml
657 slacapra 1.90 args=[]
658 spiga 1.57 if (self.firstRun):
659     ## pythia first run
660 slacapra 1.86 #self.list_of_args.append([(str(self.firstRun)+str(i))])
661 slacapra 1.90 args.append(str(self.firstRun)+str(i))
662 spiga 1.57 else:
663     ## no first run
664 slacapra 1.86 #self.list_of_args.append([str(i)])
665 slacapra 1.90 args.append(str(i))
666 slacapra 1.23 if (self.sourceSeed):
667 slacapra 1.90 args.append(str(self.sourceSeed)+str(i))
668 slacapra 1.28 if (self.sourceSeedVtx):
669 slacapra 1.90 ## + vtx random seed
670     args.append(str(self.sourceSeedVtx)+str(i))
671     if (self.sourceSeedG4):
672     ## + G4 random seed
673     args.append(str(self.sourceSeedG4)+str(i))
674 ewv 1.131 if (self.sourceSeedMix):
675 slacapra 1.90 ## + Mix random seed
676     args.append(str(self.sourceSeedMix)+str(i))
677     pass
678     pass
679     self.list_of_args.append(args)
680     pass
681 ewv 1.131
682 slacapra 1.90 # print self.list_of_args
683 gutsche 1.3
684     return
685    
686 spiga 1.42
687     def jobSplittingForScript(self):#CarlosDaniele
688     """
689     Perform job splitting based on number of job
690     """
691     common.logger.debug(5,'Splitting per job')
692     common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
693    
694     self.total_number_of_jobs = self.theNumberOfJobs
695    
696     common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs))
697    
698     common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')
699    
700     # argument is seed number.$i
701     self.list_of_args = []
702     for i in range(self.total_number_of_jobs):
703     ## Since there is no input, any site is good
704     # self.jobDestination.append(["Any"])
705     self.jobDestination.append([""])
706     ## no random seed
707     self.list_of_args.append([str(i)])
708     return
709    
710 gutsche 1.3 def split(self, jobParams):
711 ewv 1.131
712 gutsche 1.3 common.jobDB.load()
713     #### Fabio
714     njobs = self.total_number_of_jobs
715 slacapra 1.9 arglist = self.list_of_args
716 gutsche 1.3 # create the empty structure
717     for i in range(njobs):
718     jobParams.append("")
719 ewv 1.131
720 gutsche 1.3 for job in range(njobs):
721 slacapra 1.17 jobParams[job] = arglist[job]
722     # print str(arglist[job])
723     # print jobParams[job]
724 gutsche 1.3 common.jobDB.setArguments(job, jobParams[job])
725 gutsche 1.35 common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
726     common.jobDB.setDestination(job, self.jobDestination[job])
727 gutsche 1.3
728     common.jobDB.save()
729     return
730 ewv 1.131
731 gutsche 1.3 def getJobTypeArguments(self, nj, sched):
732 slacapra 1.17 result = ''
733     for i in common.jobDB.arguments(nj):
734     result=result+str(i)+" "
735     return result
736 ewv 1.131
737 gutsche 1.3 def numberOfJobs(self):
738     # Fabio
739     return self.total_number_of_jobs
740    
741 slacapra 1.1 def getTarBall(self, exe):
742     """
743     Return the TarBall with lib and exe
744     """
745 ewv 1.131
746 slacapra 1.1 # if it exist, just return it
747 corvo 1.56 #
748     # Marco. Let's start to use relative path for Boss XML files
749     #
750     self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
751 slacapra 1.1 if os.path.exists(self.tgzNameWithPath):
752     return self.tgzNameWithPath
753    
754     # Prepare a tar gzipped file with user binaries.
755     self.buildTar_(exe)
756    
757     return string.strip(self.tgzNameWithPath)
758    
759     def buildTar_(self, executable):
760    
761     # First of all declare the user Scram area
762     swArea = self.scram.getSWArea_()
763     #print "swArea = ", swArea
764 slacapra 1.63 # swVersion = self.scram.getSWVersion()
765     # print "swVersion = ", swVersion
766 slacapra 1.1 swReleaseTop = self.scram.getReleaseTop_()
767     #print "swReleaseTop = ", swReleaseTop
768 ewv 1.131
769 slacapra 1.1 ## check if working area is release top
770     if swReleaseTop == '' or swArea == swReleaseTop:
771     return
772    
773 slacapra 1.61 import tarfile
774     try: # create tar ball
775     tar = tarfile.open(self.tgzNameWithPath, "w:gz")
776     ## First find the executable
777 slacapra 1.86 if (self.executable != ''):
778 slacapra 1.61 exeWithPath = self.scram.findFile_(executable)
779     if ( not exeWithPath ):
780     raise CrabException('User executable '+executable+' not found')
781 ewv 1.131
782 slacapra 1.61 ## then check if it's private or not
783     if exeWithPath.find(swReleaseTop) == -1:
784     # the exe is private, so we must ship
785     common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
786     path = swArea+'/'
787 corvo 1.85 # distinguish case when script is in user project area or given by full path somewhere else
788     if exeWithPath.find(path) >= 0 :
789     exe = string.replace(exeWithPath, path,'')
790 slacapra 1.129 tar.add(path+exe,exe)
791 corvo 1.85 else :
792     tar.add(exeWithPath,os.path.basename(executable))
793 slacapra 1.61 pass
794     else:
795     # the exe is from release, we'll find it on WN
796     pass
797 ewv 1.131
798 slacapra 1.61 ## Now get the libraries: only those in local working area
799     libDir = 'lib'
800     lib = swArea+'/' +libDir
801     common.logger.debug(5,"lib "+lib+" to be tarred")
802     if os.path.exists(lib):
803     tar.add(lib,libDir)
804 ewv 1.131
805 slacapra 1.61 ## Now check if module dir is present
806     moduleDir = 'module'
807     module = swArea + '/' + moduleDir
808     if os.path.isdir(module):
809     tar.add(module,moduleDir)
810    
811     ## Now check if any data dir(s) is present
812     swAreaLen=len(swArea)
813     for root, dirs, files in os.walk(swArea):
814     if "data" in dirs:
815     common.logger.debug(5,"data "+root+"/data"+" to be tarred")
816     tar.add(root+"/data",root[swAreaLen:]+"/data")
817    
818     ## Add ProdAgent dir to tar
819     paDir = 'ProdAgentApi'
820     pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
821     if os.path.isdir(pa):
822     tar.add(pa,paDir)
823 fanzago 1.93
824     ### FEDE FOR DBS PUBLICATION
825     ## Add PRODCOMMON dir to tar
826     prodcommonDir = 'ProdCommon'
827     prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
828     if os.path.isdir(prodcommonPath):
829     tar.add(prodcommonPath,prodcommonDir)
830 ewv 1.131 #############################
831    
832 slacapra 1.61 common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
833     tar.close()
834     except :
835     raise CrabException('Could not create tar-ball')
836 gutsche 1.72
837     ## check for tarball size
838     tarballinfo = os.stat(self.tgzNameWithPath)
839     if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
840     raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
841    
842 slacapra 1.61 ## create tar-ball with ML stuff
843 ewv 1.131 self.MLtgzfile = common.work_space.pathForTgz()+'share/MLfiles.tgz'
844 slacapra 1.61 try:
845     tar = tarfile.open(self.MLtgzfile, "w:gz")
846     path=os.environ['CRABDIR'] + '/python/'
847     for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
848     tar.add(path+file,file)
849     common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
850     tar.close()
851     except :
852 corvo 1.58 raise CrabException('Could not create ML files tar-ball')
853 ewv 1.131
854 slacapra 1.1 return
855 ewv 1.131
856 slacapra 1.97 def additionalInputFileTgz(self):
857     """
858     Put all additional files into a tar ball and return its name
859     """
860     import tarfile
861     tarName= common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
862     tar = tarfile.open(tarName, "w:gz")
863     for file in self.additional_inbox_files:
864     tar.add(file,string.split(file,'/')[-1])
865     common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
866     tar.close()
867     return tarName
868    
869 slacapra 1.1 def wsSetupEnvironment(self, nj):
870     """
871     Returns part of a job script which prepares
872     the execution environment for the job 'nj'.
873     """
874     # Prepare JobType-independent part
875 ewv 1.131 txt = ''
876 fanzago 1.133 txt += 'echo ">>> setup environment"\n'
877 ewv 1.131 txt += 'if [ $middleware == LCG ]; then \n'
878 gutsche 1.3 txt += self.wsSetupCMSLCGEnvironment_()
879     txt += 'elif [ $middleware == OSG ]; then\n'
880 gutsche 1.43 txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
881 ewv 1.132 txt += ' if [ ! $? == 0 ] ;then\n'
882 gutsche 1.7 txt += ' echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
883 fanzago 1.133 txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
884     txt += ' echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
885     txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
886 gutsche 1.3 txt += ' exit 1\n'
887     txt += ' fi\n'
888 fanzago 1.133 txt += ' echo ">>> Created working directory: $WORKING_DIR"\n'
889 gutsche 1.3 txt += '\n'
890     txt += ' echo "Change to working directory: $WORKING_DIR"\n'
891     txt += ' cd $WORKING_DIR\n'
892 fanzago 1.133 txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
893 ewv 1.131 txt += self.wsSetupCMSOSGEnvironment_()
894 fanzago 1.133 #txt += ' echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
895     #txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
896 gutsche 1.3 txt += 'fi\n'
897 slacapra 1.1
898     # Prepare JobType-specific part
899     scram = self.scram.commandName()
900     txt += '\n\n'
901 fanzago 1.133 txt += 'echo ">>> specific cmssw setup environment:"\n'
902     txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
903 slacapra 1.1 txt += scram+' project CMSSW '+self.version+'\n'
904     txt += 'status=$?\n'
905     txt += 'if [ $status != 0 ] ; then\n'
906 fanzago 1.133 txt += ' echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
907     txt += ' echo "JOB_EXIT_STATUS = 10034"\n'
908     txt += ' echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
909     txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
910 gutsche 1.3 txt += ' if [ $middleware == OSG ]; then \n'
911     txt += ' cd $RUNTIME_AREA\n'
912 fanzago 1.133 txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
913     txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
914 gutsche 1.3 txt += ' /bin/rm -rf $WORKING_DIR\n'
915     txt += ' if [ -d $WORKING_DIR ] ;then\n'
916 fanzago 1.96 txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
917     txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
918     txt += ' echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
919     txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
920 gutsche 1.3 txt += ' fi\n'
921     txt += ' fi \n'
922 fanzago 1.133 txt += ' exit 1 \n'
923 slacapra 1.1 txt += 'fi \n'
924     txt += 'cd '+self.version+'\n'
925 fanzago 1.99 ########## FEDE FOR DBS2 ######################
926     txt += 'SOFTWARE_DIR=`pwd`\n'
927 fanzago 1.133 txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
928 fanzago 1.99 ###############################################
929 slacapra 1.1 ### needed grep for bug in scramv1 ###
930 corvo 1.58 txt += scram+' runtime -sh\n'
931 slacapra 1.1 txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
932 corvo 1.58 txt += 'echo $PATH\n'
933 slacapra 1.1
934     # Handle the arguments:
935     txt += "\n"
936 gutsche 1.7 txt += "## number of arguments (first argument always jobnumber)\n"
937 slacapra 1.1 txt += "\n"
938 mkirn 1.32 txt += "if [ $nargs -lt 2 ]\n"
939 slacapra 1.1 txt += "then\n"
940 mkirn 1.33 txt += " echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
941 gutsche 1.3 txt += ' echo "JOB_EXIT_STATUS = 50113"\n'
942 gutsche 1.7 txt += ' echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
943 slacapra 1.1 txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
944 gutsche 1.3 txt += ' if [ $middleware == OSG ]; then \n'
945     txt += ' cd $RUNTIME_AREA\n'
946 fanzago 1.133 txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
947     txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
948 gutsche 1.3 txt += ' /bin/rm -rf $WORKING_DIR\n'
949     txt += ' if [ -d $WORKING_DIR ] ;then\n'
950 fanzago 1.96 txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
951     txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
952     txt += ' echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
953     txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
954 gutsche 1.3 txt += ' fi\n'
955     txt += ' fi \n'
956 slacapra 1.1 txt += " exit 1\n"
957     txt += "fi\n"
958     txt += "\n"
959    
960     # Prepare job-specific part
961     job = common.job_list[nj]
962 fanzago 1.93 ### FEDE FOR DBS OUTPUT PUBLICATION
963 ewv 1.131 if (self.datasetPath):
964 fanzago 1.93 txt += '\n'
965     txt += 'DatasetPath='+self.datasetPath+'\n'
966    
967     datasetpath_split = self.datasetPath.split("/")
968 ewv 1.131
969 fanzago 1.93 txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
970     txt += 'DataTier='+datasetpath_split[2]+'\n'
971 fanzago 1.96 txt += 'ApplicationFamily=cmsRun\n'
972 fanzago 1.93
973     else:
974     txt += 'DatasetPath=MCDataTier\n'
975     txt += 'PrimaryDataset=null\n'
976     txt += 'DataTier=null\n'
977     txt += 'ApplicationFamily=MCDataTier\n'
978 spiga 1.42 if self.pset != None: #CarlosDaniele
979     pset = os.path.basename(job.configFilename())
980     txt += '\n'
981 spiga 1.95 txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
982 spiga 1.42 if (self.datasetPath): # standard job
983     txt += 'InputFiles=${args[1]}\n'
984     txt += 'MaxEvents=${args[2]}\n'
985     txt += 'SkipEvents=${args[3]}\n'
986     txt += 'echo "Inputfiles:<$InputFiles>"\n'
987 ewv 1.131 txt += 'sed "s#\'INPUTFILE\'#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
988 spiga 1.42 txt += 'echo "MaxEvents:<$MaxEvents>"\n'
989 ewv 1.131 txt += 'sed "s#int32 input = 0#int32 input = $MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
990 spiga 1.42 txt += 'echo "SkipEvents:<$SkipEvents>"\n'
991 ewv 1.131 txt += 'sed "s#uint32 skipEvents = 0#uint32 skipEvents = $SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
992 spiga 1.42 else: # pythia like job
993 slacapra 1.90 seedIndex=1
994     if (self.firstRun):
995     txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
996 spiga 1.57 txt += 'echo "FirstRun: <$FirstRun>"\n'
997 ewv 1.131 txt += 'sed "s#uint32 firstRun = 0#uint32 firstRun = $FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
998 slacapra 1.90 seedIndex=seedIndex+1
999    
1000 spiga 1.57 if (self.sourceSeed):
1001 slacapra 1.90 txt += 'Seed=${args['+str(seedIndex)+']}\n'
1002 ewv 1.131 txt += 'sed "s#uint32 sourceSeed = 0#uint32 sourceSeed = $Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1003 slacapra 1.90 seedIndex=seedIndex+1
1004     ## the following seeds are not always present
1005 spiga 1.42 if (self.sourceSeedVtx):
1006 slacapra 1.90 txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
1007 spiga 1.42 txt += 'echo "VtxSeed: <$VtxSeed>"\n'
1008 ewv 1.131 txt += 'sed "s#uint32 VtxSmeared = 0#uint32 VtxSmeared = $VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1009 slacapra 1.90 seedIndex += 1
1010     if (self.sourceSeedG4):
1011     txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
1012     txt += 'echo "G4Seed: <$G4Seed>"\n'
1013 ewv 1.131 txt += 'sed "s#uint32 g4SimHits = 0#uint32 g4SimHits = $G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1014 slacapra 1.90 seedIndex += 1
1015     if (self.sourceSeedMix):
1016     txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
1017     txt += 'echo "MixSeed: <$mixSeed>"\n'
1018 ewv 1.131 txt += 'sed "s#uint32 mix = 0#uint32 mix = $mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1019 slacapra 1.90 seedIndex += 1
1020     pass
1021     pass
1022     txt += 'mv -f '+pset+' pset.cfg\n'
1023 slacapra 1.1
1024     if len(self.additional_inbox_files) > 0:
1025 slacapra 1.97 txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
1026     txt += ' tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
1027     txt += 'fi\n'
1028 ewv 1.131 pass
1029 slacapra 1.1
1030 spiga 1.42 if self.pset != None: #CarlosDaniele
1031     txt += '\n'
1032     txt += 'echo "***** cat pset.cfg *********"\n'
1033     txt += 'cat pset.cfg\n'
1034     txt += 'echo "****** end pset.cfg ********"\n'
1035     txt += '\n'
1036 fanzago 1.93 ### FEDE FOR DBS OUTPUT PUBLICATION
1037 fanzago 1.94 txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
1038     txt += 'echo "PSETHASH = $PSETHASH" \n'
1039 ewv 1.131 ##############
1040 fanzago 1.93 txt += '\n'
1041 gutsche 1.3 return txt
1042    
1043 slacapra 1.63 def wsBuildExe(self, nj=0):
1044 gutsche 1.3 """
1045     Put in the script the commands to build an executable
1046     or a library.
1047     """
1048    
1049     txt = ""
1050    
1051     if os.path.isfile(self.tgzNameWithPath):
1052 fanzago 1.133 txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
1053 gutsche 1.3 txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
1054     txt += 'untar_status=$? \n'
1055     txt += 'if [ $untar_status -ne 0 ]; then \n'
1056     txt += ' echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
1057     txt += ' echo "JOB_EXIT_STATUS = $untar_status" \n'
1058 gutsche 1.7 txt += ' echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
1059 gutsche 1.3 txt += ' if [ $middleware == OSG ]; then \n'
1060     txt += ' cd $RUNTIME_AREA\n'
1061 fanzago 1.133 txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1062     txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
1063 gutsche 1.3 txt += ' /bin/rm -rf $WORKING_DIR\n'
1064     txt += ' if [ -d $WORKING_DIR ] ;then\n'
1065 gutsche 1.13 txt += ' echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
1066     txt += ' echo "JOB_EXIT_STATUS = 50999"\n'
1067     txt += ' echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
1068     txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1069 gutsche 1.3 txt += ' fi\n'
1070     txt += ' fi \n'
1071     txt += ' \n'
1072 gutsche 1.7 txt += ' exit 1 \n'
1073 gutsche 1.3 txt += 'else \n'
1074     txt += ' echo "Successful untar" \n'
1075     txt += 'fi \n'
1076 gutsche 1.50 txt += '\n'
1077 fanzago 1.133 txt += 'echo ">>> Include ProdAgentApi and PRODCOMMON in PYTHONPATH:"\n'
1078 gutsche 1.50 txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1079 fanzago 1.93 #### FEDE FOR DBS OUTPUT PUBLICATION
1080 fanzago 1.99 txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
1081 gutsche 1.50 txt += 'else\n'
1082 fanzago 1.99 txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1083 fanzago 1.93 txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1084 ewv 1.131 ###################
1085 gutsche 1.50 txt += 'fi\n'
1086     txt += '\n'
1087    
1088 gutsche 1.3 pass
1089 ewv 1.131
1090 slacapra 1.1 return txt
1091    
1092     def modifySteeringCards(self, nj):
1093     """
1094 ewv 1.131 modify the card provided by the user,
1095 slacapra 1.1 writing a new card into share dir
1096     """
1097 ewv 1.131
1098 slacapra 1.1 def executableName(self):
1099 slacapra 1.70 if self.scriptExe: #CarlosDaniele
1100 spiga 1.42 return "sh "
1101     else:
1102     return self.executable
1103 slacapra 1.1
1104     def executableArgs(self):
1105 slacapra 1.70 if self.scriptExe:#CarlosDaniele
1106 spiga 1.42 return self.scriptExe + " $NJob"
1107 fanzago 1.115 else:
1108     # if >= CMSSW_1_5_X, add -e
1109     version_array = self.scram.getSWVersion().split('_')
1110     major = 0
1111     minor = 0
1112     try:
1113     major = int(version_array[1])
1114     minor = int(version_array[2])
1115     except:
1116 ewv 1.131 msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
1117 fanzago 1.115 raise CrabException(msg)
1118     if major >= 1 and minor >= 5 :
1119     return " -e -p pset.cfg"
1120     else:
1121     return " -p pset.cfg"
1122 slacapra 1.1
1123     def inputSandbox(self, nj):
1124     """
1125     Returns a list of filenames to be put in JDL input sandbox.
1126     """
1127     inp_box = []
1128 slacapra 1.53 # # dict added to delete duplicate from input sandbox file list
1129     # seen = {}
1130 slacapra 1.1 ## code
1131     if os.path.isfile(self.tgzNameWithPath):
1132     inp_box.append(self.tgzNameWithPath)
1133 corvo 1.58 if os.path.isfile(self.MLtgzfile):
1134     inp_box.append(self.MLtgzfile)
1135 slacapra 1.1 ## config
1136 slacapra 1.70 if not self.pset is None:
1137 corvo 1.56 inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1138 slacapra 1.1 ## additional input files
1139 slacapra 1.97 tgz = self.additionalInputFileTgz()
1140     inp_box.append(tgz)
1141 slacapra 1.1 return inp_box
1142    
1143     def outputSandbox(self, nj):
1144     """
1145     Returns a list of filenames to be put in JDL output sandbox.
1146     """
1147     out_box = []
1148    
1149     ## User Declared output files
1150 slacapra 1.54 for out in (self.output_file+self.output_file_sandbox):
1151 ewv 1.131 n_out = nj + 1
1152 slacapra 1.1 out_box.append(self.numberFile_(out,str(n_out)))
1153     return out_box
1154    
1155     def prepareSteeringCards(self):
1156     """
1157     Make initial modifications of the user's steering card file.
1158     """
1159     return
1160    
1161     def wsRenameOutput(self, nj):
1162     """
1163     Returns part of a job script which renames the produced files.
1164     """
1165    
1166     txt = '\n'
1167 fanzago 1.133 txt += 'echo" >>> directory content:"\n'
1168 gutsche 1.7 txt += 'ls \n'
1169 fanzago 1.133 txt = '\n'
1170 slacapra 1.54
1171 fanzago 1.128 txt += 'output_exit_status=0\n'
1172 ewv 1.131
1173 fanzago 1.128 for fileWithSuffix in (self.output_file_sandbox):
1174     output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1175     txt += '\n'
1176     txt += '# check output file\n'
1177     txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1178     txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1179     txt += ' cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1180     txt += 'else\n'
1181     txt += ' exit_status=60302\n'
1182     txt += ' echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1183     if common.scheduler.boss_scheduler_name == 'condor_g':
1184     txt += ' if [ $middleware == OSG ]; then \n'
1185     txt += ' echo "prepare dummy output file"\n'
1186     txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1187     txt += ' fi \n'
1188     txt += 'fi\n'
1189 ewv 1.131
1190 fanzago 1.128 for fileWithSuffix in (self.output_file):
1191 slacapra 1.1 output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1192     txt += '\n'
1193 gutsche 1.7 txt += '# check output file\n'
1194 slacapra 1.106 txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1195 fanzago 1.117 txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1196     txt += ' cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1197 slacapra 1.106 txt += 'else\n'
1198 fanzago 1.117 txt += ' exit_status=60302\n'
1199     txt += ' echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1200 fanzago 1.128 txt += ' echo "JOB_EXIT_STATUS = $exit_status"\n'
1201     txt += ' output_exit_status=$exit_status\n'
1202 gutsche 1.7 if common.scheduler.boss_scheduler_name == 'condor_g':
1203     txt += ' if [ $middleware == OSG ]; then \n'
1204     txt += ' echo "prepare dummy output file"\n'
1205     txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1206     txt += ' fi \n'
1207 slacapra 1.1 txt += 'fi\n'
1208 slacapra 1.105 file_list = []
1209     for fileWithSuffix in (self.output_file):
1210     file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1211 ewv 1.131
1212 slacapra 1.105 txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1213 gutsche 1.7 txt += 'cd $RUNTIME_AREA\n'
1214 fanzago 1.133 txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1215 slacapra 1.1 return txt
1216    
1217     def numberFile_(self, file, txt):
1218     """
1219     append _'txt' before last extension of a file
1220     """
1221     p = string.split(file,".")
1222     # take away last extension
1223     name = p[0]
1224     for x in p[1:-1]:
1225 slacapra 1.90 name=name+"."+x
1226 slacapra 1.1 # add "_txt"
1227     if len(p)>1:
1228 slacapra 1.90 ext = p[len(p)-1]
1229     result = name + '_' + txt + "." + ext
1230 slacapra 1.1 else:
1231 slacapra 1.90 result = name + '_' + txt
1232 ewv 1.131
1233 slacapra 1.1 return result
1234    
1235 slacapra 1.63 def getRequirements(self, nj=[]):
1236 slacapra 1.1 """
1237 ewv 1.131 return job requirements to add to jdl files
1238 slacapra 1.1 """
1239     req = ''
1240 slacapra 1.47 if self.version:
1241 slacapra 1.10 req='Member("VO-cms-' + \
1242 slacapra 1.47 self.version + \
1243 slacapra 1.10 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1244 farinafa 1.111 ## SL add requirement for OS version only if SL4
1245     #reSL4 = re.compile( r'slc4' )
1246 slacapra 1.109 if self.executable_arch: # and reSL4.search(self.executable_arch):
1247 gutsche 1.107 req+=' && Member("VO-cms-' + \
1248 slacapra 1.105 self.executable_arch + \
1249     '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1250 gutsche 1.35
1251     req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1252    
1253 slacapra 1.1 return req
1254 gutsche 1.3
1255     def configFilename(self):
1256     """ return the config filename """
1257     return self.name()+'.cfg'
1258    
1259     def wsSetupCMSOSGEnvironment_(self):
1260     """
1261     Returns part of a job script which is prepares
1262     the execution environment and which is common for all CMS jobs.
1263     """
1264 fanzago 1.133 txt = ' echo ">>> setup CMS OSG environment:"\n'
1265     txt += ' echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
1266     txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
1267 fanzago 1.136 txt += ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1268 ewv 1.135 txt += ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1269 mkirn 1.40 txt += ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1270 fanzago 1.133 txt += ' source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1271     txt += ' else\n'
1272 ewv 1.135 txt += ' echo "SET_CMS_ENV 10020 ==> ERROR $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1273 fanzago 1.133 txt += ' echo "JOB_EXIT_STATUS = 10020"\n'
1274     txt += ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1275     txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1276 gutsche 1.3 txt += '\n'
1277 fanzago 1.133 txt += ' cd $RUNTIME_AREA\n'
1278     txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1279     txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
1280     txt += ' /bin/rm -rf $WORKING_DIR\n'
1281     txt += ' if [ -d $WORKING_DIR ] ;then\n'
1282 ewv 1.135 txt += ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1283 fanzago 1.133 txt += ' echo "JOB_EXIT_STATUS = 10017"\n'
1284     txt += ' echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1285     txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1286     txt += ' fi\n'
1287 gutsche 1.3 txt += '\n'
1288 fanzago 1.133 txt += ' exit 1\n'
1289     txt += ' fi\n'
1290 gutsche 1.3 txt += '\n'
1291 fanzago 1.133 txt += ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1292 fanzago 1.136 txt += ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1293 gutsche 1.3
1294     return txt
1295 ewv 1.131
1296 gutsche 1.3 ### OLI_DANIELE
1297     def wsSetupCMSLCGEnvironment_(self):
1298     """
1299     Returns part of a job script which is prepares
1300     the execution environment and which is common for all CMS jobs.
1301     """
1302 fanzago 1.133 txt = ' echo ">>> setup CMS LCG environment:"\n'
1303     txt += ' echo "set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
1304     txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
1305     txt += ' export BUILD_ARCH='+self.executable_arch+'\n'
1306     txt += ' if [ ! $VO_CMS_SW_DIR ] ;then\n'
1307     txt += ' echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1308     txt += ' echo "JOB_EXIT_STATUS = 10031" \n'
1309     txt += ' echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1310     txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1311     txt += ' exit 1\n'
1312     txt += ' else\n'
1313     txt += ' echo "Sourcing environment... "\n'
1314     txt += ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1315     txt += ' echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1316     txt += ' echo "JOB_EXIT_STATUS = 10020"\n'
1317     txt += ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1318     txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1319     txt += ' exit 1\n'
1320     txt += ' fi\n'
1321     txt += ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1322     txt += ' source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1323     txt += ' result=$?\n'
1324     txt += ' if [ $result -ne 0 ]; then\n'
1325     txt += ' echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1326     txt += ' echo "JOB_EXIT_STATUS = 10032"\n'
1327     txt += ' echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1328     txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1329     txt += ' exit 1\n'
1330     txt += ' fi\n'
1331     txt += ' fi\n'
1332     txt += ' \n'
1333     txt += ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1334 gutsche 1.3 return txt
1335 gutsche 1.5
1336 ewv 1.131 ### FEDE FOR DBS OUTPUT PUBLICATION
1337 fanzago 1.93 def modifyReport(self, nj):
1338     """
1339 ewv 1.131 insert the part of the script that modifies the FrameworkJob Report
1340 fanzago 1.93 """
1341 fanzago 1.94
1342 ewv 1.131 txt = ''
1343 fanzago 1.94 try:
1344 ewv 1.131 publish_data = int(self.cfg_params['USER.publish_data'])
1345 fanzago 1.94 except KeyError:
1346     publish_data = 0
1347 ewv 1.131 if (publish_data == 1):
1348 fanzago 1.133 txt += 'echo ">>> Modify Job Report:" \n'
1349 fanzago 1.122 ################ FEDE FOR DBS2 #############################################
1350     txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1351     #############################################################################
1352 fanzago 1.94
1353 fanzago 1.122 txt += 'if [ -z "$SE" ]; then\n'
1354     txt += ' SE="" \n'
1355 ewv 1.131 txt += 'fi \n'
1356 fanzago 1.122 txt += 'if [ -z "$SE_PATH" ]; then\n'
1357     txt += ' SE_PATH="" \n'
1358 ewv 1.131 txt += 'fi \n'
1359     txt += 'echo "SE = $SE"\n'
1360 fanzago 1.122 txt += 'echo "SE_PATH = $SE_PATH"\n'
1361 fanzago 1.94
1362     processedDataset = self.cfg_params['USER.publish_data_name']
1363     txt += 'ProcessedDataset='+processedDataset+'\n'
1364     #### LFN=/store/user/<user>/processedDataset_PSETHASH
1365     txt += 'if [ "$SE_PATH" == "" ]; then\n'
1366 fanzago 1.101 #### FEDE: added slash in LFN ##############
1367     txt += ' FOR_LFN=/copy_problems/ \n'
1368 ewv 1.131 txt += 'else \n'
1369 fanzago 1.94 txt += ' tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1370 fanzago 1.101 ##### FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1371 fanzago 1.94 txt += ' FOR_LFN=/store$tmp \n'
1372 ewv 1.131 txt += 'fi \n'
1373 fanzago 1.94 txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1374     txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1375 spiga 1.103 txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1376 ewv 1.131 #txt += 'echo "$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1377     txt += 'echo "$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1378 spiga 1.103 txt += '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1379     #txt += '$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1380 ewv 1.131
1381 spiga 1.103 txt += 'modifyReport_result=$?\n'
1382     txt += 'echo modifyReport_result = $modifyReport_result\n'
1383     txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1384     txt += ' exit_status=1\n'
1385     txt += ' echo "ERROR: Problem with ModifyJobReport"\n'
1386     txt += 'else\n'
1387     txt += ' mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1388     txt += 'fi\n'
1389 fanzago 1.94 else:
1390 fanzago 1.122 txt += 'echo "no data publication required"\n'
1391 fanzago 1.93 return txt
1392 fanzago 1.99
1393     def cleanEnv(self):
1394 ewv 1.131 txt = ''
1395     txt += 'if [ $middleware == OSG ]; then\n'
1396 fanzago 1.99 txt += ' cd $RUNTIME_AREA\n'
1397 fanzago 1.133 txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1398     txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
1399 fanzago 1.99 txt += ' /bin/rm -rf $WORKING_DIR\n'
1400     txt += ' if [ -d $WORKING_DIR ] ;then\n'
1401 fanzago 1.133 txt += ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1402     txt += ' echo "JOB_EXIT_STATUS = 60999"\n'
1403     txt += ' echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1404     txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1405 fanzago 1.99 txt += ' fi\n'
1406     txt += 'fi\n'
1407     txt += '\n'
1408     return txt
1409 fanzago 1.93
1410 gutsche 1.5 def setParam_(self, param, value):
1411     self._params[param] = value
1412    
1413     def getParams(self):
1414     return self._params
1415 gutsche 1.8
1416     def setTaskid_(self):
1417     self._taskId = self.cfg_params['taskId']
1418 ewv 1.131
1419 gutsche 1.8 def getTaskid(self):
1420     return self._taskId
1421 gutsche 1.35
1422     def uniquelist(self, old):
1423     """
1424     remove duplicates from a list
1425     """
1426     nd={}
1427     for e in old:
1428     nd[e]=0
1429     return nd.keys()
1430 mcinquil 1.121
1431    
1432     def checkOut(self, limit):
1433     """
1434     check the dimension of the output files
1435     """
1436 fanzago 1.133 txt += 'echo ">>> Starting output sandbox limit check :"\n'
1437 mcinquil 1.121 allOutFiles = ""
1438     listOutFiles = []
1439     for fileOut in (self.output_file+self.output_file_sandbox):
1440     if fileOut.find('crab_fjr') == -1:
1441     allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob')
1442     listOutFiles.append(self.numberFile_(fileOut, '$NJob'))
1443     txt += 'echo "OUTPUT files: '+str(allOutFiles)+'";\n'
1444     txt += 'ls -gGhrta;\n'
1445     txt += 'sum=0;\n'
1446     txt += 'for file in '+str(allOutFiles)+' ; do\n'
1447     txt += ' if [ -e $file ]; then\n'
1448     txt += ' tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
1449     txt += ' sum=`expr $sum + $tt`\n'
1450     txt += ' else\n'
1451     txt += ' echo "WARNING: output file $file not found!"\n'
1452     txt += ' fi\n'
1453     txt += 'done\n'
1454     txt += 'echo "Total Output dimension: $sum";\n'
1455     txt += 'limit='+str(limit)+';\n'
1456     txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
1457     txt += 'if [ $limit -lt $sum ]; then\n'
1458     txt += ' echo "WARNING: output files have to big size - something will be lost;"\n'
1459     txt += ' echo " checking the output file sizes..."\n'
1460     """
1461     txt += ' dim=0;\n'
1462     txt += ' exclude=0;\n'
1463     txt += ' for files in '+str(allOutFiles)+' ; do\n'
1464     txt += ' sumTemp=0;\n'
1465     txt += ' for file2 in '+str(allOutFiles)+' ; do\n'
1466     txt += ' if [ $file != $file2 ]; then\n'
1467     txt += ' tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1468     txt += ' sumTemp=`expr $sumTemp + $tt`;\n'
1469     txt += ' fi\n'
1470     txt += ' done\n'
1471     txt += ' if [ $sumTemp -lt $limit ]; then\n'
1472     txt += ' if [ $dim -lt $sumTemp ]; then\n'
1473     txt += ' dim=$sumTemp;\n'
1474     txt += ' exclude=$file;\n'
1475     txt += ' fi\n'
1476     txt += ' fi\n'
1477     txt += ' done\n'
1478     txt += ' echo "Dimension calculated: $dim"; echo "File to exclude: $exclude";\n'
1479     """
1480     txt += ' tot=0;\n'
1481     txt += ' for file2 in '+str(allOutFiles)+' ; do\n'
1482     txt += ' tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1483     txt += ' tot=`expr $tot + $tt`;\n'
1484     txt += ' if [ $limit -lt $tot ]; then\n'
1485     txt += ' tot=`expr $tot - $tt`;\n'
1486     txt += ' fileLast=$file;\n'
1487     txt += ' break;\n'
1488     txt += ' fi\n'
1489     txt += ' done\n'
1490     txt += ' echo "Dimension calculated: $tot"; echo "First file to exclude: $file";\n'
1491 ewv 1.131 txt += ' flag=0;\n'
1492 mcinquil 1.121 txt += ' for filess in '+str(allOutFiles)+' ; do\n'
1493     txt += ' if [ $fileLast = $filess ]; then\n'
1494     txt += ' flag=1;\n'
1495     txt += ' fi\n'
1496     txt += ' if [ $flag -eq 1 ]; then\n'
1497     txt += ' rm -f $filess;\n'
1498     txt += ' fi\n'
1499     txt += ' done\n'
1500     txt += ' ls -agGhrt;\n'
1501     txt += ' echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
1502     txt += ' echo "JOB_EXIT_STATUS = 70000";\n'
1503     txt += ' exit_status=70000;\n'
1504     txt += 'else'
1505     txt += ' echo "Total Output dimension $sum is fine.";\n'
1506     txt += 'fi\n'
1507 fanzago 1.133 txt += 'echo "Ending output sandbox limit check"\n'
1508 mcinquil 1.121 return txt