ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.160.2.2.2.1
Committed: Fri Apr 4 12:55:47 2008 UTC (17 years ago) by ewv
Content type: text/x-python
Branch: CRAB_2_1_2_br
CVS Tags: CRAB_2_1_2_pre1
Changes since 1.160.2.2: +5 -3 lines
Log Message:
Fixes for CMSSW on branch

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5 fanzago 1.115 from BlackWhiteListParser import BlackWhiteListParser
6 slacapra 1.1 import common
7     import Scram
8 fanzago 1.160.2.1 from LFNBaseName import *
9 slacapra 1.105 import os, string, glob
10 slacapra 1.1
11     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs):
    """
    Configure a CMSSW job type from the user's CRAB configuration.

    cfg_params -- mapping of 'SECTION.key' configuration names to values
                  (accessed below through has_key(), get() and []).
    ncjobs     -- number of jobs requested to be created ('all' or a
                  number); used to cap the splitting in jobSplittingByBlocks.

    The constructor reads the configuration, runs data discovery when a
    dataset is given, prepares the tarball, performs the job splitting and
    finally writes the modified ParameterSet.

    Raises CrabException on an unsupported CMSSW/architecture pair, on
    missing mandatory parameters or input files, on an inconsistent
    splitting request, and when the ParameterSet cannot be edited.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug(3,'CMSSW::__init__')

    self.argsList = []

    self._params = {}
    self.cfg_params = cfg_params
    # init BlackWhiteListParser
    self.blackWhiteListParser = BlackWhiteListParser(cfg_params)

    self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',9.5))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    log = common.logger

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.additional_tgz_name = 'additional.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = '' #scrip use case Da
    self.datasetPath = '' #scrip use case Da

    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()

    #
    # Try to block creation in case of arch/version mismatch
    #

    a = string.split(self.version, "_")

    # FUTURE: Tests CMSSW and OS version. Can remove in time

    if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
        msg = "Warning: You are using %s version of CMSSW with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
        common.logger.message(msg)
    if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
        msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
        raise CrabException(msg)

    common.taskDB.setDict('codeVersion',self.version)
    self.setParam_('application', self.version)

    ### collect Data cards

    if not cfg_params.has_key('CMSSW.datasetpath'):
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)
    tmp = cfg_params['CMSSW.datasetpath']
    log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
    if string.lower(tmp)=='none':
        self.datasetPath = None
        self.selectNoInput = 1
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0

    # ML monitoring
    # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
    if not self.datasetPath:
        self.setParam_('dataset', 'None')
        self.setParam_('owner', 'None')
    else:
        try:
            datasetpath_split = self.datasetPath.split("/")
            # standard style
            self.setParam_('datasetFull', self.datasetPath)
            self.setParam_('dataset', datasetpath_split[1])
            self.setParam_('owner', datasetpath_split[2])
        except IndexError:
            # The path has fewer than three '/'-separated fields: fall
            # back to the whole path for both monitoring parameters.
            self.setParam_('dataset', self.datasetPath)
            self.setParam_('owner', self.datasetPath)

    self.setParam_('taskId', common.taskDB.dict('taskId'))

    self.dataTiers = []

    ## now the application
    self.executable = cfg_params.get('CMSSW.executable','cmsRun')
    self.setParam_('exe', self.executable)
    log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)

    if not cfg_params.has_key('CMSSW.pset'):
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
    if self.pset.lower() != 'none' :
        if (not os.path.exists(self.pset)):
            raise CrabException("User defined PSet file "+self.pset+" does not exist")
    else:
        # 'none' selects the script-only use case
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    tmp = cfg_params.get('CMSSW.output_file',None)
    if tmp :
        tmpOutFiles = string.split(tmp,',')
        log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
        for tmp in tmpOutFiles:
            tmp=string.strip(tmp)
            self.output_file.append(tmp)
    else:
        log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")

    # script_exe file as additional file in inputSandbox
    self.scriptExe = cfg_params.get('USER.script_exe',None)
    if self.scriptExe :
        if not os.path.isfile(self.scriptExe):
            msg ="ERROR. file "+self.scriptExe+" not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(string.strip(self.scriptExe))

    #CarlosDaniele
    # scriptExe is None (not '') when USER.script_exe is absent, so test
    # its truth value; comparing with '' never matched.
    if self.datasetPath == None and self.pset == None and not self.scriptExe :
        msg ="Error. script_exe not defined"
        raise CrabException(msg)

    ## additional input files
    if cfg_params.has_key('USER.additional_input_files'):
        tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
        for tmp in tmpAddFiles:
            tmp = string.strip(tmp)
            dirname = ''
            # startswith() copes with an empty entry (e.g. a trailing
            # comma), which then fails the existence check below instead
            # of raising IndexError on tmp[0].
            if not tmp.startswith("/"): dirname = "."
            files = []
            if string.find(tmp,"*")>-1:
                files = glob.glob(os.path.join(dirname, tmp))
                if len(files)==0:
                    raise CrabException("No additional input file found with this pattern: "+tmp)
            else:
                files.append(tmp)
            for file in files:
                if not os.path.exists(file):
                    raise CrabException("Additional input file not found: "+file)
                self.additional_inbox_files.append(string.strip(file))
        common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))

    ## Events per job
    if cfg_params.has_key('CMSSW.events_per_job'):
        self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
        self.selectEventsPerJob = 1
    else:
        self.eventsPerJob = -1
        self.selectEventsPerJob = 0

    ## number of jobs
    if cfg_params.has_key('CMSSW.number_of_jobs'):
        self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
        self.selectNumberOfJobs = 1
    else:
        self.theNumberOfJobs = 0
        self.selectNumberOfJobs = 0

    if cfg_params.has_key('CMSSW.total_number_of_events'):
        self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
        self.selectTotalNumberEvents = 1
    else:
        self.total_number_of_events = 0
        self.selectTotalNumberEvents = 0

    if self.pset != None: #CarlosDaniele
        if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
            msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
            raise CrabException(msg)
    else:
        if (self.selectNumberOfJobs == 0):
            msg = 'Must specify number_of_jobs.'
            raise CrabException(msg)

    ## New method of dealing with seeds
    self.incrementSeeds = []
    self.preserveSeeds = []
    if cfg_params.has_key('CMSSW.preserve_seeds'):
        tmpList = cfg_params['CMSSW.preserve_seeds'].split(',')
        for tmp in tmpList:
            # strip() returns a new string; store that result
            self.preserveSeeds.append(tmp.strip())
    if cfg_params.has_key('CMSSW.increment_seeds'):
        tmpList = cfg_params['CMSSW.increment_seeds'].split(',')
        for tmp in tmpList:
            self.incrementSeeds.append(tmp.strip())

    ## Old method of dealing with seeds
    ## FUTURE: This is for old CMSSW and old CRAB. Can throw exceptions after a couple of CRAB releases and then
    ## remove
    ## The notices go through the logger like every other message here.
    self.sourceSeed = cfg_params.get('CMSSW.pythia_seed',None)
    if self.sourceSeed:
        common.logger.message("pythia_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n"+"Added to increment_seeds.")
        self.incrementSeeds.append('sourceSeed')

    self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
    if self.sourceSeedVtx:
        common.logger.message("vtx_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n"+"Added to increment_seeds.")
        self.incrementSeeds.append('VtxSmeared')

    self.sourceSeedG4 = cfg_params.get('CMSSW.g4_seed',None)
    if self.sourceSeedG4:
        common.logger.message("g4_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n"+"Added to increment_seeds.")
        self.incrementSeeds.append('g4SimHits')

    self.sourceSeedMix = cfg_params.get('CMSSW.mix_seed',None)
    if self.sourceSeedMix:
        common.logger.message("mix_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n"+"Added to increment_seeds.")
        self.incrementSeeds.append('mix')

    self.firstRun = cfg_params.get('CMSSW.first_run',None)

    if self.pset != None: #CarlosDaniele
        import PsetManipulator as pp
        PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset

    # Copy/return

    self.copy_data = int(cfg_params.get('USER.copy_data',0))
    self.return_data = int(cfg_params.get('USER.return_data',0))

    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents=0 # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths={} # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination=[] # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end

    self.tgzNameWithPath = self.getTarBall(self.executable)

    ## Select Splitting
    if self.selectNoInput:
        if self.pset == None: #CarlosDaniele
            self.jobSplittingForScript()
        else:
            self.jobSplittingNoInput()
    else:
        self.jobSplittingByBlocks(blockSites)

    # modify Pset
    if self.pset != None: #CarlosDaniele
        # NOTE(review): the catch-all is kept on purpose; which exception
        # types PsetManipulator raises is not visible from this file.
        try:
            # Add FrameworkJobReport to parameter-set, set max events.
            # Reset later for data jobs by writeCFG which does all modifications
            PsetEdit.addCrabFJR(self.fjrFileName)
            PsetEdit.maxEvent(self.eventsPerJob)
            PsetEdit.psetWriter(self.configFilename())
        except:
            msg='Error while manipuliating ParameterSet: exiting...'
            raise CrabException(msg)
289 gutsche 1.3
290 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
291    
292 slacapra 1.86 import DataDiscovery
293     import DataLocation
294 gutsche 1.3 common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
295    
296     datasetPath=self.datasetPath
297    
298 slacapra 1.1 ## Contact the DBS
299 gutsche 1.92 common.logger.message("Contacting Data Discovery Services ...")
300 slacapra 1.1 try:
301 slacapra 1.137 self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
302 slacapra 1.1 self.pubdata.fetchDBSInfo()
303    
304 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
305 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
306     raise CrabException(msg)
307 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
308 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
309     raise CrabException(msg)
310 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
311 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
312 slacapra 1.1 raise CrabException(msg)
313    
314 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
315 mkirn 1.37 self.eventsbyblock=self.pubdata.getEventsPerBlock()
316     self.eventsbyfile=self.pubdata.getEventsPerFile()
317 gutsche 1.3
318 slacapra 1.1 ## get max number of events
319 ewv 1.131 self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py
320 slacapra 1.1
321     ## Contact the DLS and build a list of sites hosting the fileblocks
322     try:
323 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
324 gutsche 1.6 dataloc.fetchDLSInfo()
325 slacapra 1.41 except DataLocation.DataLocationError , ex:
326 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
327     raise CrabException(msg)
328 ewv 1.131
329 slacapra 1.1
330 gutsche 1.35 sites = dataloc.getSites()
331     allSites = []
332     listSites = sites.values()
333 slacapra 1.63 for listSite in listSites:
334     for oneSite in listSite:
335 gutsche 1.35 allSites.append(oneSite)
336     allSites = self.uniquelist(allSites)
337 gutsche 1.3
338 gutsche 1.92 # screen output
339     common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
340    
341 gutsche 1.35 return sites
342 ewv 1.131
def setArgsList(self, argsList):
    """
    Store argsList on the instance as self.argsList (no copy is made).
    """
    self.argsList = argsList
345    
def jobSplittingByBlocks(self, blockSites):
    """
    Perform job splitting. Jobs run over an integer number of files
    and no more than one block.
    ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
    REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
              self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
              self.maxEvents, self.filesbyblock, self.eventsbyblock, self.eventsbyfile,
              self.ncjobs
    SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
          self.total_number_of_jobs - Total # of jobs (also stored in self.ncjobs)
          self.list_of_args - one [files, max events, skip events] list of strings per job
    RAISES: CrabException if events_per_job is smaller than 1 while there
            are events to process.
    """

    # ---- Handle the possible job splitting configurations ---- #
    if (self.selectTotalNumberEvents):
        totalEventsRequested = self.total_number_of_events
    if (self.selectEventsPerJob):
        eventsPerJobRequested = self.eventsPerJob
        if (self.selectNumberOfJobs):
            totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob

    # If user requested all the events in the dataset
    if (totalEventsRequested == -1):
        eventsRemaining=self.maxEvents
    # If user requested more events than are in the dataset
    elif (totalEventsRequested > self.maxEvents):
        eventsRemaining = self.maxEvents
        # Report the effective request: total_number_of_events is 0 when
        # the total was derived from number_of_jobs * events_per_job.
        common.logger.message("Requested "+str(totalEventsRequested)+ " events, but only "+str(self.maxEvents)+" events are available.")
    # If user requested less events than are in the dataset
    else:
        eventsRemaining = totalEventsRequested

    # If user requested more events per job than are in the dataset
    if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
        eventsPerJobRequested = self.maxEvents

    # For user info at end
    totalEventCount = 0

    if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
        eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
        # More jobs than events rounds down to zero; use one event per
        # job so every job still consumes events.
        if (eventsPerJobRequested < 1):
            eventsPerJobRequested = 1

    # With fewer than one event per job the file loop below closes jobs
    # without ever reducing eventsRemaining, so refuse such a request.
    if (eventsRemaining > 0 and eventsPerJobRequested < 1):
        msg = 'events_per_job must be greater than zero to split a dataset into jobs.'
        raise CrabException(msg)

    if (self.selectNumberOfJobs):
        common.logger.message("May not create the exact number_of_jobs requested.")

    if ( self.ncjobs == 'all' ) :
        totalNumberOfJobs = 999999999
    else :
        totalNumberOfJobs = self.ncjobs

    blocks = blockSites.keys()
    blockCount = 0
    # Backup variable in case self.maxEvents counted events in a non-included block
    numBlocksInDataset = len(blocks)

    jobCount = 0
    list_of_lists = []

    # dictionary tracking which jobs belong to which block
    jobsOfBlock = {}

    # ---- Iterate over the blocks in the dataset until ---- #
    # ---- we've met the requested total # of events ---- #
    while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
        block = blocks[blockCount]
        blockCount += 1
        if block not in jobsOfBlock.keys() :
            jobsOfBlock[block] = []

        if self.eventsbyblock.has_key(block) :
            numEventsInBlock = self.eventsbyblock[block]
            common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))

            files = self.filesbyblock[block]
            numFilesInBlock = len(files)
            if (numFilesInBlock <= 0):
                continue
            fileCount = 0

            # ---- New block => New job ---- #
            parString = ""
            # counter for number of events in files currently worked on
            filesEventCount = 0
            # flag if next while loop should touch new file
            newFile = 1
            # job event counter
            jobSkipEventCount = 0

            # ---- Iterate over the files in the block until we've met the requested ---- #
            # ---- total # of events or we've gone over all the files in this block ---- #
            while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
                file = files[fileCount]
                if newFile :
                    try:
                        numEventsInFile = self.eventsbyfile[file]
                        common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
                        # increase filesEventCount
                        filesEventCount += numEventsInFile
                        # Add file to current job
                        parString += '\\\"' + file + '\\\"\,'
                        newFile = 0
                    except KeyError:
                        common.logger.message("File "+str(file)+" has unknown number of events: skipping")

                # if less events in file remain than eventsPerJobRequested
                if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
                    # if last file in block
                    if ( fileCount == numFilesInBlock-1 ) :
                        # end job using last file, use remaining events in block
                        # close job and touch new file
                        # [:-2] drops the trailing separator of the file list
                        fullString = parString[:-2]
                        list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
                        self.jobDestination.append(blockSites[block])
                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                        # fill jobs of block dictionary
                        jobsOfBlock[block].append(jobCount+1)
                        # reset counter
                        jobCount = jobCount + 1
                        totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
                        eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
                        jobSkipEventCount = 0
                        # reset file
                        parString = ""
                        filesEventCount = 0
                        newFile = 1
                        fileCount += 1
                    else :
                        # go to next file
                        newFile = 1
                        fileCount += 1
                # if events in file equal to eventsPerJobRequested
                elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
                    # close job and touch new file
                    fullString = parString[:-2]
                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    jobsOfBlock[block].append(jobCount+1)
                    # reset counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    jobSkipEventCount = 0
                    # reset file
                    parString = ""
                    filesEventCount = 0
                    newFile = 1
                    fileCount += 1

                # if more events in file remain than eventsPerJobRequested
                else :
                    # close job but don't touch new file
                    fullString = parString[:-2]
                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    jobsOfBlock[block].append(jobCount+1)
                    # increase counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    # calculate skip events for last file
                    # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
                    jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
                    # remove all but the last file
                    filesEventCount = self.eventsbyfile[file]
                    parString = '\\\"' + file + '\\\"\,'
                pass # END if
            pass # END while (iterate over files in the block)
    pass # END while (iterate over blocks in the dataset)
    self.ncjobs = self.total_number_of_jobs = jobCount
    if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
        common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
    common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")

    # screen output
    screenOutput = "List of jobs and available destination sites:\n\n"

    # keep trace of block with no sites to print a warning at the end
    noSiteBlock = []
    bloskNoSite = []

    blockCounter = 0
    for block in blocks:
        if block in jobsOfBlock.keys() :
            blockCounter += 1
            screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
            if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
                noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
                bloskNoSite.append( blockCounter )

    common.logger.message(screenOutput)
    if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
        msg = 'WARNING: No sites are hosting any part of data for block:\n '
        virgola = ""
        if len(bloskNoSite) > 1:
            virgola = ","
        for block in bloskNoSite:
            msg += ' ' + str(block) + virgola
        msg += '\n Related jobs:\n '
        virgola = ""
        if len(noSiteBlock) > 1:
            virgola = ","
        for range_jobs in noSiteBlock:
            msg += str(range_jobs) + virgola
        msg += '\n will not be submitted and this block of data can not be analyzed!\n'
        if self.cfg_params.has_key('EDG.se_white_list'):
            msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
            msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
            msg += 'Please check if the dataset is available at this site!)\n'
        if self.cfg_params.has_key('EDG.ce_white_list'):
            msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
            msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
            msg += 'Please check if the dataset is available at this site!)\n'

        common.logger.message(msg)

    self.list_of_args = list_of_lists
    return
570    
def jobSplittingNoInput(self):
    """
    Perform job splitting based on number of event per job

    REQUIRES: self.selectEventsPerJob, self.selectNumberOfJobs,
              self.selectTotalNumberEvents, self.eventsPerJob,
              self.theNumberOfJobs, self.total_number_of_events, self.firstRun
    SETS: self.total_number_of_jobs, self.list_of_args and, depending on
          the two values supplied, self.total_number_of_events or
          self.eventsPerJob; appends one destination per job to
          self.jobDestination.
    RAISES: CrabException if the total number of events is negative or if
            the value the total has to be divided by is not positive.
    """
    common.logger.debug(5,'Splitting per events')

    if (self.selectEventsPerJob):
        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
    if (self.selectNumberOfJobs):
        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
    if (self.selectTotalNumberEvents):
        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')

    if (self.total_number_of_events < 0):
        msg='Cannot split jobs per Events with "-1" as total number of events'
        raise CrabException(msg)

    if (self.selectEventsPerJob):
        if (self.selectTotalNumberEvents):
            # Guard the division: zero would raise ZeroDivisionError and
            # a negative value would give a negative job count.
            if (self.eventsPerJob <= 0):
                msg='Cannot split jobs per Events: events_per_job must be greater than zero'
                raise CrabException(msg)
            self.total_number_of_jobs = self.total_number_of_events//self.eventsPerJob
        elif(self.selectNumberOfJobs) :
            self.total_number_of_jobs =self.theNumberOfJobs
            self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)

    elif (self.selectNumberOfJobs) :
        if (self.theNumberOfJobs <= 0):
            msg='Cannot split jobs per Events: number_of_jobs must be greater than zero'
            raise CrabException(msg)
        self.total_number_of_jobs = self.theNumberOfJobs
        self.eventsPerJob = self.total_number_of_events//self.total_number_of_jobs

    common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs))

    # is there any remainder?
    check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)

    common.logger.debug(5,'Check '+str(check))

    common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
    if check > 0:
        common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))

    # argument is seed number.$i
    self.list_of_args = []
    for i in range(self.total_number_of_jobs):
        ## Since there is no input, any site is good
        self.jobDestination.append([""]) #must be empty to write correctly the xml
        args=[]
        if (self.firstRun):
            ## pythia first run
            # NOTE(review): this concatenates the two numbers as text
            # (first_run followed by the job index); confirm that a
            # string join rather than a sum is intended.
            args.append(str(self.firstRun)+str(i))
        self.list_of_args.append(args)

    return
622    
623 spiga 1.42
def jobSplittingForScript(self):#CarlosDaniele
    """
    Perform job splitting based on number of job

    Creates self.theNumberOfJobs jobs; each job gets its own index (as a
    string) as sole argument and an empty destination.
    """
    common.logger.debug(5,'Splitting per job')
    common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')

    self.total_number_of_jobs = self.theNumberOfJobs
    njobs = self.total_number_of_jobs

    common.logger.debug(5,'N jobs '+str(njobs))

    common.logger.message(str(njobs)+' jobs can be created')

    # argument is seed number.$i
    self.list_of_args = []
    jobIndexes = range(njobs)
    ## Since there is no input, any site is good: one fresh [""] per job
    self.jobDestination.extend([[""] for index in jobIndexes])
    ## no random seed
    self.list_of_args.extend([[str(index)] for index in jobIndexes])
    return
646    
def split(self, jobParams):
    """
    Record arguments and destination of every job in common.jobDB.

    jobParams -- list, extended in place with one entry per job; the
                 first self.total_number_of_jobs entries are then set to
                 the matching element of self.list_of_args.
    """
    common.jobDB.load()
    #### Fabio
    njobs = self.total_number_of_jobs
    argsPerJob = self.list_of_args
    # create the empty structure
    jobParams.extend([""] * njobs)

    for job in range(njobs):
        jobArgs = argsPerJob[job]
        jobParams[job] = jobArgs
        common.jobDB.setArguments(job, jobArgs)
        common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
        common.jobDB.setDestination(job, self.jobDestination[job])

    common.jobDB.save()
    return
667 ewv 1.131
def getJobTypeArguments(self, nj, sched):
    """
    Return the arguments stored in common.jobDB for job nj as one string,
    each argument followed by a single space. sched is not used.
    """
    pieces = [str(argument) + " " for argument in common.jobDB.arguments(nj)]
    return ''.join(pieces)
673 ewv 1.131
def numberOfJobs(self):
    """
    Return self.total_number_of_jobs as set by the job splitting.
    """
    # Fabio
    njobs = self.total_number_of_jobs
    return njobs
677    
def getTarBall(self, exe):
    """
    Return the TarBall with lib and exe

    The path is stored in self.tgzNameWithPath. When no file exists at
    that path, buildTar_(exe) is called before the path is returned.
    """

    #
    # Marco. Let's start to use relative path for Boss XML files
    #
    self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name

    if not os.path.exists(self.tgzNameWithPath):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return string.strip(self.tgzNameWithPath)

    # if it exist, just return it
    return self.tgzNameWithPath
695    
def buildTar_(self, executable):
    """
    Build the input-sandbox tar-balls of the task.

    The first tar-ball (self.tgzNameWithPath) collects the user
    executable when it is private, the 'lib' and 'module' directories of
    the user SCRAM area, every 'data' directory found below that area
    and the ProdCommon directory under $CRABDIR. The second one
    (self.MLtgzfile) collects the monitoring/reporting helper scripts
    from $CRABDIR/python.

    Nothing is built (and self.MLtgzfile is not set) when the working
    area is the release top itself or no release top is known.

    Raises CrabException when the executable cannot be found, when a
    tar-ball cannot be written or when the main tar-ball exceeds
    self.MaxTarBallSize megabytes.
    """
    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        return

    import tarfile
    try: # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            if (self.executable != ''):
                exeWithPath = self.scram.findFile_(executable)
                if ( not exeWithPath ):
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship
                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
                    path = swArea+'/'
                    # distinguish case when script is in user project area or given by full path somewhere else
                    if exeWithPath.find(path) >= 0 :
                        exe = exeWithPath.replace(path,'')
                        tar.add(path+exe,exe)
                    else :
                        tar.add(exeWithPath,os.path.basename(executable))
                # otherwise the exe is from release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            libDir = 'lib'
            lib = swArea+'/' +libDir
            common.logger.debug(5,"lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib,libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module,moduleDir)

            ## Now check if any data dir(s) is present
            swAreaLen=len(swArea)
            for root, dirs, files in os.walk(swArea):
                if "data" in dirs:
                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
                    tar.add(root+"/data",root[swAreaLen:]+"/data")

            ## Add ProdCommon dir to tar
            prodcommonDir = 'ProdCommon'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
            if os.path.isdir(prodcommonPath):
                tar.add(prodcommonPath,prodcommonDir)

            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            # release the file handle also when something above failed
            tar.close()
    except CrabException:
        # keep the specific message (e.g. executable not found)
        raise
    except :
        raise CrabException('Could not create tar-ball')

    ## check for tarball size
    tarballinfo = os.stat(self.tgzNameWithPath)
    if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
        raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')

    ## create tar-ball with ML stuff
    self.MLtgzfile = common.work_space.pathForTgz()+'share/MLfiles.tgz'
    try:
        tar = tarfile.open(self.MLtgzfile, "w:gz")
        try:
            path=os.environ['CRABDIR'] + '/python/'
            for fname in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py','writeCfg.py']:
                tar.add(path+fname,fname)
            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
        finally:
            tar.close()
    except :
        raise CrabException('Could not create ML files tar-ball')

    return
791 ewv 1.131
def additionalInputFileTgz(self):
    """
    Put all additional files into a tar ball and return its name

    Every file listed in self.additional_inbox_files is stored under its
    last path component only.
    """
    import tarfile
    tarName= common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
    tar = tarfile.open(tarName, "w:gz")
    try:
        for fname in self.additional_inbox_files:
            # str method instead of the deprecated string.split() function
            tar.add(fname,fname.split('/')[-1])
        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
    finally:
        # do not leak the file handle when adding a file fails
        tar.close()
    return tarName
804    
def wsSetupEnvironment(self, nj):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    The generated shell text sets up the LCG or OSG environment, creates
    the CMSSW project area for self.version, checks the number of
    wrapper arguments against self.argsList[nj], exports the dataset
    related variables and copies the job configuration to pset.cfg.
    """
    # Prepare JobType-independent part
    txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
    txt += 'echo ">>> setup environment"\n'
    txt += 'if [ $middleware == LCG ]; then \n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' if [ ! $? == 0 ] ;then\n'
    txt += ' echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
    txt += ' echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' exit 1\n'
    txt += ' fi\n'
    txt += ' echo ">>> Created working directory: $WORKING_DIR"\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo ">>> specific cmssw setup environment:"\n'
    txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
    txt += scram+' project CMSSW '+self.version+'\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
    txt += ' echo "JOB_EXIT_STATUS = 10034"\n'
    txt += ' echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    # report the version that was actually requested, not a fixed release name
    txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW '+self.version+' not found on `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
    txt += ' echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += ' exit 1 \n'
    txt += 'fi \n'
    txt += 'cd '+self.version+'\n'
    # SOFTWARE_DIR is the freshly created project area
    txt += 'SOFTWARE_DIR=`pwd`\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    ### needed grep for bug in scramv1 ###
    txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber)\n"
    txt += "\n"
    txt += "if [ $nargs -lt "+str(len(self.argsList[nj].split()))+" ]\n"
    txt += "then\n"
    txt += " echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
    txt += ' echo "JOB_EXIT_STATUS = 50113"\n'
    txt += ' echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
    txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
    txt += ' echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += " exit 1\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    # variables needed for the DBS output publication
    if (self.datasetPath):
        txt += '\n'
        txt += 'DatasetPath='+self.datasetPath+'\n'

        datasetpath_split = self.datasetPath.split("/")

        txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
        txt += 'DataTier='+datasetpath_split[2]+'\n'
        txt += 'ApplicationFamily=cmsRun\n'

    else:
        txt += 'DatasetPath=MCDataTier\n'
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'
    if self.pset is not None:
        pset = os.path.basename(job.configFilename())
        txt += '\n'
        txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
        if (self.datasetPath): # standard job
            txt += 'InputFiles=${args[1]}; export InputFiles\n'
            txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
            txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
            txt += 'echo "Inputfiles:<$InputFiles>"\n'
            txt += 'echo "MaxEvents:<$MaxEvents>"\n'
            txt += 'echo "SkipEvents:<$SkipEvents>"\n'
        else: # pythia like job
            txt += 'PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n'
            txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
            txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
            txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
            if (self.firstRun):
                txt += 'FirstRun=${args[1]}; export FirstRun\n'
                txt += 'echo "FirstRun: <$FirstRun>"\n'

        txt += 'mv -f '+pset+' pset.cfg\n'

    if len(self.additional_inbox_files) > 0:
        txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
        txt += ' tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
        txt += 'fi\n'

    if self.pset is not None:
        txt += '\n'
        txt += 'echo "***** cat pset.cfg *********"\n'
        txt += 'cat pset.cfg\n'
        txt += 'echo "****** end pset.cfg ********"\n'
        txt += '\n'
        # hash of the configuration, used for the DBS output publication
        txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
        txt += 'echo "PSETHASH = $PSETHASH" \n'
        txt += '\n'
    return txt
951    
def wsBuildExe(self, nj=0):
    """
    Put in the script the commands to build an executable
    or a library.

    When the tar-ball self.tgzNameWithPath exists locally, the generated
    shell text unpacks it from $RUNTIME_AREA (aborting the job if the
    untar fails) and adds $SOFTWARE_DIR/ProdCommon to PYTHONPATH.
    Otherwise only the header comment is returned.
    """

    txt = '\n#Written by cms_cmssw::wsBuildExe\n'

    if os.path.isfile(self.tgzNameWithPath):
        tgz_basename = os.path.basename(self.tgzNameWithPath)
        txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+tgz_basename+' :" \n'
        txt += 'tar xzvf $RUNTIME_AREA/'+tgz_basename+'\n'
        txt += 'untar_status=$? \n'
        txt += 'if [ $untar_status -ne 0 ]; then \n'
        txt += ' echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
        txt += ' echo "JOB_EXIT_STATUS = $untar_status" \n'
        txt += ' echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' if [ $middleware == OSG ]; then \n'
        txt += ' cd $RUNTIME_AREA\n'
        txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
        txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
        txt += ' /bin/rm -rf $WORKING_DIR\n'
        txt += ' if [ -d $WORKING_DIR ] ;then\n'
        txt += ' echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
        txt += ' echo "JOB_EXIT_STATUS = 50999"\n'
        txt += ' echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
        txt += ' fi\n'
        txt += ' fi \n'
        txt += ' \n'
        txt += ' exit 1 \n'
        txt += 'else \n'
        txt += ' echo "Successful untar" \n'
        txt += 'fi \n'
        txt += '\n'
        txt += 'echo ">>> Include ProdCommon in PYTHONPATH:"\n'
        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
        txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdCommon\n'
        txt += 'else\n'
        txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
        txt += 'fi\n'
        # echo after the 'fi' so the value is logged in both branches
        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
        txt += '\n'

    return txt
1001    
def modifySteeringCards(self, nj):
    """
    Modify the card provided by the user, writing a new card into the
    share dir. This job type has nothing to do here.
    """
    return None
1007 ewv 1.131
def executableName(self):
    """
    Return the command used to start the job: "sh " when a user script
    (self.scriptExe) is configured, self.executable otherwise.
    """
    if not self.scriptExe:
        return self.executable
    return "sh "
1013 slacapra 1.1
def executableArgs(self):
    """
    Return the command line arguments of the executable.

    With a user script the arguments are the script name followed by
    " $NJob". Otherwise they depend on the CMSSW version reported by
    self.scram.getSWVersion(): the framework job report option is added
    from version 1_5 on, and the configuration file is pset.py from
    major version 2 on (pset.cfg before).

    Raises CrabException when major and minor release numbers cannot be
    read from the version string.
    """
    # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
    if self.scriptExe:
        return self.scriptExe + " $NJob"

    version_array = self.scram.getSWVersion().split('_')
    try:
        major = int(version_array[1])
        minor = int(version_array[2])
    except (IndexError, ValueError):
        # too few '_' separated fields, or a non numeric field
        msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
        raise CrabException(msg)

    ex_args = ""
    # FUTURE: This tests the CMSSW version. Can remove code as versions deprecated
    # Framework job report
    if (major >= 1 and minor >= 5) or (major >= 2):
        ex_args += " -j " + self.fjrFileName

    # Type of cfg file
    if major >= 2 :
        ex_args += " -p pset.py"
    else:
        ex_args += " -p pset.cfg"
    return ex_args
1041 slacapra 1.1
def inputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL input sandbox.

    The list holds the code and monitoring tar-balls when they exist on
    disk, the job configuration file when a pset is used, and always the
    tar-ball with the additional input files.
    """
    ## code
    inp_box = [tgz for tgz in (self.tgzNameWithPath, self.MLtgzfile)
               if os.path.isfile(tgz)]
    ## config
    if self.pset is not None:
        inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
    ## additional input files
    inp_box.append(self.additionalInputFileTgz())
    return inp_box
1061    
def outputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL output sandbox.

    Every user declared output file gets the job number (nj + 1)
    inserted through numberFile_.
    """
    ## User Declared output files
    declared = self.output_file+self.output_file_sandbox
    return [self.numberFile_(out, str(nj + 1)) for out in declared]
1073    
def prepareSteeringCards(self):
    """
    Make initial modifications of the user's steering card file.
    Nothing is done for this job type.
    """
    return None
1079    
def wsRenameOutput(self, nj):
    """
    Returns part of a job script which renames the produced files.

    For every declared output file the generated shell text moves the
    file to its numbered name (see numberFile_) and links the original
    name to it, or records exit status 60302 when the file is missing.
    Sandbox files always go to $RUNTIME_AREA; the other output files
    stay in the current directory when self.copy_data is 1. The numbered
    names of self.output_file are finally exported as 'file_list'.
    """

    txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    txt += 'ls \n'
    txt += '\n'

    txt += 'output_exit_status=0\n'

    for fileWithSuffix in (self.output_file_sandbox):
        output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
        txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
        txt += ' ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
        txt += 'else\n'
        txt += ' exit_status=60302\n'
        txt += ' echo "ERROR: Output file '+fileWithSuffix+' not found"\n'
        if common.scheduler.name().upper() == 'CONDOR_G':
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' fi \n'
        txt += 'fi\n'

    # numbered names of the non-sandbox output files, in declaration order
    file_list = []
    for fileWithSuffix in (self.output_file):
        output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
        file_list.append(output_file_num)
        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
        if (self.copy_data == 1): # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
            txt += ' mv '+fileWithSuffix+' '+output_file_num+'\n'
            txt += ' ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
        else:
            txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
        txt += 'else\n'
        txt += ' exit_status=60302\n'
        txt += ' echo "ERROR: Output file '+fileWithSuffix+' not found"\n'
        txt += ' echo "JOB_EXIT_STATUS = $exit_status"\n'
        txt += ' output_exit_status=$exit_status\n'
        if common.scheduler.name().upper() == 'CONDOR_G':
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' fi \n'
        txt += 'fi\n'

    # str.join instead of the deprecated string.join() module function
    txt += 'file_list="'+' '.join(file_list)+'"\n'
    txt += '\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    txt += 'ls \n'
    txt += '\n'
    txt += 'cd $RUNTIME_AREA\n'
    txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    return txt
1145    
def numberFile_(self, file, txt):
    """
    append _'txt' before last extension of a file

    'out.root' with txt '3' becomes 'out_3.root'; a name without any
    '.' simply gets '_' + txt appended.
    """
    # str methods instead of the deprecated string.split() module function
    parts = file.split(".")
    if len(parts) > 1:
        # everything up to the last extension, then the tag, then the extension
        return ".".join(parts[:-1]) + '_' + txt + "." + parts[-1]
    return parts[0] + '_' + txt
1163    
def getRequirements(self, nj=[]):
    """
    return job requirements to add to jdl files

    The expression asks for the CMSSW version and the architecture tags
    (when set), for outbound connectivity and, with the 'glitecoll'
    scheduler, for a CE in Production state.
    """
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version +
                       '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')
    ## the architecture tag is required whenever an architecture is known
    if self.executable_arch:
        clauses.append(' && Member("VO-cms-' + self.executable_arch +
                       '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')

    clauses.append(' && (other.GlueHostNetworkAdapterOutboundIP)')
    if common.scheduler.name() == "glitecoll":
        clauses.append(' && other.GlueCEStateStatus == "Production" ')

    return ''.join(clauses)
1185 gutsche 1.3
def configFilename(self):
    """ Return the config filename: the job type name plus '.cfg'. """
    base = self.name()
    return base+'.cfg'
1189    
def wsSetupCMSOSGEnvironment_(self):
    """
    Returns the part of a job script which prepares the execution
    environment on OSG and which is common for all CMS jobs.

    The text exports SCRAM_ARCH and sources
    $OSG_APP/cmssoft/cms/cmsset_default.sh for self.version; when that
    file is missing the script removes $WORKING_DIR and exits.
    """
    arch = self.executable_arch
    script_lines = [
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh ' + self.version + '\n',
        ' else\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        '\n',
        ' cd $RUNTIME_AREA\n',
        ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n',
        ' echo ">>> Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10017"\n',
        ' echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' fi\n',
        '\n',
        ' exit 1\n',
        ' fi\n',
        '\n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(script_lines)
1227 ewv 1.131
1228 gutsche 1.3 ### OLI_DANIELE
def wsSetupCMSLCGEnvironment_(self):
    """
    Returns the part of a job script which prepares the execution
    environment on LCG and which is common for all CMS jobs.

    The text exports SCRAM_ARCH and BUILD_ARCH and sources
    $VO_CMS_SW_DIR/cmsset_default.sh; the script exits when the software
    directory or that file is missing, or when sourcing it fails.
    """
    arch = self.executable_arch
    script_lines = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' export BUILD_ARCH=' + arch + '\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n',
        ' echo "JOB_EXIT_STATUS = 10031" \n',
        ' echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' echo "JOB_EXIT_STATUS = 10032"\n',
        ' echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
    ]
    return ''.join(script_lines)
1268 gutsche 1.5
1269 ewv 1.131 ### FEDE FOR DBS OUTPUT PUBLICATION
def modifyReport(self, nj):
    """
    Insert the part of the script that modifies the FrameworkJob Report.

    Only when the 'USER.publish_data' parameter is 1 the generated text
    runs ModifyJobReport.py from ProdCommon on crab_fjr_$NJob.xml;
    otherwise it just states that no publication is required.
    """
    txt = '\n#Written by cms_cmssw::modifyReport\n'
    publish_data = int(self.cfg_params.get('USER.publish_data', 0))
    if publish_data != 1:
        txt += 'echo "no data publication required"\n'
        return txt

    modifier = '$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py'
    modifier_call = modifier + ' crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH'

    txt += 'echo ">>> Modify Job Report:" \n'
    txt += 'chmod a+x ' + modifier + '\n'

    txt += 'if [ -z "$SE" ]; then\n'
    txt += ' SE="" \n'
    txt += 'fi \n'
    txt += 'if [ -z "$SE_PATH" ]; then\n'
    txt += ' SE_PATH="" \n'
    txt += 'fi \n'
    txt += 'echo "SE = $SE"\n'
    txt += 'echo "SE_PATH = $SE_PATH"\n'

    processedDataset = self.cfg_params['USER.publish_data_name']
    lfn_base = LFNBase(processedDataset)
    txt += 'ProcessedDataset='+processedDataset+'\n'
    txt += 'if [ "$SE_PATH" == "" ]; then\n'
    txt += ' FOR_LFN=/copy_problems/ \n'
    txt += 'else \n'
    #### LFN is <lfn_base>_PSETHASH
    txt += ' FOR_LFN=%s_${PSETHASH}/\n'%(lfn_base)
    txt += 'fi \n'
    txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
    txt += 'echo "FOR_LFN = $FOR_LFN" \n'
    txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
    # first echo the command line, then run it
    txt += 'echo "' + modifier_call + '"\n'
    txt += modifier_call + '\n'

    txt += 'modifyReport_result=$?\n'
    txt += 'echo modifyReport_result = $modifyReport_result\n'
    txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
    txt += ' exit_status=1\n'
    txt += ' echo "ERROR: Problem with ModifyJobReport"\n'
    txt += 'else\n'
    txt += ' mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
    txt += 'fi\n'
    return txt
1324 fanzago 1.99
def cleanEnv(self):
    """
    Return the part of the job script that removes $WORKING_DIR on OSG
    nodes and reports exit code 60999 when the removal fails.
    """
    return (
        '\n#Written by cms_cmssw::cleanEnv\n'
        'if [ $middleware == OSG ]; then\n'
        ' cd $RUNTIME_AREA\n'
        ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
        ' echo ">>> Remove working directory: $WORKING_DIR"\n'
        ' /bin/rm -rf $WORKING_DIR\n'
        ' if [ -d $WORKING_DIR ] ;then\n'
        ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
        ' echo "JOB_EXIT_STATUS = 60999"\n'
        ' echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
        ' dumpStatus $RUNTIME_AREA/$repo\n'
        ' fi\n'
        'fi\n'
        '\n'
    )
1341 fanzago 1.93
def setParam_(self, param, value):
    """ Store 'value' under the key 'param' in the parameter dictionary. """
    params = self._params
    params[param] = value
1344    
def getParams(self):
    """ Return the parameter dictionary itself (not a copy). """
    params = self._params
    return params
1347 gutsche 1.8
def uniquelist(self, old):
    """
    remove duplicates from a list

    Returns a new list holding every element of 'old' once, in the order
    of first appearance. The elements must be hashable.
    """
    seen = {}
    unique = []
    for e in old:
        if e not in seen:
            seen[e] = 0
            unique.append(e)
    return unique
1356 mcinquil 1.121
1357    
def checkOut(self, limit):
    """
    Return the shell snippet that checks the size of the output sandbox.

    limit is written verbatim into the script as the maximum allowed total
    size; it is compared against the size column printed by `ls -gG`.

    The files checked are the job-numbered sandbox files (plus the regular
    output files when self.return_data == 1) followed by the job stdout and
    stderr. If their total size exceeds the limit, the script deletes every
    file that is larger than the limit on its own or that pushes the running
    total over the limit, and sets exit_status=70000.
    """
    if self.return_data == 1:
        candidates = self.output_file + self.output_file_sandbox
    else:
        candidates = self.output_file_sandbox

    listOutFiles = [self.numberFile_(name, '$NJob') for name in candidates]
    # stdout/stderr are resolved on the worker node by the `ls` calls below
    listOutFiles.append('$stdoutFile')
    listOutFiles.append('$stderrFile')
    joined = ' '.join(listOutFiles)

    pieces = [
        '\n#Written by cms_cmssw::checkOut\n',
        'echo ">>> Starting output sandbox limit check :"\n',
        'stdoutFile=`ls *stdout` \n',
        'stderrFile=`ls *stderr` \n',
        'echo "OUTPUT files: ' + joined + '"\n',
        'filesToCheck="' + joined + '"\n',
        'ls -gGhrta;\n',
        # first pass: total size of the files that exist
        'sum=0;\n',
        'for file in $filesToCheck ; do\n',
        ' if [ -e $file ]; then\n',
        " tt=`ls -gGrta $file | awk '{ print $3 }'`\n",
        ' sum=`expr $sum + $tt`\n',
        ' else\n',
        ' echo "WARNING: output file $file not found!"\n',
        ' fi\n',
        'done\n',
        'echo "Total Output dimension: $sum";\n',
        'limit=' + str(limit) + ';\n',
        'echo "OUTPUT FILES LIMIT SET TO: $limit";\n',
        'if [ $limit -lt $sum ]; then\n',
        ' echo "WARNING: output files have to big size - something will be lost;"\n',
        ' echo " checking the output file sizes..."\n',
        # second pass: walk the files again and drop what does not fit
        ' tot=0;\n',
        ' for filefile in $filesToCheck ; do\n',
        # missing files were already reported by the first pass; skipping
        # them keeps dimFile non-empty for expr and the [ ] tests below
        ' if [ ! -e $filefile ]; then\n',
        ' continue\n',
        ' fi\n',
        " dimFile=`ls -gGrta $filefile | awk '{ print $3 }';`\n",
        # the running total must use this file's size, not the stale $tt
        # left over from the first loop
        ' tot=`expr $tot + $dimFile`;\n',
        ' if [ $limit -lt $dimFile ]; then\n',
        ' echo "deleting file: $filefile";\n',
        ' rm -f $filefile\n',
        ' elif [ $limit -lt $tot ]; then\n',
        ' echo "deleting file: $filefile";\n',
        ' rm -f $filefile\n',
        ' else\n',
        ' echo "saving file: $filefile"\n',
        ' fi\n',
        ' done\n',
        ' ls -agGhrt;\n',
        ' echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n',
        ' echo "JOB_EXIT_STATUS = 70000";\n',
        ' exit_status=70000;\n',
        # `else` and its command share one line in the generated script,
        # which is valid shell; the output is kept as it was
        'else echo "Total Output dimension $sum is fine.";\n',
        'fi\n',
        'echo "Ending output sandbox limit check"\n',
    ]
    return ''.join(pieces)