ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.161
Committed: Mon Mar 3 16:41:24 2008 UTC (17 years, 1 month ago) by fanzago
Content type: text/x-python
Branch: MAIN
Changes since 1.160: +154 -120 lines
Log Message:
first changes about wrapper

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5 fanzago 1.115 from BlackWhiteListParser import BlackWhiteListParser
6 slacapra 1.1 import common
7     import Scram
8    
9 slacapra 1.105 import os, string, glob
10 slacapra 1.1
11     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs):
    """
    Set up a CMSSW job type from the user configuration.

    The constructor reads the CMSSW.*, USER.* and EDG.* entries of
    cfg_params, checks the files they point to, runs data discovery
    when a dataset is given, obtains the tarball path through
    getTarBall(), splits the task into jobs and finally writes the
    parameter set through PsetManipulator.

    cfg_params: mapping of 'SECTION.key' names to string values
    ncjobs:     number of jobs requested to be created; stored in
                self.ncjobs and used to limit the splitting

    Raises CrabException when a mandatory key is missing, a referenced
    file does not exist, the version/architecture pair is rejected,
    the splitting keys are inconsistent or the parameter set cannot
    be written.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug(3, 'CMSSW::__init__')

    self.argsList = []

    self._params = {}
    self.cfg_params = cfg_params
    # init BlackWhiteListParser
    self.blackWhiteListParser = BlackWhiteListParser(cfg_params)

    self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize', 9.5))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    log = common.logger

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.additional_tgz_name = 'additional.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''         # script use case
    self.datasetPath = ''  # script use case

    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()

    #
    # Try to block creation in case of arch/version mismatch
    #
    a = self.version.split("_")

    if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
        msg = "Warning: You are using %s version of CMSSW with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n" % (self.version, self.executable_arch)
        common.logger.message(msg)
    if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
        msg = "Error: CMS does not support %s with %s architecture" % (self.version, self.executable_arch)
        raise CrabException(msg)

    common.taskDB.setDict('codeVersion', self.version)
    self.setParam_('application', self.version)

    ### collect Data cards

    if 'CMSSW.datasetpath' not in cfg_params:
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)
    tmp = cfg_params['CMSSW.datasetpath']
    log.debug(6, "CMSSW::CMSSW(): datasetPath = " + tmp)
    if tmp.lower() == 'none':
        self.datasetPath = None
        self.selectNoInput = 1
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0

    # ML monitoring
    # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
    if not self.datasetPath:
        self.setParam_('dataset', 'None')
        self.setParam_('owner', 'None')
    else:
        self.setParam_('datasetFull', self.datasetPath)
        datasetpath_split = self.datasetPath.split("/")
        if len(datasetpath_split) > 2:
            # standard style
            self.setParam_('dataset', datasetpath_split[1])
            self.setParam_('owner', datasetpath_split[2])
        else:
            # path without the expected '/a/b' structure: report it as is
            self.setParam_('dataset', self.datasetPath)
            self.setParam_('owner', self.datasetPath)

    self.setParam_('taskId', common.taskDB.dict('taskId'))

    self.dataTiers = []

    ## now the application
    self.executable = cfg_params.get('CMSSW.executable', 'cmsRun')
    self.setParam_('exe', self.executable)
    log.debug(6, "CMSSW::CMSSW(): executable = " + self.executable)

    if 'CMSSW.pset' not in cfg_params:
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    log.debug(6, "Cmssw::Cmssw(): PSet file = " + self.pset)
    if self.pset.lower() != 'none':
        if not os.path.exists(self.pset):
            raise CrabException("User defined PSet file " + self.pset + " does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    tmp = cfg_params.get('CMSSW.output_file', None)
    if tmp:
        tmpOutFiles = tmp.split(',')
        log.debug(7, 'cmssw::cmssw(): output files ' + str(tmpOutFiles))
        for outFile in tmpOutFiles:
            self.output_file.append(outFile.strip())
    else:
        log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")

    # script_exe file as additional file in inputSandbox
    self.scriptExe = cfg_params.get('USER.script_exe', None)
    if self.scriptExe:
        if not os.path.isfile(self.scriptExe):
            msg = "ERROR. file " + self.scriptExe + " not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    # With neither dataset nor pset the job can only run a user script.
    # scriptExe is None (not '') when the key is absent, so test its
    # truth value instead of comparing with the empty string.
    if self.datasetPath is None and self.pset is None and not self.scriptExe:
        msg = "Error. script_exe not defined"
        raise CrabException(msg)

    ## additional input files
    if 'USER.additional_input_files' in cfg_params:
        tmpAddFiles = cfg_params['USER.additional_input_files'].split(',')
        for tmp in tmpAddFiles:
            tmp = tmp.strip()
            if not tmp:
                # empty entry (e.g. trailing comma): nothing to add
                continue
            dirname = ''
            if not tmp.startswith("/"):
                dirname = "."
            files = []
            if tmp.find("*") > -1:
                files = glob.glob(os.path.join(dirname, tmp))
                if len(files) == 0:
                    raise CrabException("No additional input file found with this pattern: " + tmp)
            else:
                files.append(tmp)
            for addFile in files:
                if not os.path.exists(addFile):
                    raise CrabException("Additional input file not found: " + addFile)
                self.additional_inbox_files.append(addFile.strip())
        common.logger.debug(5, "Additional input files: " + str(self.additional_inbox_files))

    ## Events per job
    if 'CMSSW.events_per_job' in cfg_params:
        self.eventsPerJob = int(cfg_params['CMSSW.events_per_job'])
        self.selectEventsPerJob = 1
    else:
        self.eventsPerJob = -1
        self.selectEventsPerJob = 0

    ## number of jobs
    if 'CMSSW.number_of_jobs' in cfg_params:
        self.theNumberOfJobs = int(cfg_params['CMSSW.number_of_jobs'])
        self.selectNumberOfJobs = 1
    else:
        self.theNumberOfJobs = 0
        self.selectNumberOfJobs = 0

    if 'CMSSW.total_number_of_events' in cfg_params:
        self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
        self.selectTotalNumberEvents = 1
    else:
        self.total_number_of_events = 0
        self.selectTotalNumberEvents = 0

    if self.pset is not None:
        if (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2:
            msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
            raise CrabException(msg)
    else:
        if self.selectNumberOfJobs == 0:
            msg = 'Must specify number_of_jobs.'
            raise CrabException(msg)

    ## New method of dealing with seeds
    self.incrementSeeds = []
    self.preserveSeeds = []
    if 'CMSSW.preserve_seeds' in cfg_params:
        for tmp in cfg_params['CMSSW.preserve_seeds'].split(','):
            # str.strip() returns a new string: store the stripped name
            self.preserveSeeds.append(tmp.strip())
    if 'CMSSW.increment_seeds' in cfg_params:
        for tmp in cfg_params['CMSSW.increment_seeds'].split(','):
            self.incrementSeeds.append(tmp.strip())

    ## Old method of dealing with seeds
    ## FUTURE: This is for old CMSSW and old CRAB. Can throw exceptions
    ## after a couple of CRAB releases and then remove
    # (config key, attribute set on self, name added to incrementSeeds)
    deprecatedSeeds = (
        ('pythia_seed', 'sourceSeed', 'sourceSeed'),
        ('vtx_seed', 'sourceSeedVtx', 'VtxSmeared'),
        ('g4_seed', 'sourceSeedG4', 'g4SimHits'),
        ('mix_seed', 'sourceSeedMix', 'mix'),
    )
    for key, attr, seedName in deprecatedSeeds:
        value = cfg_params.get('CMSSW.' + key, None)
        setattr(self, attr, value)
        if value:
            print(key + " is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\nAdded to increment_seeds.")
            self.incrementSeeds.append(seedName)

    self.firstRun = cfg_params.get('CMSSW.first_run', None)

    if self.pset is not None:
        import PsetManipulator as pp
        PsetEdit = pp.PsetManipulator(self.pset)

    # Copy/return
    self.copy_data = int(cfg_params.get('USER.copy_data', 0))
    self.return_data = int(cfg_params.get('USER.return_data', 0))

    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents = 0        # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths = {}        # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination = []  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end

    self.tgzNameWithPath = self.getTarBall(self.executable)

    ## Select Splitting
    if self.selectNoInput:
        if self.pset is None:
            self.jobSplittingForScript()
        else:
            self.jobSplittingNoInput()
    else:
        self.jobSplittingByBlocks(blockSites)

    # modify Pset
    if self.pset is not None:
        try:
            # Add FrameworkJobReport to parameter-set, set max events.
            # Reset later for data jobs by writeCFG which does all modifications
            PsetEdit.addCrabFJR(self.fjrFileName)
            PsetEdit.maxEvent(self.eventsPerJob)
            PsetEdit.psetWriter(self.configFilename())
        except Exception:
            # keep the underlying cause visible instead of hiding it
            import sys
            msg = 'Error while manipuliating ParameterSet: exiting...'
            msg += '\n' + str(sys.exc_info()[1])
            raise CrabException(msg)
287 gutsche 1.3
288 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
289    
290 slacapra 1.86 import DataDiscovery
291     import DataLocation
292 gutsche 1.3 common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
293    
294     datasetPath=self.datasetPath
295    
296 slacapra 1.1 ## Contact the DBS
297 gutsche 1.92 common.logger.message("Contacting Data Discovery Services ...")
298 slacapra 1.1 try:
299 slacapra 1.137 self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
300 slacapra 1.1 self.pubdata.fetchDBSInfo()
301    
302 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
303 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
304     raise CrabException(msg)
305 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
306 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
307     raise CrabException(msg)
308 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
309 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
310 slacapra 1.1 raise CrabException(msg)
311    
312 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
313 mkirn 1.37 self.eventsbyblock=self.pubdata.getEventsPerBlock()
314     self.eventsbyfile=self.pubdata.getEventsPerFile()
315 gutsche 1.3
316 slacapra 1.1 ## get max number of events
317 ewv 1.131 self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py
318 slacapra 1.1
319     ## Contact the DLS and build a list of sites hosting the fileblocks
320     try:
321 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
322 gutsche 1.6 dataloc.fetchDLSInfo()
323 slacapra 1.41 except DataLocation.DataLocationError , ex:
324 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
325     raise CrabException(msg)
326 ewv 1.131
327 slacapra 1.1
328 gutsche 1.35 sites = dataloc.getSites()
329     allSites = []
330     listSites = sites.values()
331 slacapra 1.63 for listSite in listSites:
332     for oneSite in listSite:
333 gutsche 1.35 allSites.append(oneSite)
334     allSites = self.uniquelist(allSites)
335 gutsche 1.3
336 gutsche 1.92 # screen output
337     common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
338    
339 gutsche 1.35 return sites
340 ewv 1.131
def setArgsList(self, argsList):
    """
    Store argsList on the instance as self.argsList.
    """
    setattr(self, 'argsList', argsList)
343    
def jobSplittingByBlocks(self, blockSites):
    """
    Perform job splitting. Jobs run over an integer number of files
    and no more than one block.
    ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
    REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
              self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
              self.maxEvents, self.filesbyblock, self.eventsbyblock, self.eventsbyfile
    SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
          self.total_number_of_jobs - Total # of jobs (also copied to self.ncjobs)
          self.list_of_args - one [files, max events, skip events] entry per job
    """

    # ---- Handle the possible job splitting configurations ---- #
    if self.selectTotalNumberEvents:
        totalEventsRequested = self.total_number_of_events
    if self.selectEventsPerJob:
        eventsPerJobRequested = self.eventsPerJob
        if self.selectNumberOfJobs:
            totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob

    # If user requested all the events in the dataset
    if totalEventsRequested == -1:
        eventsRemaining = self.maxEvents
    # If user requested more events than are in the dataset
    elif totalEventsRequested > self.maxEvents:
        eventsRemaining = self.maxEvents
        # report the effective request: total_number_of_events is 0 when
        # the total comes from number_of_jobs * events_per_job
        common.logger.message("Requested " + str(totalEventsRequested) + " events, but only " + str(self.maxEvents) + " events are available.")
    # If user requested less events than are in the dataset
    else:
        eventsRemaining = totalEventsRequested

    # If user requested more events per job than are in the dataset
    if self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents:
        eventsPerJobRequested = self.maxEvents

    # For user info at end
    totalEventCount = 0

    if self.selectTotalNumberEvents and self.selectNumberOfJobs:
        eventsPerJobRequested = int(eventsRemaining // self.theNumberOfJobs)

    if self.selectNumberOfJobs:
        common.logger.message("May not create the exact number_of_jobs requested.")

    if self.ncjobs == 'all':
        totalNumberOfJobs = 999999999
    else:
        totalNumberOfJobs = self.ncjobs

    blocks = list(blockSites.keys())
    blockCount = 0
    # Backup variable in case self.maxEvents counted events in a non-included block
    numBlocksInDataset = len(blocks)

    jobCount = 0
    list_of_lists = []

    # dictionary tracking which jobs belong to which block
    jobsOfBlock = {}

    # ---- Iterate over the blocks in the dataset until ---- #
    # ---- we've met the requested total # of events    ---- #
    while (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs):
        block = blocks[blockCount]
        blockCount += 1
        if block not in jobsOfBlock:
            jobsOfBlock[block] = []

        if block in self.eventsbyblock:
            numEventsInBlock = self.eventsbyblock[block]
            common.logger.debug(5, 'Events in Block File ' + str(numEventsInBlock))

            files = self.filesbyblock[block]
            numFilesInBlock = len(files)
            if numFilesInBlock <= 0:
                continue
            fileCount = 0

            # ---- New block => New job ---- #
            parString = ""
            # counter for number of events in files currently worked on
            filesEventCount = 0
            # flag if next while loop should touch new file
            newFile = 1
            # job event counter
            jobSkipEventCount = 0

            # ---- Iterate over the files in the block until we've met the requested ---- #
            # ---- total # of events or we've gone over all the files in this block  ---- #
            while (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs):
                fileName = files[fileCount]
                if newFile:
                    try:
                        numEventsInFile = self.eventsbyfile[fileName]
                        common.logger.debug(6, "File " + str(fileName) + " has " + str(numEventsInFile) + " events")
                        # increase filesEventCount
                        filesEventCount += numEventsInFile
                        # Add file to current job: \"name\"\,
                        parString += '\\"' + fileName + '\\"\\,'
                        newFile = 0
                    except KeyError:
                        common.logger.message("File " + str(fileName) + " has unknown number of events: skipping")

                # if less events in file remain than eventsPerJobRequested
                if filesEventCount - jobSkipEventCount < eventsPerJobRequested:
                    # if last file in block
                    if fileCount == numFilesInBlock - 1:
                        # end job using last file, use remaining events in block
                        # close job and touch new file
                        fullString = parString[:-2]  # drop the trailing \,
                        list_of_lists.append([fullString, str(-1), str(jobSkipEventCount)])
                        common.logger.debug(3, "Job " + str(jobCount + 1) + " can run over " + str(filesEventCount - jobSkipEventCount) + " events (last file in block).")
                        self.jobDestination.append(blockSites[block])
                        common.logger.debug(5, "Job " + str(jobCount + 1) + " Destination: " + str(self.jobDestination[jobCount]))
                        # fill jobs of block dictionary
                        jobsOfBlock[block].append(jobCount + 1)
                        # reset counter
                        jobCount = jobCount + 1
                        totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
                        eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
                        jobSkipEventCount = 0
                        # reset file
                        parString = ""
                        filesEventCount = 0
                        newFile = 1
                        fileCount += 1
                    else:
                        # go to next file
                        newFile = 1
                        fileCount += 1
                # if events in file equal to eventsPerJobRequested
                elif filesEventCount - jobSkipEventCount == eventsPerJobRequested:
                    # close job and touch new file
                    fullString = parString[:-2]
                    list_of_lists.append([fullString, str(eventsPerJobRequested), str(jobSkipEventCount)])
                    common.logger.debug(3, "Job " + str(jobCount + 1) + " can run over " + str(eventsPerJobRequested) + " events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5, "Job " + str(jobCount + 1) + " Destination: " + str(self.jobDestination[jobCount]))
                    jobsOfBlock[block].append(jobCount + 1)
                    # reset counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    jobSkipEventCount = 0
                    # reset file
                    parString = ""
                    filesEventCount = 0
                    newFile = 1
                    fileCount += 1

                # if more events in file remain than eventsPerJobRequested
                else:
                    # close job but don't touch new file
                    fullString = parString[:-2]
                    list_of_lists.append([fullString, str(eventsPerJobRequested), str(jobSkipEventCount)])
                    common.logger.debug(3, "Job " + str(jobCount + 1) + " can run over " + str(eventsPerJobRequested) + " events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5, "Job " + str(jobCount + 1) + " Destination: " + str(self.jobDestination[jobCount]))
                    jobsOfBlock[block].append(jobCount + 1)
                    # increase counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    # calculate skip events for last file
                    # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
                    jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[fileName])
                    # remove all but the last file
                    filesEventCount = self.eventsbyfile[fileName]
                    parString = '\\"' + fileName + '\\"\\,'

    self.ncjobs = self.total_number_of_jobs = jobCount
    if eventsRemaining > 0 and jobCount < totalNumberOfJobs:
        common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
    common.logger.message(str(jobCount) + " job(s) can run on " + str(totalEventCount) + " events.\n")

    # screen output
    screenOutput = "List of jobs and available destination sites:\n\n"

    # keep trace of block with no sites to print a warning at the end
    noSiteBlock = []
    bloskNoSite = []

    blockCounter = 0
    for block in blocks:
        if block in jobsOfBlock:
            blockCounter += 1
            # apply black list then white list once and reuse the result
            allowedSites = self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block], block), block)
            screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter, spanRanges(jobsOfBlock[block]), ','.join(allowedSites))
            if len(allowedSites) == 0:
                noSiteBlock.append(spanRanges(jobsOfBlock[block]))
                bloskNoSite.append(blockCounter)

    common.logger.message(screenOutput)
    if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
        msg = 'WARNING: No sites are hosting any part of data for block:\n '
        virgola = ""
        if len(bloskNoSite) > 1:
            virgola = ","
        for block in bloskNoSite:
            msg += ' ' + str(block) + virgola
        msg += '\n Related jobs:\n '
        virgola = ""
        if len(noSiteBlock) > 1:
            virgola = ","
        for range_jobs in noSiteBlock:
            msg += str(range_jobs) + virgola
        msg += '\n will not be submitted and this block of data can not be analyzed!\n'
        if 'EDG.se_white_list' in self.cfg_params:
            msg += 'WARNING: SE White List: ' + self.cfg_params['EDG.se_white_list'] + '\n'
            msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
            msg += 'Please check if the dataset is available at this site!)\n'
        if 'EDG.ce_white_list' in self.cfg_params:
            msg += 'WARNING: CE White List: ' + self.cfg_params['EDG.ce_white_list'] + '\n'
            msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
            msg += 'Please check if the dataset is available at this site!)\n'

        common.logger.message(msg)

    self.list_of_args = list_of_lists
    return
568    
def jobSplittingNoInput(self):
    """
    Split a task without input dataset by number of events per job.

    Derives the missing one of total_number_of_events, eventsPerJob and
    total_number_of_jobs from the two selected ones, then appends one
    empty destination per job to self.jobDestination and rebuilds
    self.list_of_args (each entry holds str(firstRun)+str(job index)
    when self.firstRun is set, otherwise it is empty).

    Raises CrabException when total_number_of_events is negative.
    """
    logger = common.logger
    logger.debug(5, 'Splitting per events')

    if self.selectEventsPerJob:
        logger.message('Required ' + str(self.eventsPerJob) + ' events per job ')
    if self.selectNumberOfJobs:
        logger.message('Required ' + str(self.theNumberOfJobs) + ' jobs in total ')
    if self.selectTotalNumberEvents:
        logger.message('Required ' + str(self.total_number_of_events) + ' events in total ')

    if self.total_number_of_events < 0:
        raise CrabException('Cannot split jobs per Events with "-1" as total number of events')

    if self.selectEventsPerJob:
        if self.selectTotalNumberEvents:
            self.total_number_of_jobs = int(self.total_number_of_events // self.eventsPerJob)
        elif self.selectNumberOfJobs:
            self.total_number_of_jobs = self.theNumberOfJobs
            self.total_number_of_events = int(self.theNumberOfJobs * self.eventsPerJob)
    elif self.selectNumberOfJobs:
        self.total_number_of_jobs = self.theNumberOfJobs
        self.eventsPerJob = int(self.total_number_of_events // self.total_number_of_jobs)

    logger.debug(5, 'N jobs ' + str(self.total_number_of_jobs))

    # events actually covered by the jobs, and what is left over
    covered = int(self.total_number_of_jobs) * self.eventsPerJob
    check = int(self.total_number_of_events) - covered

    logger.debug(5, 'Check ' + str(check))

    summary = str(self.total_number_of_jobs) + ' jobs can be created, each for '
    summary += str(self.eventsPerJob) + ' for a total of '
    summary += str(self.total_number_of_jobs * self.eventsPerJob) + ' events'
    logger.message(summary)
    if check > 0:
        logger.message('Warning: asked ' + str(self.total_number_of_events) + ' but can do only ' + str(covered))

    # argument is seed number.$i
    self.list_of_args = []
    for jobIndex in range(self.total_number_of_jobs):
        ## Since there is no input, any site is good;
        ## the entry must be empty to write correctly the xml
        self.jobDestination.append([""])
        jobArgs = []
        if self.firstRun:
            ## pythia first run
            jobArgs.append(str(self.firstRun) + str(jobIndex))
        self.list_of_args.append(jobArgs)

    return
620    
621 spiga 1.42
def jobSplittingForScript(self):
    """
    Split a script-only task by number of jobs.

    Sets self.total_number_of_jobs to self.theNumberOfJobs, rebuilds
    self.list_of_args with one [str(job index)] entry per job and
    appends one empty destination per job to self.jobDestination.
    """
    logger = common.logger
    logger.debug(5, 'Splitting per job')
    logger.message('Required ' + str(self.theNumberOfJobs) + ' jobs in total ')

    self.total_number_of_jobs = self.theNumberOfJobs

    logger.debug(5, 'N jobs ' + str(self.total_number_of_jobs))
    logger.message(str(self.total_number_of_jobs) + ' jobs can be created')

    jobIndexes = range(self.total_number_of_jobs)
    # Since there is no input, any site is good: empty destination
    self.jobDestination.extend([[""] for jobIndex in jobIndexes])
    # the only argument is the job index, no random seed
    self.list_of_args = [[str(jobIndex)] for jobIndex in jobIndexes]
    return
644    
def split(self, jobParams):
    """
    Record arguments and destination of every job in common.jobDB.

    jobParams is extended in place with one slot per job; the first
    self.total_number_of_jobs slots are then set to the matching
    entries of self.list_of_args.
    """
    jobDB = common.jobDB
    jobDB.load()

    njobs = self.total_number_of_jobs
    # create the empty structure
    jobParams.extend([""] * njobs)

    for job in range(njobs):
        jobParams[job] = self.list_of_args[job]
        jobDB.setArguments(job, jobParams[job])
        destination = self.jobDestination[job]
        common.logger.debug(5, "Job " + str(job) + " Destination: " + str(destination))
        jobDB.setDestination(job, destination)

    jobDB.save()
    return
665 ewv 1.131
def getJobTypeArguments(self, nj, sched):
    """
    Return the arguments stored in common.jobDB for job nj as one
    string, each argument followed by a single blank.

    sched is accepted but not used here.
    """
    pieces = [str(arg) + " " for arg in common.jobDB.arguments(nj)]
    return "".join(pieces)
671 ewv 1.131
def numberOfJobs(self):
    """
    Return self.total_number_of_jobs.
    """
    njobs = self.total_number_of_jobs
    return njobs
675    
def getTarBall(self, exe):
    """
    Return the path of the tarball with lib and exe.

    The path is common.work_space.pathForTgz() + 'share/' +
    self.tgz_name (kept relative to that area for the Boss XML files).
    When no file exists at that path, buildTar_(exe) is called first
    and the stripped path is returned.
    """
    self.tgzNameWithPath = common.work_space.pathForTgz() + 'share/' + self.tgz_name

    if not os.path.exists(self.tgzNameWithPath):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return self.tgzNameWithPath.strip()

    # it already exists: just return it
    return self.tgzNameWithPath
693    
def buildTar_(self, executable):
    """
    Create the tarballs shipped with the job.

    Two gzipped tar files are written:
      * self.tgzNameWithPath: the user executable (only when it is not
        found under the release top), the 'lib' and 'module' directories
        of the user SCRAM area, every 'data' directory found below that
        area, and the ProdCommon directory taken from $CRABDIR;
      * self.MLtgzfile: a fixed list of helper python scripts taken from
        $CRABDIR/python.

    Nothing is done (and None is returned) when the release top is empty
    or equal to the user SCRAM area.

    Raises CrabException when the executable cannot be found, when either
    tarball cannot be written, or when the first tarball is larger than
    self.MaxTarBallSize MB.
    """
    import sys
    import tarfile

    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        return

    try: # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            if self.executable != '':
                exeWithPath = self.scram.findFile_(executable)
                if not exeWithPath:
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship
                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
                    path = swArea+'/'
                    # distinguish case when script is in user project area or given by full path somewhere else
                    if exeWithPath.find(path) >= 0:
                        exe = exeWithPath.replace(path, '')
                        tar.add(path+exe, exe)
                    else:
                        tar.add(exeWithPath, os.path.basename(executable))
                # else: the exe is from release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            libDir = 'lib'
            lib = swArea+'/'+libDir
            common.logger.debug(5,"lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib, libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea+'/'+moduleDir
            if os.path.isdir(module):
                tar.add(module, moduleDir)

            ## Now check if any data dir(s) is present
            swAreaLen = len(swArea)
            for root, dirs, files in os.walk(swArea):
                if "data" in dirs:
                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
                    tar.add(root+"/data", root[swAreaLen:]+"/data")
                    # tar.add() already recursed into this directory: prune
                    # it so nested 'data' dirs are not archived twice
                    dirs.remove("data")

            ## Add ProdCommon dir to tar
            prodcommonDir = 'ProdCommon'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
            if os.path.isdir(prodcommonPath):
                tar.add(prodcommonPath, prodcommonDir)

            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            # always release the file handle, also on failure
            tar.close()
    except CrabException:
        # keep the specific message (e.g. executable not found)
        raise
    except Exception:
        # sys.exc_info() keeps this valid on every Python version in use
        raise CrabException('Could not create tar-ball: ' + str(sys.exc_info()[1]))

    ## check for tarball size
    tarballinfo = os.stat(self.tgzNameWithPath)
    if tarballinfo.st_size > self.MaxTarBallSize*1024*1024:
        raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')

    ## create tar-ball with ML stuff
    self.MLtgzfile = common.work_space.pathForTgz()+'share/MLfiles.tgz'
    try:
        tar = tarfile.open(self.MLtgzfile, "w:gz")
        try:
            path = os.environ['CRABDIR'] + '/python/'
            for fname in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py','writeCfg.py']:
                tar.add(path+fname, fname)
            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
        finally:
            tar.close()
    except Exception:
        raise CrabException('Could not create ML files tar-ball: ' + str(sys.exc_info()[1]))

    return
789 ewv 1.131
def additionalInputFileTgz(self):
    """
    Put all additional files into a tar ball and return its name

    Every entry of self.additional_inbox_files is stored under its base
    name (directory part dropped).  The archive is written to
    <pathForTgz()>share/<self.additional_tgz_name> and is closed even if
    adding a file fails.
    """
    import tarfile
    tarName = common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
    tar = tarfile.open(tarName, "w:gz")
    try:
        for path in self.additional_inbox_files:
            # os.path.basename replaces the deprecated string.split(...)[-1]
            tar.add(path, os.path.basename(path))
        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
    finally:
        tar.close()
    return tarName
802    
def wsSetupEnvironment(self, nj):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    The returned shell text, in order:
      * sets up the CMS environment for LCG or OSG middleware (on OSG a
        temporary working directory is created first);
      * creates the CMSSW project area for self.version and evaluates the
        scram runtime environment inside it;
      * checks that the wrapper received at least as many arguments as
        self.argsList[nj] contains;
      * exports the dataset related variables, copies the job
        configuration file to pset.cfg, unpacks the additional input
        tarball and computes PSETHASH.
    Failures in the generated script set job_exit_code and call func_exit.
    """
    # Prepare JobType-independent part
    txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
    txt += 'echo ">>> setup environment"\n'
    txt += 'if [ $middleware == LCG ]; then \n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' if [ ! $? == 0 ] ;then\n'
    txt += ' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' job_exit_code=10016\n'
    txt += ' func_exit\n'
    txt += ' fi\n'
    txt += ' echo ">>> Created working directory: $WORKING_DIR"\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo ">>> specific cmssw setup environment:"\n'
    txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
    txt += scram+' project CMSSW '+self.version+'\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
    txt += ' job_exit_code=10034\n'
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    # report the version really requested (was a hard-coded CMSSW_0_6_1)
    txt += ' echo "ERROR ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW '+self.version+' not found on `hostname`"\n'
    txt += ' job_exit_code=10018\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += ' func_exit\n'
    txt += 'fi \n'
    txt += 'cd '+self.version+'\n'
    ########## FEDE FOR DBS2 ######################
    txt += 'SOFTWARE_DIR=`pwd`\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    ###############################################
    ### needed grep for bug in scramv1 ###
    txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber)\n"
    txt += "\n"
    txt += "if [ $nargs -lt "+str(len(self.argsList[nj].split()))+" ]\n"
    txt += "then\n"
    txt += " echo 'ERROR ==> Too few arguments' +$nargs+ \n"
    txt += ' job_exit_code=50113\n'
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "ERROR ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
    txt += ' job_exit_code=50114\n'
    txt += ' fi\n'
    txt += ' fi\n'
    txt += " func_exit\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    ### FEDE FOR DBS OUTPUT PUBLICATION
    if self.datasetPath:
        txt += '\n'
        txt += 'DatasetPath='+self.datasetPath+'\n'

        datasetpath_split = self.datasetPath.split("/")

        txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
        txt += 'DataTier='+datasetpath_split[2]+'\n'
        txt += 'ApplicationFamily=cmsRun\n'

    else:
        txt += 'DatasetPath=MCDataTier\n'
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'
    if self.pset is not None:
        pset = os.path.basename(job.configFilename())
        txt += '\n'
        txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
        if self.datasetPath: # standard job
            txt += 'InputFiles=${args[1]}; export InputFiles\n'
            txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
            txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
            txt += 'echo "Inputfiles:<$InputFiles>"\n'
            txt += 'echo "MaxEvents:<$MaxEvents>"\n'
            txt += 'echo "SkipEvents:<$SkipEvents>"\n'
        else: # pythia like job
            txt += 'PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n'
            txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
            txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
            txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
            if self.firstRun:
                txt += 'FirstRun=${args[1]}; export FirstRun\n'
                txt += 'echo "FirstRun: <$FirstRun>"\n'

        txt += 'mv -f '+pset+' pset.cfg\n'

    if len(self.additional_inbox_files) > 0:
        txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
        txt += ' tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
        txt += 'fi\n'

    if self.pset is not None:
        txt += '\n'
        txt += 'echo "***** cat pset.cfg *********"\n'
        txt += 'cat pset.cfg\n'
        txt += 'echo "****** end pset.cfg ********"\n'
        txt += '\n'
        txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
        txt += 'echo "PSETHASH = $PSETHASH" \n'
        txt += '\n'
    return txt
960    
def wsBuildExe(self, nj=0):
    """
    Put in the script the commands to build an executable
    or a library.

    When the tarball self.tgzNameWithPath exists on the submitting side,
    the returned shell text untars it from $RUNTIME_AREA (on failure it
    sets job_exit_code and calls func_exit) and then puts
    $SOFTWARE_DIR/ProdCommon in front of PYTHONPATH.  Without a tarball
    only the header comment is returned.  `nj` is not used.
    """
    txt = '\n#Written by cms_cmssw::wsBuildExe\n'

    if not os.path.isfile(self.tgzNameWithPath):
        return txt

    tgz_base = os.path.basename(self.tgzNameWithPath)
    txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+tgz_base+' :" \n'
    txt += 'tar xzvf $RUNTIME_AREA/'+tgz_base+'\n'
    txt += 'untar_status=$? \n'
    txt += 'if [ $untar_status -ne 0 ]; then \n'
    txt += ' echo "ERROR ==> Untarring .tgz file failed"\n'
    txt += ' job_exit_code=$untar_status\n'
    txt += ' echo "JobExitCode=$untar_status" >> $RUNTIME_AREA/$repo\n'
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "ERROR ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
    txt += ' job_exit_code=50999\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += ' \n'
    txt += ' func_exit\n'
    txt += 'else \n'
    txt += ' echo "Successful untar" \n'
    txt += 'fi \n'
    txt += '\n'
    #### Removed ProdAgent API dependencies
    txt += 'echo ">>> Include ProdCommon in PYTHONPATH:"\n'
    txt += 'if [ -z "$PYTHONPATH" ]; then\n'
    #### FEDE FOR DBS OUTPUT PUBLICATION
    txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdCommon\n'
    txt += 'else\n'
    txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
    txt += 'fi\n'
    # echo after the 'fi' so the value is logged in both branches (it used
    # to be printed only when PYTHONPATH was already set)
    txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
    txt += '\n'

    return txt
1016    
def modifySteeringCards(self, nj):
    """
    modify the card provided by the user,
    writing a new card into share dir

    Nothing is done in this implementation: the method has no body
    besides this docstring and returns None.
    """
    return None
1022 ewv 1.131
def executableName(self):
    """
    Return the command used to start the job payload: "sh " when a user
    script (self.scriptExe) is set, otherwise self.executable.
    """
    if not self.scriptExe: #CarlosDaniele
        return self.executable
    return "sh "
1028 slacapra 1.1
def executableArgs(self):
    """
    Return the command line arguments for the job executable.

    With a user script (self.scriptExe) the result is the script name
    followed by " $NJob".  Otherwise the arguments depend on the CMSSW
    version reported by scram (expected form: <name>_<major>_<minor>_...):
      * " -j <self.fjrFileName>" (framework job report) from 1_5 on;
      * " -p pset.pycfg" from major version 2 on, " -p pset.cfg" before.

    Raises CrabException when major/minor cannot be parsed.
    """
    # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
    if self.scriptExe:#CarlosDaniele
        return self.scriptExe + " $NJob"

    version_array = self.scram.getSWVersion().split('_')
    try:
        major = int(version_array[1])
        minor = int(version_array[2])
    except (IndexError, ValueError):
        msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
        raise CrabException(msg)

    ex_args = ""

    # Framework job report: compare (major, minor) as a pair, so that e.g.
    # 2_0 is correctly recognised as newer than 1_5
    if (major, minor) >= (1, 5):
        ex_args += " -j " + self.fjrFileName

    # Type of cfg file
    if major >= 2:
        ex_args += " -p pset.pycfg"
    else:
        ex_args += " -p pset.cfg"
    return ex_args
1056 slacapra 1.1
def inputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL input sandbox.

    In order: the code tarball and the ML tarball (each only if the file
    exists), the job configuration file (only if self.pset is not None)
    and the tarball with the additional input files.  `nj` is not used.
    """
    sandbox = []
    ## code
    code_tgz = self.tgzNameWithPath
    if os.path.isfile(code_tgz):
        sandbox.append(code_tgz)
    ml_tgz = self.MLtgzfile
    if os.path.isfile(ml_tgz):
        sandbox.append(ml_tgz)
    ## config
    if self.pset is not None:
        sandbox.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
    ## additional input files
    sandbox.append(self.additionalInputFileTgz())
    return sandbox
1076    
def outputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL output sandbox.

    Each name in self.output_file followed by self.output_file_sandbox is
    numbered with nj+1 through numberFile_.
    """
    ## User Declared output files
    declared = self.output_file + self.output_file_sandbox
    return [self.numberFile_(name, str(nj + 1)) for name in declared]
1088    
def prepareSteeringCards(self):
    """
    Make initial modifications of the user's steering card file.

    This implementation performs no modification and returns None.
    """
    return None
1094    
1095     def wsRenameOutput(self, nj):
1096     """
1097     Returns part of a job script which renames the produced files.
1098     """
1099    
1100 ewv 1.160 txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
1101 fanzago 1.148 txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1102     txt += 'echo ">>> current directory content:"\n'
1103 gutsche 1.7 txt += 'ls \n'
1104 fanzago 1.145 txt += '\n'
1105 slacapra 1.54
1106 fanzago 1.161 #txt += 'output_exit_status=0\n'
1107 ewv 1.131
1108 fanzago 1.128 for fileWithSuffix in (self.output_file_sandbox):
1109     output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1110     txt += '\n'
1111     txt += '# check output file\n'
1112     txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1113 mcinquil 1.144 txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1114     txt += ' ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1115 fanzago 1.128 txt += 'else\n'
1116 fanzago 1.161 #txt += ' exit_status=60302\n'
1117     #txt += ' echo "ERROR: Output file '+fileWithSuffix+' not found"\n'
1118     txt += ' echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
1119     txt += ' job_exit_code=60302\n'
1120 ewv 1.156 if common.scheduler.name().upper() == 'CONDOR_G':
1121 fanzago 1.128 txt += ' if [ $middleware == OSG ]; then \n'
1122     txt += ' echo "prepare dummy output file"\n'
1123     txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1124     txt += ' fi \n'
1125     txt += 'fi\n'
1126 ewv 1.131
1127 fanzago 1.128 for fileWithSuffix in (self.output_file):
1128 slacapra 1.1 output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1129     txt += '\n'
1130 gutsche 1.7 txt += '# check output file\n'
1131 slacapra 1.106 txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1132 ewv 1.147 if (self.copy_data == 1): # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
1133     txt += ' mv '+fileWithSuffix+' '+output_file_num+'\n'
1134     txt += ' ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1135     else:
1136     txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1137     txt += ' ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1138 slacapra 1.106 txt += 'else\n'
1139 fanzago 1.161 #txt += ' exit_status=60302\n'
1140     #txt += ' echo "ERROR: Output file '+fileWithSuffix+' not found"\n'
1141     #txt += ' echo "JOB_EXIT_STATUS = $exit_status"\n'
1142     #txt += ' output_exit_status=$exit_status\n'
1143     txt += ' job_exit_code=60302\n'
1144     txt += ' echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
1145 ewv 1.156 if common.scheduler.name().upper() == 'CONDOR_G':
1146 gutsche 1.7 txt += ' if [ $middleware == OSG ]; then \n'
1147     txt += ' echo "prepare dummy output file"\n'
1148     txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1149     txt += ' fi \n'
1150 slacapra 1.1 txt += 'fi\n'
1151 slacapra 1.105 file_list = []
1152     for fileWithSuffix in (self.output_file):
1153     file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1154 ewv 1.131
1155 slacapra 1.105 txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1156 fanzago 1.149 txt += '\n'
1157 fanzago 1.148 txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1158     txt += 'echo ">>> current directory content:"\n'
1159     txt += 'ls \n'
1160     txt += '\n'
1161 gutsche 1.7 txt += 'cd $RUNTIME_AREA\n'
1162 fanzago 1.133 txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1163 slacapra 1.1 return txt
1164    
def numberFile_(self, file, txt):
    """
    append _'txt' before last extension of a file

    Examples: ('out.root', '3') -> 'out_3.root',
              ('a.b.c', '3')    -> 'a.b_3.c',
              ('noext', '3')    -> 'noext_3'.
    """
    # position of the dot introducing the last extension (-1: none)
    dot = file.rfind('.')
    if dot < 0:
        return file + '_' + txt
    # file[dot:] still carries the dot, so the extension is kept unchanged
    return file[:dot] + '_' + txt + file[dot:]
1182    
def getRequirements(self, nj=[]):
    """
    return job requirements to add to jdl files

    The expression is the ' && ' conjunction of:
      * membership of the VO-cms-<version> tag in the runtime environment
        (only when self.version is set);
      * membership of the VO-cms-<arch> tag (only when
        self.executable_arch is set);
      * outbound IP connectivity;
      * production CE state, for the 'glitecoll' scheduler only.
    `nj` is not used (and never modified).
    """
    glue_env = 'other.GlueHostApplicationSoftwareRunTimeEnvironment'

    # Collect the single clauses and join them at the end: this avoids a
    # dangling leading ' && ' when no software version is available.
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + '", ' + glue_env + ')')
    ## SL add requirement for OS version only if SL4
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + '", ' + glue_env + ')')

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')
    if common.scheduler.name() == "glitecoll":
        clauses.append('other.GlueCEStateStatus == "Production" ')

    return ' && '.join(clauses)
1204 gutsche 1.3
def configFilename(self):
    """ return the config filename: the value of self.name() plus '.cfg' """
    base = self.name()
    return base + '.cfg'
1208    
def wsSetupCMSOSGEnvironment_(self):
    """
    Returns part of a job script which prepares the execution environment
    on OSG and which is common for all CMS jobs.

    The generated shell code exports SCRAM_ARCH (self.executable_arch) and
    sources $OSG_APP/cmssoft/cms/cmsset_default.sh for self.version; when
    that file is missing it removes the working directory, sets
    job_exit_code and calls func_exit.
    """
    arch = self.executable_arch
    pieces = [
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh ' + self.version + '\n',
        ' else\n',
        ' echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' job_exit_code=10020\n',
        ' cd $RUNTIME_AREA\n',
        ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n',
        ' echo ">>> Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "ERROR ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' job_exit_code=10017\n',
        ' fi\n',
        '\n',
        ' func_exit\n',
        ' fi\n',
        '\n',
        ' echo "==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(pieces)
1250 ewv 1.131
1251 gutsche 1.3 ### OLI_DANIELE
def wsSetupCMSLCGEnvironment_(self):
    """
    Returns part of a job script which prepares the execution environment
    on LCG and which is common for all CMS jobs.

    The generated shell code exports SCRAM_ARCH and BUILD_ARCH
    (self.executable_arch) and sources $VO_CMS_SW_DIR/cmsset_default.sh;
    each failure sets job_exit_code and calls func_exit.
    """
    arch = self.executable_arch
    pieces = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' export BUILD_ARCH=' + arch + '\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n',
        ' job_exit_code=10031\n',
        ' func_exit\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "==> setup cms environment ok"\n',
    ]
    return ''.join(pieces)
1300 gutsche 1.5
1301 ewv 1.131 ### FEDE FOR DBS OUTPUT PUBLICATION
def modifyReport(self, nj):
    """
    insert the part of the script that modifies the FrameworkJob Report

    Shell code is produced only when the 'USER.publish_data' parameter is
    1 (a missing parameter counts as 0); otherwise just the header comment
    is returned.  The code runs ModifyJobReport.py from ProdCommon on
    crab_fjr_$NJob.xml when $copy_exit_status is 0, using the
    'USER.publish_data_name' parameter as ProcessedDataset.  `nj` is not
    used.
    """
    script = '\n#Written by cms_cmssw::modifyReport\n'

    publish_data = int(self.cfg_params.get('USER.publish_data', 0))
    if publish_data != 1:
        return script

    modify_cmd = '$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH'

    script += 'if [ $copy_exit_status -eq 0 ]; then\n'
    script += ' echo ">>> Modify Job Report:" \n'
    script += ' chmod a+x $SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
    script += ' echo "SE = $SE"\n'
    script += ' echo "SE_PATH = $SE_PATH"\n'

    processedDataset = self.cfg_params['USER.publish_data_name']
    tail = [
        ' ProcessedDataset=' + processedDataset + '\n',
        " tmp=`echo $SE_PATH | awk -F 'store' '{print$2}'` \n",
        ' FOR_LFN=/store$tmp \n',
        ' echo "ProcessedDataset = $ProcessedDataset"\n',
        ' echo "FOR_LFN = $FOR_LFN" \n',
        ' echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
        ' echo "' + modify_cmd + '"\n',
        ' ' + modify_cmd + '\n',
        ' modifyReport_result=$?\n',
        ' if [ $modifyReport_result -ne 0 ]; then\n',
        ' modifyReport_result=70500\n',
        ' job_exit_code=$modifyReport_result\n',
        ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "WARNING: Problem with ModifyJobReport"\n',
        ' else\n',
        ' mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n',
        ' fi\n',
        'fi\n',
    ]
    return script + ''.join(tail)
1353 fanzago 1.99
def cleanEnv(self):
    """
    Build the shell fragment that cleans up the worker node.

    The generated code only acts when $middleware is OSG: it changes to
    $RUNTIME_AREA and removes $WORKING_DIR.  If the directory is still
    present after the removal, the fragment sets job_exit_code=60999 and
    invokes the shell function func_exit.

    Returns the fragment as a single string.
    """
    fragment = [
        '\n#Written by cms_cmssw::cleanEnv\n',
        'if [ $middleware == OSG ]; then\n',
        ' cd $RUNTIME_AREA\n',
        ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n',
        ' echo ">>> Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        # a directory that survives the rm is reported as a job failure
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "ERROR ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n',
        ' job_exit_code=60999\n',
        ' func_exit\n',
        ' fi\n',
        'fi\n',
        '\n',
    ]
    return ''.join(fragment)
1373 fanzago 1.93
def setParam_(self, param, value):
    """
    Store value under the key param in self._params,
    replacing any value previously stored under that key.
    """
    self._params.update({param: value})
1376    
def getParams(self):
    """
    Return self._params, the mapping filled by setParam_.
    The object itself is returned, not a copy.
    """
    stored = self._params
    return stored
1379 gutsche 1.8
def uniquelist(self, old):
    """
    remove duplicates from a list

    Returns a new list holding every element of old exactly once, in the
    order of first appearance.  The input is not modified.  Elements must
    be hashable, since they are used as dictionary keys.
    """
    seen = {}
    unique = []
    for element in old:
        # build the result explicitly: dict.keys() has no guaranteed order
        # on old interpreters and is not a list on Python 3
        if element not in seen:
            seen[element] = 0
            unique.append(element)
    return unique
1388 mcinquil 1.121
1389    
def checkOut(self, limit):
    """
    check the dimension of the output files

    Build the shell fragment that compares the total size of the files
    destined for the output sandbox with limit and, when the total is
    larger, deletes files and sets job_exit_code=70000.

    limit is written verbatim into the script and compared with sizes
    taken from the third column of `ls -gGrta` (presumably bytes; confirm
    against the caller).

    The checked files are the numbered names of self.output_file_sandbox
    (plus self.output_file when self.return_data == 1), followed by the
    job stdout and stderr files.

    Returns the fragment as a single string.
    """
    # user output files travel in the sandbox only when return_data is set
    if self.return_data == 1:
        candidates = self.output_file + self.output_file_sandbox
    else:
        candidates = self.output_file_sandbox

    listOutFiles = [self.numberFile_(name, '$NJob') for name in candidates]
    listOutFiles.append('$stdoutFile')
    listOutFiles.append('$stderrFile')
    fileNames = ' '.join(listOutFiles)

    lines = [
        '\n#Written by cms_cmssw::checkOut\n',
        'echo ">>> Starting output sandbox limit check :"\n',
        'stdoutFile=`ls *stdout` \n',
        'stderrFile=`ls *stderr` \n',
        'echo "output files: ' + fileNames + '"\n',
        'filesToCheck="' + fileNames + '"\n',
        'ls -gGhrta;\n',
        # first pass: total size of the files that actually exist
        'sum=0;\n',
        'for file in $filesToCheck ; do\n',
        ' if [ -e $file ]; then\n',
        ' tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n',
        ' sum=`expr $sum + $tt`\n',
        ' else\n',
        ' echo "WARNING: output file $file not found!"\n',
        ' fi\n',
        'done\n',
        'echo "Total Output dimension: $sum";\n',
        'limit=' + str(limit) + ';\n',
        'echo "WARNING: output files size limit is set to: $limit";\n',
        'if [ $limit -lt $sum ]; then\n',
        ' echo "WARNING: output files have to big size - something will be lost;"\n',
        ' echo " checking the output file sizes..."\n',
        # second pass: drop every file that is itself over the limit or
        # that pushes the running total over it
        ' tot=0;\n',
        ' for filefile in $filesToCheck ; do\n',
        # a missing file has no size; skipping it keeps expr and the
        # numeric tests below from receiving an empty operand
        ' [ -e $filefile ] || continue\n',
        ' dimFile=`ls -gGrta $filefile | awk \'{ print $3 }\';`\n',
        # accumulate the size of the current file (the original added the
        # stale $tt left over from the first loop)
        ' tot=`expr $tot + $dimFile`;\n',
        ' if [ $limit -lt $dimFile ]; then\n',
        ' echo "deleting file: $filefile";\n',
        ' rm -f $filefile\n',
        ' elif [ $limit -lt $tot ]; then\n',
        ' echo "deleting file: $filefile";\n',
        ' rm -f $filefile\n',
        ' else\n',
        ' echo "saving file: $filefile"\n',
        ' fi\n',
        ' done\n',
        ' ls -agGhrt\n',
        ' echo "WARNING: output files are too big in dimension: can not put in the output_sandbox."\n',
        ' job_exit_code=70000\n',
        'else\n',
        ' echo "Total Output dimension $sum is fine."\n',
        'fi\n',
        'echo "Ending output sandbox limit check"\n',
    ]
    return ''.join(lines)