ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.166
Committed: Tue Mar 18 15:43:43 2008 UTC (17 years, 1 month ago) by fanzago
Content type: text/x-python
Branch: MAIN
Changes since 1.165: +106 -85 lines
Log Message:
changes to write the FJR from beginning of job wrapper

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5 fanzago 1.115 from BlackWhiteListParser import BlackWhiteListParser
6 slacapra 1.1 import common
7     import Scram
8    
9 slacapra 1.105 import os, string, glob
10 slacapra 1.1
11     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs):
    """
    Configure a CMSSW job type from the user configuration.

    Reads the CMSSW.*, USER.* and EDG.* keys of cfg_params, validates them,
    runs data discovery/location when a dataset is given, prepares the
    user tarball, splits the task into jobs and finally writes the
    modified parameter set.

    ARGUMENT: cfg_params: mapping of 'SECTION.key' names to values
              ncjobs: number of jobs to create (jobSplittingByBlocks also
                      accepts the string 'all')
    RAISES:   CrabException on missing or inconsistent configuration
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug(3,'CMSSW::__init__')

    self.argsList = []

    self._params = {}
    self.cfg_params = cfg_params
    # init BlackWhiteListParser
    self.blackWhiteListParser = BlackWhiteListParser(cfg_params)

    self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',9.5))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    log = common.logger

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.additional_tgz_name = 'additional.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''         # script use case
    self.datasetPath = ''  # script use case

    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()

    # (the former arch/version mismatch check is disabled)

    self.setParam_('application', self.version)

    ### collect Data cards

    if 'CMSSW.datasetpath' not in cfg_params:
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)
    tmp = cfg_params['CMSSW.datasetpath']
    log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
    if tmp.lower() == 'none':
        self.datasetPath = None
        self.selectNoInput = 1
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0

    # ML monitoring
    # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
    if not self.datasetPath:
        self.setParam_('dataset', 'None')
        self.setParam_('owner', 'None')
    else:
        try:
            datasetpath_split = self.datasetPath.split("/")
            # standard style
            self.setParam_('datasetFull', self.datasetPath)
            self.setParam_('dataset', datasetpath_split[1])
            self.setParam_('owner', datasetpath_split[2])
        except IndexError:
            # path with fewer than three '/'-separated parts:
            # fall back to the raw string for both fields
            self.setParam_('dataset', self.datasetPath)
            self.setParam_('owner', self.datasetPath)

    # NOTE(review): the listing lost its indentation; this call is assumed
    # to sit at method level (executed with or without a dataset) - confirm.
    self.setParam_('taskId', common._db.queryTask('name'))

    self.dataTiers = []

    ## now the application
    self.executable = cfg_params.get('CMSSW.executable','cmsRun')
    self.setParam_('exe', self.executable)
    log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)

    if 'CMSSW.pset' not in cfg_params:
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
    if self.pset.lower() != 'none':
        if not os.path.exists(self.pset):
            raise CrabException("User defined PSet file "+self.pset+" does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    outFiles = cfg_params.get('CMSSW.output_file',None)
    if outFiles:
        tmpOutFiles = outFiles.split(',')
        log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
        for outFile in tmpOutFiles:
            self.output_file.append(outFile.strip())
    else:
        log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")

    # script_exe file as additional file in inputSandbox
    self.scriptExe = cfg_params.get('USER.script_exe',None)
    if self.scriptExe:
        if not os.path.isfile(self.scriptExe):
            msg ="ERROR. file "+self.scriptExe+" not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    # With neither dataset nor pset a user script is mandatory.
    # scriptExe is None (not '') when USER.script_exe is absent, so test
    # for falsiness rather than comparing with the empty string.
    if self.datasetPath is None and self.pset is None and not self.scriptExe:
        msg ="Error. script_exe not defined"
        raise CrabException(msg)

    ## additional input files
    if 'USER.additional_input_files' in cfg_params:
        for pattern in cfg_params['USER.additional_input_files'].split(','):
            pattern = pattern.strip()
            if not pattern:
                # tolerate empty entries such as a trailing comma
                continue
            if '*' in pattern:
                dirname = ''
                if not pattern.startswith('/'):
                    dirname = '.'
                files = glob.glob(os.path.join(dirname, pattern))
                if not files:
                    raise CrabException("No additional input file found with this pattern: "+pattern)
            else:
                files = [pattern]
            for fileName in files:
                if not os.path.exists(fileName):
                    raise CrabException("Additional input file not found: "+fileName)
                self.additional_inbox_files.append(fileName.strip())
        common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))

    ## Events per job
    if 'CMSSW.events_per_job' in cfg_params:
        self.eventsPerJob = int(cfg_params['CMSSW.events_per_job'])
        self.selectEventsPerJob = 1
    else:
        self.eventsPerJob = -1
        self.selectEventsPerJob = 0

    ## number of jobs
    if 'CMSSW.number_of_jobs' in cfg_params:
        self.theNumberOfJobs = int(cfg_params['CMSSW.number_of_jobs'])
        self.selectNumberOfJobs = 1
    else:
        self.theNumberOfJobs = 0
        self.selectNumberOfJobs = 0

    if 'CMSSW.total_number_of_events' in cfg_params:
        self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
        self.selectTotalNumberEvents = 1
    else:
        self.total_number_of_events = 0
        self.selectTotalNumberEvents = 0

    if self.pset is not None:
        if (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2:
            msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
            raise CrabException(msg)
    else:
        if self.selectNumberOfJobs == 0:
            msg = 'Must specify number_of_jobs.'
            raise CrabException(msg)

    ## New method of dealing with seeds
    # str.strip() returns a new string, so the stripped value must be the
    # one that is stored.
    self.incrementSeeds = []
    self.preserveSeeds = []
    if 'CMSSW.preserve_seeds' in cfg_params:
        for seed in cfg_params['CMSSW.preserve_seeds'].split(','):
            self.preserveSeeds.append(seed.strip())
    if 'CMSSW.increment_seeds' in cfg_params:
        for seed in cfg_params['CMSSW.increment_seeds'].split(','):
            self.incrementSeeds.append(seed.strip())

    ## Old method of dealing with seeds
    ## FUTURE: This is for old CMSSW and old CRAB. Can throw exceptions after a couple of CRAB releases and then
    ## remove
    # print(...) with a single string argument behaves the same with the
    # Python 2 print statement and the print function.
    self.sourceSeed = cfg_params.get('CMSSW.pythia_seed',None)
    if self.sourceSeed:
        print("pythia_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n"
              "Added to increment_seeds.")
        self.incrementSeeds.append('sourceSeed')

    self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
    if self.sourceSeedVtx:
        print("vtx_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n"
              "Added to increment_seeds.")
        self.incrementSeeds.append('VtxSmeared')

    self.sourceSeedG4 = cfg_params.get('CMSSW.g4_seed',None)
    if self.sourceSeedG4:
        print("g4_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n"
              "Added to increment_seeds.")
        self.incrementSeeds.append('g4SimHits')

    self.sourceSeedMix = cfg_params.get('CMSSW.mix_seed',None)
    if self.sourceSeedMix:
        print("mix_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n"
              "Added to increment_seeds.")
        self.incrementSeeds.append('mix')

    self.firstRun = cfg_params.get('CMSSW.first_run',None)

    if self.pset is not None:
        import PsetManipulator as pp
        PsetEdit = pp.PsetManipulator(self.pset)

    # Copy/return
    self.copy_data = int(cfg_params.get('USER.copy_data',0))
    self.return_data = int(cfg_params.get('USER.return_data',0))

    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents=0        # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths={}        # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination=[]  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end

    self.tgzNameWithPath = self.getTarBall(self.executable)

    ## Select Splitting
    if self.selectNoInput:
        if self.pset is None:
            self.jobSplittingForScript()
        else:
            self.jobSplittingNoInput()
    else:
        self.jobSplittingByBlocks(blockSites)

    # modify Pset
    if self.pset is not None:
        try:
            # Add FrameworkJobReport to parameter-set, set max events.
            # Reset later for data jobs by writeCFG which does all modifications
            PsetEdit.addCrabFJR(self.fjrFileName)
            PsetEdit.maxEvent(self.eventsPerJob)
            PsetEdit.psetWriter(self.configFilename())
        except Exception:
            # not a bare except: do not trap SystemExit/KeyboardInterrupt
            msg='Error while manipuliating ParameterSet: exiting...'
            raise CrabException(msg)
287 gutsche 1.3
288 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
289    
290 slacapra 1.86 import DataDiscovery
291     import DataLocation
292 gutsche 1.3 common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
293    
294     datasetPath=self.datasetPath
295    
296 slacapra 1.1 ## Contact the DBS
297 gutsche 1.92 common.logger.message("Contacting Data Discovery Services ...")
298 slacapra 1.1 try:
299 slacapra 1.137 self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
300 slacapra 1.1 self.pubdata.fetchDBSInfo()
301    
302 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
303 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
304     raise CrabException(msg)
305 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
306 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
307     raise CrabException(msg)
308 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
309 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
310 slacapra 1.1 raise CrabException(msg)
311    
312 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
313 mkirn 1.37 self.eventsbyblock=self.pubdata.getEventsPerBlock()
314     self.eventsbyfile=self.pubdata.getEventsPerFile()
315 gutsche 1.3
316 slacapra 1.1 ## get max number of events
317 ewv 1.131 self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py
318 slacapra 1.1
319     ## Contact the DLS and build a list of sites hosting the fileblocks
320     try:
321 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
322 gutsche 1.6 dataloc.fetchDLSInfo()
323 slacapra 1.41 except DataLocation.DataLocationError , ex:
324 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
325     raise CrabException(msg)
326 ewv 1.131
327 slacapra 1.1
328 gutsche 1.35 sites = dataloc.getSites()
329     allSites = []
330     listSites = sites.values()
331 slacapra 1.63 for listSite in listSites:
332     for oneSite in listSite:
333 gutsche 1.35 allSites.append(oneSite)
334     allSites = self.uniquelist(allSites)
335 gutsche 1.3
336 gutsche 1.92 # screen output
337     common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
338    
339 gutsche 1.35 return sites
340 ewv 1.131
341 spiga 1.165 # to Be Removed DS -- BL
342     # def setArgsList(self, argsList):
343     # self.argsList = argsList
344 mcinquil 1.140
def jobSplittingByBlocks(self, blockSites):
    """
    Perform job splitting. Jobs run over an integer number of files
    and no more than one block.
    ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
    REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberofJobs,
              self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
              self.maxEvents, self.filesbyblock, self.eventsbyblock, self.eventsbyfile
    SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
          self.total_number_of_jobs - Total # of jobs
          self.ncjobs - overwritten with the number of jobs actually created
          self.list_of_args - per job: [file list string, max events, skip events]
    """

    # ---- Handle the possible job splitting configurations ---- #
    # NOTE(review): nesting reconstructed from a listing without indentation.
    # The number_of_jobs case must sit under events_per_job, otherwise
    # total+jobs would multiply by the -1 default of eventsPerJob.
    if self.selectTotalNumberEvents:
        totalEventsRequested = self.total_number_of_events
    if self.selectEventsPerJob:
        eventsPerJobRequested = self.eventsPerJob
        if self.selectNumberOfJobs:
            totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob

    # If user requested all the events in the dataset
    if totalEventsRequested == -1:
        eventsRemaining = self.maxEvents
    # If user requested more events than are in the dataset
    elif totalEventsRequested > self.maxEvents:
        eventsRemaining = self.maxEvents
        # report the effective request: total_number_of_events is 0 when the
        # user gave events_per_job and number_of_jobs instead
        common.logger.message("Requested "+str(totalEventsRequested)+ " events, but only "+str(self.maxEvents)+" events are available.")
    # If user requested less events than are in the dataset
    else:
        eventsRemaining = totalEventsRequested

    # If user requested more events per job than are in the dataset
    if self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents:
        eventsPerJobRequested = self.maxEvents

    # For user info at end
    totalEventCount = 0

    if self.selectTotalNumberEvents and self.selectNumberOfJobs:
        # At least one event per job: with a quotient of 0 the file loop
        # below would keep emitting empty jobs without consuming events.
        eventsPerJobRequested = max(1, int(eventsRemaining // self.theNumberOfJobs))

    if self.selectNumberOfJobs:
        common.logger.message("May not create the exact number_of_jobs requested.")

    if self.ncjobs == 'all':
        totalNumberOfJobs = 999999999
    else:
        totalNumberOfJobs = self.ncjobs

    blocks = blockSites.keys()
    blockCount = 0
    # Backup variable in case self.maxEvents counted events in a non-included block
    numBlocksInDataset = len(blocks)

    jobCount = 0
    list_of_lists = []

    # dictionary tracking which jobs belong to which block
    jobsOfBlock = {}

    # ---- Iterate over the blocks in the dataset until ---- #
    # ---- we've met the requested total # of events    ---- #
    while (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs):
        block = blocks[blockCount]
        blockCount += 1
        if block not in jobsOfBlock:
            jobsOfBlock[block] = []

        if block in self.eventsbyblock:
            numEventsInBlock = self.eventsbyblock[block]
            common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))

            files = self.filesbyblock[block]
            numFilesInBlock = len(files)
            if numFilesInBlock <= 0:
                continue
            fileCount = 0

            # ---- New block => New job ---- #
            parString = ""
            # counter for number of events in files currently worked on
            filesEventCount = 0
            # flag if next while loop should touch new file
            newFile = 1
            # job event counter
            jobSkipEventCount = 0

            # ---- Iterate over the files in the block until we've met the requested ---- #
            # ---- total # of events or we've gone over all the files in this block  ---- #
            while (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs):
                fileName = files[fileCount]
                # escaped, quoted file name followed by an escaped comma;
                # the trailing two characters are cut off when a job is closed
                quotedName = '\\"' + fileName + '\\"\\,'
                if newFile:
                    try:
                        numEventsInFile = self.eventsbyfile[fileName]
                        common.logger.debug(6, "File "+str(fileName)+" has "+str(numEventsInFile)+" events")
                        # increase filesEventCount
                        filesEventCount += numEventsInFile
                        # Add file to current job
                        parString += quotedName
                        newFile = 0
                    except KeyError:
                        common.logger.message("File "+str(fileName)+" has unknown number of events: skipping")

                eventsAvailable = filesEventCount - jobSkipEventCount

                # if less events in file remain than eventsPerJobRequested
                if eventsAvailable < eventsPerJobRequested:
                    # if last file in block
                    if fileCount == numFilesInBlock-1:
                        # end job using last file, use remaining events in block
                        # close job and touch new file
                        fullString = parString[:-2]
                        list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsAvailable)+" events (last file in block).")
                        self.jobDestination.append(blockSites[block])
                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                        # fill jobs of block dictionary
                        jobsOfBlock[block].append(jobCount+1)
                        # reset counter
                        jobCount = jobCount + 1
                        totalEventCount = totalEventCount + eventsAvailable
                        eventsRemaining = eventsRemaining - eventsAvailable
                        jobSkipEventCount = 0
                        # reset file
                        parString = ""
                        filesEventCount = 0
                        newFile = 1
                        fileCount += 1
                    else:
                        # go to next file
                        newFile = 1
                        fileCount += 1
                # if events in file equal to eventsPerJobRequested
                elif eventsAvailable == eventsPerJobRequested:
                    # close job and touch new file
                    fullString = parString[:-2]
                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    jobsOfBlock[block].append(jobCount+1)
                    # reset counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    jobSkipEventCount = 0
                    # reset file
                    parString = ""
                    filesEventCount = 0
                    newFile = 1
                    fileCount += 1
                # if more events in file remain than eventsPerJobRequested
                else:
                    # close job but don't touch new file
                    fullString = parString[:-2]
                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    jobsOfBlock[block].append(jobCount+1)
                    # increase counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    # calculate skip events for last file
                    # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
                    jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[fileName])
                    # remove all but the last file
                    filesEventCount = self.eventsbyfile[fileName]
                    parString = quotedName

    self.ncjobs = self.total_number_of_jobs = jobCount
    if eventsRemaining > 0 and jobCount < totalNumberOfJobs:
        common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
    common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")

    # screen output
    screenOutput = "List of jobs and available destination sites:\n\n"

    # keep trace of block with no sites to print a warning at the end
    noSiteBlock = []
    bloskNoSite = []

    blockCounter = 0
    for block in blocks:
        if block in jobsOfBlock:
            blockCounter += 1
            # apply black list then white list once per block
            allowedSites = self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)
            screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(allowedSites))
            if len(allowedSites) == 0:
                noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
                bloskNoSite.append( blockCounter )

    common.logger.message(screenOutput)
    if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
        msg = 'WARNING: No sites are hosting any part of data for block:\n '
        virgola = ""
        if len(bloskNoSite) > 1:
            virgola = ","
        for blockNumber in bloskNoSite:
            msg += ' ' + str(blockNumber) + virgola
        msg += '\n Related jobs:\n '
        virgola = ""
        if len(noSiteBlock) > 1:
            virgola = ","
        for range_jobs in noSiteBlock:
            msg += str(range_jobs) + virgola
        msg += '\n will not be submitted and this block of data can not be analyzed!\n'
        if 'EDG.se_white_list' in self.cfg_params:
            msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
            msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
            msg += 'Please check if the dataset is available at this site!)\n'
        if 'EDG.ce_white_list' in self.cfg_params:
            msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
            msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
            msg += 'Please check if the dataset is available at this site!)\n'

        common.logger.message(msg)

    self.list_of_args = list_of_lists
    return
569    
def jobSplittingNoInput(self):
    """
    Perform job splitting based on number of event per job
    REQUIRES: self.selectEventsPerJob, self.selectNumberOfJobs, self.selectTotalNumberEvents,
              self.eventsPerJob, self.theNumberOfJobs, self.total_number_of_events,
              self.firstRun, self.jobDestination
    SETS: self.total_number_of_jobs, self.list_of_args and, depending on
          which two quantities were given, self.total_number_of_events or
          self.eventsPerJob; appends one entry per job to self.jobDestination
    RAISES: CrabException for a negative total or a zero divisor
    """
    common.logger.debug(5,'Splitting per events')

    if self.selectEventsPerJob:
        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
    if self.selectNumberOfJobs:
        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
    if self.selectTotalNumberEvents:
        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')

    if self.total_number_of_events < 0:
        msg='Cannot split jobs per Events with "-1" as total number of events'
        raise CrabException(msg)

    if self.selectEventsPerJob:
        if self.selectTotalNumberEvents:
            if self.eventsPerJob == 0:
                # report a configuration error instead of ZeroDivisionError
                msg='Cannot split jobs per Events with "0" as events per job'
                raise CrabException(msg)
            # floor division: whole jobs only, a remainder is reported below
            self.total_number_of_jobs = int(self.total_number_of_events // self.eventsPerJob)
        elif self.selectNumberOfJobs:
            self.total_number_of_jobs = self.theNumberOfJobs
            self.total_number_of_events = int(self.theNumberOfJobs*self.eventsPerJob)

    elif self.selectNumberOfJobs:
        if self.theNumberOfJobs == 0:
            # report a configuration error instead of ZeroDivisionError
            msg='Cannot split jobs per Events with "0" as number of jobs'
            raise CrabException(msg)
        self.total_number_of_jobs = self.theNumberOfJobs
        self.eventsPerJob = int(self.total_number_of_events // self.total_number_of_jobs)

    common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs))

    # is there any remainder?
    eventsCovered = int(self.total_number_of_jobs)*self.eventsPerJob
    check = int(self.total_number_of_events) - eventsCovered

    common.logger.debug(5,'Check '+str(check))

    common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
    if check > 0:
        common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(eventsCovered))

    # one argument list per job
    self.list_of_args = []
    for i in range(self.total_number_of_jobs):
        ## Since there is no input, any site is good
        self.jobDestination.append([""]) #must be empty to write correctly the xml
        args=[]
        if self.firstRun:
            ## pythia first run: first_run value with the job index appended as text
            args.append(str(self.firstRun)+str(i))
        self.list_of_args.append(args)

    return
621    
622 spiga 1.42
def jobSplittingForScript(self):#CarlosDaniele
    """
    Split a script-only task: one job per requested job number.
    REQUIRES: self.theNumberOfJobs, self.jobDestination
    SETS: self.total_number_of_jobs, self.list_of_args (the job index as
          single argument); appends one empty destination per job to
          self.jobDestination
    """
    logger = common.logger
    logger.debug(5,'Splitting per job')
    logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')

    nJobs = self.theNumberOfJobs
    self.total_number_of_jobs = nJobs

    logger.debug(5,'N jobs '+str(nJobs))
    logger.message(str(nJobs)+' jobs can be created')

    jobIndices = range(nJobs)
    ## Since there is no input, any site is good: empty destination
    self.jobDestination.extend([[""] for i in jobIndices])
    ## no random seed: the only argument is the job index
    self.list_of_args = [[str(i)] for i in jobIndices]
    return
645    
def split(self, jobParams):
    """
    Copy the per-job argument lists into jobParams and store them in the DB.
    ARGUMENT: jobParams: list, extended in place with one entry per job;
              the first self.total_number_of_jobs entries are then set to
              the job argument lists
    REQUIRES: self.total_number_of_jobs, self.list_of_args, self.jobDestination
    SETS: self.argsList - number of arguments per job, job number included
    """
    njobs = self.total_number_of_jobs
    arglist = self.list_of_args
    # create the empty structure
    for i in range(njobs):
        jobParams.append("")

    listID=[]
    listField=[]
    for job in range(njobs):
        jobParams[job] = arglist[job]
        listID.append(job)
        # the job number (1-based) always comes first, then the job arguments
        argu = ','.join(jobParams[job])
        job_ToSave = {}
        job_ToSave['arguments'] = str(job+1)+','+argu
        job_ToSave['dlsDestination'] = self.jobDestination[job]
        listField.append(job_ToSave)
        msg="Job "+str(job)+" Arguments: "+str(job+1)+","+argu+"\n" \
            +" Destination: "+str(self.jobDestination[job])
        common.logger.debug(5,msg)
    # one bulk update for all jobs
    common._db.updateJob_(listID,listField)
    # Count from the first job: indexing job 1 failed when only one job
    # exists. Every splitting method in this class produces argument lists
    # of equal length, so any job is representative.
    if njobs > 0:
        self.argsList = (len(jobParams[0])+1)

    return
678 spiga 1.165 #
679     # def getJobTypeArguments(self, nj, sched):
680     # result = ''
681     # jobs=[]
682     # jobs.append(nj)
683     # for i in common._db.queryJob('arguments',jobs):## BL--DS
684     # result=result+str(i)+" "
685     # return result
686 ewv 1.131
def numberOfJobs(self):
    """
    Return the value stored in self.total_number_of_jobs.
    """
    total = self.total_number_of_jobs
    return total
690    
def getTarBall(self, exe):
    """
    Return the TarBall with lib and exe.

    The tarball path is <pathForTgz>/share/<self.tgz_name>; it is stored
    in self.tgzNameWithPath.  If that file already exists it is returned
    as is, otherwise self.buildTar_(exe) is called first.
    """
    # Marco. Let's start to use relative path for Boss XML files
    self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name

    # if it exists, just return it
    if os.path.exists(self.tgzNameWithPath):
        return self.tgzNameWithPath

    # Prepare a tar gzipped file with user binaries.
    self.buildTar_(exe)

    # str.strip() instead of the deprecated string.strip() module function
    return self.tgzNameWithPath.strip()
708    
def buildTar_(self, executable):
    """
    Build the tarballs shipped with the job.

    Writes self.tgzNameWithPath with the user executable (when it is not
    part of the release), the 'lib' and 'module' directories, every
    'data' directory found under the user Scram area and the ProdCommon
    directory from $CRABDIR.  Then writes the ML helper scripts into
    self.MLtgzfile.

    Nothing is done when the working area is the release top.

    Raises CrabException when the executable cannot be found, when a
    tarball cannot be created, or when the first tarball is larger than
    self.MaxTarBallSize MB.
    """
    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        return

    import tarfile
    try:  # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            if self.executable != '':
                exeWithPath = self.scram.findFile_(executable)
                if not exeWithPath:
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship
                    common.logger.debug(5, "Exe "+exeWithPath+" to be tarred")
                    path = swArea+'/'
                    # distinguish case when script is in user project area
                    # or given by full path somewhere else
                    if exeWithPath.find(path) >= 0:
                        exe = exeWithPath.replace(path, '')
                        tar.add(path+exe, exe)
                    else:
                        tar.add(exeWithPath, os.path.basename(executable))
                # else: the exe is from release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            libDir = 'lib'
            lib = swArea+'/'+libDir
            common.logger.debug(5, "lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib, libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea+'/'+moduleDir
            if os.path.isdir(module):
                tar.add(module, moduleDir)

            ## Now check if any data dir(s) is present
            swAreaLen = len(swArea)
            for root, dirs, files in os.walk(swArea):
                if "data" in dirs:
                    common.logger.debug(5, "data "+root+"/data"+" to be tarred")
                    tar.add(root+"/data", root[swAreaLen:]+"/data")

            ## Add ProdCommon dir to tar
            prodcommonDir = 'ProdCommon'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
            if os.path.isdir(prodcommonPath):
                tar.add(prodcommonPath, prodcommonDir)

            common.logger.debug(5, "Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            # always release the file handle, also when an add() failed
            tar.close()
    except CrabException:
        # keep the specific message (e.g. executable not found)
        raise
    except Exception:
        raise CrabException('Could not create tar-ball')

    ## check for tarball size
    tarballinfo = os.stat(self.tgzNameWithPath)
    if tarballinfo.st_size > self.MaxTarBallSize*1024*1024:
        raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')

    ## create tar-ball with ML stuff
    self.MLtgzfile = common.work_space.pathForTgz()+'share/MLfiles.tgz'
    try:
        tar = tarfile.open(self.MLtgzfile, "w:gz")
        try:
            path = os.environ['CRABDIR'] + '/python/'
            for mlFile in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py', 'writeCfg.py', 'JobReportErrorCode.py']:
                tar.add(path+mlFile, mlFile)
            common.logger.debug(5, "Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
        finally:
            tar.close()
    except Exception:
        raise CrabException('Could not create ML files tar-ball')

    return
801 ewv 1.131
def additionalInputFileTgz(self):
    """
    Put all additional files into a tar ball and return its name.

    Every entry of self.additional_inbox_files is stored under its base
    name in <pathForTgz>/share/<self.additional_tgz_name>.
    """
    import tarfile
    tarName = common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
    tar = tarfile.open(tarName, "w:gz")
    try:
        for inboxFile in self.additional_inbox_files:
            # archive member name is the last path component
            tar.add(inboxFile, os.path.basename(inboxFile))
        common.logger.debug(5, "Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
    finally:
        # close the archive even when adding a file fails
        tar.close()
    return tarName
814    
def wsSetupEnvironment(self, nj=0):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    The text is made of a middleware-specific part (LCG or OSG), the
    scram project setup for self.version, the check on the number of
    wrapper arguments and the job-specific variables.
    """
    # NOTE(review): the nesting below was reconstructed from a
    # de-indented listing; in particular the PSETHASH lines are assumed
    # to belong to the final 'self.pset' branch -- confirm against the
    # repository copy.
    out = []

    # JobType-independent part
    out.append('\n#Written by cms_cmssw::wsSetupEnvironment\n')
    out.append('echo ">>> setup environment"\n')
    out.append('if [ $middleware == LCG ]; then \n')
    out.append(self.wsSetupCMSLCGEnvironment_())
    out.append('elif [ $middleware == OSG ]; then\n')
    out.append(' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n')
    out.append(' if [ ! $? == 0 ] ;then\n')
    out.append(' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n')
    out.append(' job_exit_code=10016\n')
    out.append(' func_exit\n')
    out.append(' fi\n')
    out.append(' echo ">>> Created working directory: $WORKING_DIR"\n')
    out.append('\n')
    out.append(' echo "Change to working directory: $WORKING_DIR"\n')
    out.append(' cd $WORKING_DIR\n')
    out.append(' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n')
    out.append(self.wsSetupCMSOSGEnvironment_())
    out.append('fi\n')

    # JobType-specific part
    scramCmd = self.scram.commandName()
    out.append('\n\n')
    out.append('echo ">>> specific cmssw setup environment:"\n')
    out.append('echo "CMSSW_VERSION = '+self.version+'"\n')
    out.append(scramCmd+' project CMSSW '+self.version+'\n')
    out.append('status=$?\n')
    out.append('if [ $status != 0 ] ; then\n')
    out.append(' echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n')
    out.append(' job_exit_code=10034\n')
    out.append(' func_exit\n')
    out.append('fi \n')
    out.append('cd '+self.version+'\n')
    out.append('SOFTWARE_DIR=`pwd`\n')
    out.append('echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n')
    ### needed grep for bug in scramv1 ###
    out.append('eval `'+scramCmd+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n')

    # Handle the arguments
    out.append("\n")
    out.append("## number of arguments (first argument always jobnumber)\n")
    out.append("\n")
    out.append("if [ $nargs -lt "+str(self.argsList)+" ]\n")
    out.append("then\n")
    out.append(" echo 'ERROR ==> Too few arguments' +$nargs+ \n")
    out.append(' job_exit_code=50113\n')
    out.append(" func_exit\n")
    out.append("fi\n")
    out.append("\n")

    # Job-specific part
    job = common.job_list[nj]

    # variables used for the DBS output publication
    if self.datasetPath:
        pathFields = self.datasetPath.split("/")
        out.append('\n')
        out.append('DatasetPath='+self.datasetPath+'\n')
        out.append('PrimaryDataset='+pathFields[1]+'\n')
        out.append('DataTier='+pathFields[2]+'\n')
        out.append('ApplicationFamily=cmsRun\n')
    else:
        out.append('DatasetPath=MCDataTier\n')
        out.append('PrimaryDataset=null\n')
        out.append('DataTier=null\n')
        out.append('ApplicationFamily=MCDataTier\n')

    hasPset = self.pset is not None
    if hasPset:
        psetName = os.path.basename(job.configFilename())
        out.append('\n')
        out.append('cp $RUNTIME_AREA/'+psetName+' .\n')
        if self.datasetPath:
            # standard job
            out.append('InputFiles=${args[1]}; export InputFiles\n')
            out.append('MaxEvents=${args[2]}; export MaxEvents\n')
            out.append('SkipEvents=${args[3]}; export SkipEvents\n')
            out.append('echo "Inputfiles:<$InputFiles>"\n')
            out.append('echo "MaxEvents:<$MaxEvents>"\n')
            out.append('echo "SkipEvents:<$SkipEvents>"\n')
        else:
            # pythia like job
            out.append('PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n')
            out.append('IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n')
            out.append('echo "PreserveSeeds: <$PreserveSeeds>"\n')
            out.append('echo "IncrementSeeds:<$IncrementSeeds>"\n')
            if self.firstRun:
                out.append('FirstRun=${args[1]}; export FirstRun\n')
                out.append('echo "FirstRun: <$FirstRun>"\n')

        out.append('mv -f '+psetName+' pset.cfg\n')

    if self.additional_inbox_files:
        out.append('if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n')
        out.append(' tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n')
        out.append('fi\n')

    if hasPset:
        out.append('\n')
        out.append('echo "***** cat pset.cfg *********"\n')
        out.append('cat pset.cfg\n')
        out.append('echo "****** end pset.cfg ********"\n')
        out.append('\n')
        out.append('PSETHASH=`EdmConfigHash < pset.cfg` \n')
        out.append('echo "PSETHASH = $PSETHASH" \n')
        out.append('\n')

    return ''.join(out)
973 fanzago 1.166 #### FEDE #####
def wsUntarSoftware(self, nj=0):
    """
    Put in the script the commands to untar the software tarball
    (self.tgzNameWithPath) and to add ProdCommon to PYTHONPATH.

    When the tarball does not exist on the submitting side only the
    header comment is returned.
    """
    txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'

    if os.path.isfile(self.tgzNameWithPath):
        tgzBase = os.path.basename(self.tgzNameWithPath)
        txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+tgzBase+' :" \n'
        txt += 'tar xzvf $RUNTIME_AREA/'+tgzBase+'\n'
        txt += 'untar_status=$? \n'
        txt += 'if [ $untar_status -ne 0 ]; then \n'
        txt += ' echo "ERROR ==> Untarring .tgz file failed"\n'
        txt += ' job_exit_code=$untar_status\n'
        txt += ' func_exit\n'
        txt += 'else \n'
        txt += ' echo "Successful untar" \n'
        txt += 'fi \n'
        txt += '\n'
        txt += 'echo ">>> Include ProdCommon in PYTHONPATH:"\n'
        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
        txt += ' export PYTHONPATH=$RUNTIME_AREA/ProdCommon\n'
        txt += 'else\n'
        txt += ' export PYTHONPATH=$RUNTIME_AREA/ProdCommon:${PYTHONPATH}\n'
        txt += 'fi\n'
        # report the final value for both branches: the echo used to sit
        # inside the 'else' and was skipped when PYTHONPATH was empty
        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
        txt += '\n'

    return txt
1006 fanzago 1.166
def wsBuildExe(self, nj=0):
    """
    Put in the script the commands that move the untarred 'lib',
    'module' and 'ProdCommon' directories from $RUNTIME_AREA into the
    current directory and add $SOFTWARE_DIR/ProdCommon to PYTHONPATH.
    """
    txt = '\n#Written by cms_cmssw::wsBuildExe\n'
    txt += 'echo ">>> moving CMSSW software directories in `pwd`" \n'

    txt += 'mv $RUNTIME_AREA/lib . \n'
    txt += 'mv $RUNTIME_AREA/module . \n'
    txt += 'mv $RUNTIME_AREA/ProdCommon . \n'

    txt += 'if [ -z "$PYTHONPATH" ]; then\n'
    txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdCommon\n'
    txt += 'else\n'
    txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
    txt += 'fi\n'
    # report the final value for both branches: the echo used to sit
    # inside the 'else' and was skipped when PYTHONPATH was empty
    txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
    txt += '\n'

    return txt
1043     ############################################################################
1044 slacapra 1.1
def modifySteeringCards(self, nj):
    """
    modify the card provided by the user,
    writing a new card into share dir

    Nothing is done in this implementation: the method has no body
    besides this description and returns None.
    """
    return None
1050 ewv 1.131
def executableName(self):
    """
    Return the command used to start the job: "sh " when a user script
    (self.scriptExe) is set, self.executable otherwise.
    """
    if not self.scriptExe:
        return self.executable
    return "sh "
1056 slacapra 1.1
def executableArgs(self):
    """
    Return the command-line arguments of the executable.

    With a user script (self.scriptExe) the arguments are the script
    name followed by $NJob.  Otherwise they depend on the CMSSW version
    reported by self.scram.getSWVersion():
      - release 1_5 or newer: write the framework job report to
        $RUNTIME_AREA/crab_fjr_$NJob.xml
      - major release 2 or newer: use pset.pycfg, else pset.cfg

    Raises CrabException when the version string cannot be parsed.
    """
    # FUTURE: This function tests the CMSSW version. Can be simplified
    # as we drop support for old versions
    if self.scriptExe:
        return self.scriptExe + " $NJob"

    version_array = self.scram.getSWVersion().split('_')
    try:
        major = int(version_array[1])
        minor = int(version_array[2])
    except (IndexError, ValueError):
        msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
        raise CrabException(msg)

    ex_args = ""

    # Framework job report.  Compare (major, minor) as a pair: testing
    # "major >= 1 and minor >= 5" dropped the option for e.g. 2_0_x.
    if (major, minor) >= (1, 5):
        ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"

    # Type of cfg file
    if major >= 2:
        ex_args += " -p pset.pycfg"
    else:
        ex_args += " -p pset.cfg"
    return ex_args
1086 slacapra 1.1
def inputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL input sandbox.

    The list holds, in this order: the software tarball and the ML
    tarball (each only if the file exists), the job configuration file
    when self.pset is not None, and the tarball of the additional input
    files.
    """
    sandbox = []

    ## code
    for archive in (self.tgzNameWithPath, self.MLtgzfile):
        if os.path.isfile(archive):
            sandbox.append(archive)

    ## config
    if self.pset is not None:
        cfgPath = common.work_space.pathForTgz() + 'job/' + self.configFilename()
        sandbox.append(cfgPath)

    ## additional input files
    sandbox.append(self.additionalInputFileTgz())
    return sandbox
1106    
def outputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL output sandbox.

    Every name in self.output_file and self.output_file_sandbox is
    tagged with nj+1 through self.numberFile_.
    """
    ## User Declared output files
    declared = self.output_file + self.output_file_sandbox
    return [self.numberFile_(name, str(nj + 1)) for name in declared]
1118    
def prepareSteeringCards(self):
    """
    Make initial modifications of the user's steering card file.

    Nothing is done in this implementation; None is returned.
    """
    return None
1124    
def wsRenameOutput(self, nj):
    """
    Returns part of a job script which renames the produced files.

    For every name in self.output_file the script moves the file to its
    numbered name (see numberFile_, suffix $NJob) and links it back in
    $RUNTIME_AREA; a missing file sets job_exit_code=60302.  The numbered
    names are also exported to the script as the 'file_list' variable.
    """
    txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    txt += 'ls \n'
    txt += '\n'

    # numbered names are computed once and reused for 'file_list'
    file_list = []
    for fileWithSuffix in self.output_file:
        output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
        file_list.append(output_file_num)

        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
        if self.copy_data == 1:
            # For OSG nodes, file is in $WORKING_DIR, should not be
            # moved to $RUNTIME_AREA
            txt += ' mv '+fileWithSuffix+' '+output_file_num+'\n'
            txt += ' ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
        else:
            txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
        txt += 'else\n'
        txt += ' job_exit_code=60302\n'
        txt += ' echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
        if common.scheduler.name().upper() == 'CONDOR_G':
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' fi \n'
        txt += 'fi\n'

    # str.join instead of the deprecated string.join module function
    txt += 'file_list="'+' '.join(file_list)+'"\n'
    txt += '\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    txt += 'ls \n'
    txt += '\n'
    txt += 'cd $RUNTIME_AREA\n'
    txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    return txt
1193    
def numberFile_(self, file, txt):
    """
    append _'txt' before last extension of a file

    'out.root', '3'  -> 'out_3.root'
    'a.b.c',    '3'  -> 'a.b_3.c'
    'noext',    '3'  -> 'noext_3'
    """
    # str methods instead of the deprecated string.split module function
    pieces = file.split(".")
    if len(pieces) > 1:
        # everything before the last dot, then the tag, then the extension
        stem = ".".join(pieces[:-1])
        return stem + '_' + txt + "." + pieces[-1]
    # no extension: just append the tag
    return file + '_' + txt
1211    
def getRequirements(self, nj=[]):
    """
    return job requirements to add to jdl files

    The expression asks for the software tags of self.version and
    self.executable_arch (each only when set) and for outbound
    connectivity; for the "glitecoll" scheduler the CE must also be in
    "Production" state.  'nj' is not used here (its default is kept
    unchanged for callers and is never modified).
    """
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' +
                       self.version +
                       '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')
    ## SL add requirement for OS version
    if self.executable_arch:
        clauses.append('Member("VO-cms-' +
                       self.executable_arch +
                       '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')
    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')

    # Joining avoids an expression starting with a dangling ' && ' when
    # self.version is empty.
    req = ' && '.join(clauses)
    if common.scheduler.name() == "glitecoll":
        req += ' && other.GlueCEStateStatus == "Production" '

    return req
1233 gutsche 1.3
def configFilename(self):
    """ return the config filename: self.name() followed by '.cfg' """
    base = self.name()
    return base + '.cfg'
1237    
def wsSetupCMSOSGEnvironment_(self):
    """
    Returns part of a job script which prepares
    the execution environment and which is common for all CMS jobs
    (OSG flavour).

    The script exports SCRAM_ARCH (self.executable_arch) and sources
    $OSG_APP/cmssoft/cms/cmsset_default.sh for self.version; when that
    file is missing it sets job_exit_code=10020 and calls func_exit.
    """
    arch = self.executable_arch
    fragments = [
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh ' + self.version + '\n',
        ' else\n',
        ' echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' job_exit_code=10020\n',
        '\n',
        ' func_exit\n',
        ' fi\n',
        '\n',
        ' echo "==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(fragments)
1279 ewv 1.131
1280 gutsche 1.3 ### OLI_DANIELE
def wsSetupCMSLCGEnvironment_(self):
    """
    Returns part of a job script which prepares
    the execution environment and which is common for all CMS jobs
    (LCG flavour).

    The script exports SCRAM_ARCH and BUILD_ARCH (self.executable_arch)
    and sources $VO_CMS_SW_DIR/cmsset_default.sh.  It sets job_exit_code
    and calls func_exit when $VO_CMS_SW_DIR is not set (10031), when the
    file is missing or empty (10020) or when sourcing it fails (10032).
    """
    arch = self.executable_arch
    fragments = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' export BUILD_ARCH=' + arch + '\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n',
        ' job_exit_code=10031\n',
        ' func_exit\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "==> setup cms environment ok"\n',
    ]
    return ''.join(fragments)
1329 gutsche 1.5
1330 ewv 1.131 ### FEDE FOR DBS OUTPUT PUBLICATION
def modifyReport(self, nj):
    """
    Build the wrapper-script fragment that rewrites the FrameworkJob Report.

    Unless cfg_params['USER.publish_data'] converts to 1, only the header
    comment line is returned.  Otherwise the fragment, guarded by
    $copy_exit_status being 0, runs ModifyJobReport.py on
    $RUNTIME_AREA/crab_fjr_$NJob.xml and, when that succeeds, moves
    NewFrameworkJobReport.xml over it; on failure job_exit_code is set
    to 70500.  cfg_params['USER.publish_data_name'] is required in that
    case (KeyError if absent).  The nj argument is not used.
    """
    header = '\n#Written by cms_cmssw::modifyReport\n'

    # a missing USER.publish_data key means "do not publish"
    publish_data = int(self.cfg_params.get('USER.publish_data', 0))
    if publish_data != 1:
        return header

    dataset_name = self.cfg_params['USER.publish_data_name']

    tool = '$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py'
    report = '$RUNTIME_AREA/crab_fjr_$NJob.xml'
    # the very same command line is first echoed, then executed
    command = ' '.join([
        tool, report, '$NJob', '$FOR_LFN', '$PrimaryDataset', '$DataTier',
        '$ProcessedDataset', '$ApplicationFamily', '$executable',
        '$CMSSW_VERSION', '$PSETHASH', '$SE', '$SE_PATH',
    ])

    pieces = [
        header,
        'if [ $copy_exit_status -eq 0 ]; then\n',
        ' echo ">>> Modify Job Report:" \n',
        ' chmod a+x ' + tool + '\n',
        ' echo "SE = $SE"\n',
        ' echo "SE_PATH = $SE_PATH"\n',
        ' ProcessedDataset=' + dataset_name + '\n',
        # FOR_LFN is /store followed by whatever comes after 'store' in SE_PATH
        " tmp=`echo $SE_PATH | awk -F 'store' '{print$2}'` \n",
        ' FOR_LFN=/store$tmp \n',
        ' echo "ProcessedDataset = $ProcessedDataset"\n',
        ' echo "FOR_LFN = $FOR_LFN" \n',
        ' echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
        ' echo "' + command + '"\n',
        ' ' + command + '\n',
        ' modifyReport_result=$?\n',
        ' if [ $modifyReport_result -ne 0 ]; then\n',
        ' modifyReport_result=70500\n',
        ' job_exit_code=$modifyReport_result\n',
        ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "WARNING: Problem with ModifyJobReport"\n',
        ' else\n',
        ' mv NewFrameworkJobReport.xml ' + report + '\n',
        ' fi\n',
        'fi\n',
    ]
    return ''.join(pieces)
1388 fanzago 1.99
def cleanEnv(self):
    """
    Build the wrapper-script fragment that cleans the worker node on OSG.

    When $middleware is OSG the fragment changes to $RUNTIME_AREA and
    removes $WORKING_DIR; if the directory still exists afterwards it
    sets job_exit_code=10017 and calls func_exit.
    """
    script_lines = (
        '',
        '#Written by cms_cmssw::cleanEnv',
        'if [ $middleware == OSG ]; then',
        ' cd $RUNTIME_AREA',
        ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"',
        ' echo ">>> Remove working directory: $WORKING_DIR"',
        ' /bin/rm -rf $WORKING_DIR',
        ' if [ -d $WORKING_DIR ] ;then',
        ' echo "ERROR ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"',
        ' job_exit_code=10017',
        ' func_exit',
        ' fi',
        'fi',
        '',
    )
    # every entry, including the trailing empty one, is newline-terminated
    return '\n'.join(script_lines) + '\n'
1404 fanzago 1.93
def setParam_(self, param, value):
    """
    Store value under the key param in the parameter dictionary,
    replacing any value previously stored under that key.
    """
    self._params.update({param: value})
1407    
def getParams(self):
    """
    Return the parameter dictionary itself (the live object, not a copy).
    """
    params = self._params
    return params
1410 gutsche 1.8
def uniquelist(self, old):
    """
    Remove duplicates from a list.

    Returns a new list holding each distinct element of old once, in
    order of first appearance.  Elements must be hashable (TypeError
    otherwise).  The input is not modified.
    """
    seen = {}
    unique = []
    for element in old:
        # a dict is used for the membership test so lookups stay O(1)
        if element not in seen:
            seen[element] = 0
            unique.append(element)
    return unique
1419 mcinquil 1.121
1420    
def checkOut(self, limit):
    """
    Build the wrapper-script fragment that checks the size of the output files.

    The files checked are the sandbox output files (plus the regular
    output files when self.return_data is 1), each renamed through
    self.numberFile_(name, '$NJob'), followed by the job stdout and
    stderr files.  The fragment sums their sizes (third column of
    `ls -gG`) and, when the sum exceeds limit, deletes every file that
    is larger than limit on its own or that pushes the running total
    over limit, then sets job_exit_code=70000.

    limit is written into the script through str(limit).
    Returns the script fragment as a string.
    """
    if self.return_data == 1:
        candidates = self.output_file + self.output_file_sandbox
    else:
        candidates = self.output_file_sandbox
    listOutFiles = [self.numberFile_(name, '$NJob') for name in candidates]
    # stdout/stderr names are resolved by the shell at run time
    listOutFiles += ['$stdoutFile', '$stderrFile']
    file_names = ' '.join(listOutFiles)

    txt = '\n#Written by cms_cmssw::checkOut\n'
    txt += 'echo ">>> Starting output sandbox limit check :"\n'
    txt += 'stdoutFile=`ls *stdout` \n'
    txt += 'stderrFile=`ls *stderr` \n'
    txt += 'echo "output files: ' + file_names + '"\n'
    txt += 'filesToCheck="' + file_names + '"\n'
    txt += 'ls -gGhrta;\n'

    # first pass: total size of the files that exist
    txt += 'sum=0;\n'
    txt += 'for file in $filesToCheck ; do\n'
    txt += ' if [ -e $file ]; then\n'
    txt += ' tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
    txt += ' sum=`expr $sum + $tt`\n'
    txt += ' else\n'
    txt += ' echo "WARNING: output file $file not found!"\n'
    txt += ' fi\n'
    txt += 'done\n'
    txt += 'echo "Total Output dimension: $sum";\n'
    txt += 'limit=' + str(limit) + ';\n'
    txt += 'echo "WARNING: output files size limit is set to: $limit";\n'

    # second pass, only when over the limit: drop the files that do not fit
    txt += 'if [ $limit -lt $sum ]; then\n'
    txt += ' echo "WARNING: output files have to big size - something will be lost;"\n'
    txt += ' echo " checking the output file sizes..."\n'
    txt += ' tot=0;\n'
    txt += ' for filefile in $filesToCheck ; do\n'
    # skip missing files: ls would print nothing and leave dimFile empty,
    # which breaks the expr and test commands below
    txt += ' if [ ! -e $filefile ]; then\n'
    txt += ' continue\n'
    txt += ' fi\n'
    txt += ' dimFile=`ls -gGrta $filefile | awk \'{ print $3 }\';`\n'
    # accumulate this file's own size, not $tt left over from the first loop
    txt += ' tot=`expr $tot + $dimFile`;\n'
    txt += ' if [ $limit -lt $dimFile ]; then\n'
    txt += ' echo "deleting file: $filefile";\n'
    txt += ' rm -f $filefile\n'
    txt += ' elif [ $limit -lt $tot ]; then\n'
    txt += ' echo "deleting file: $filefile";\n'
    txt += ' rm -f $filefile\n'
    txt += ' else\n'
    txt += ' echo "saving file: $filefile"\n'
    txt += ' fi\n'
    txt += ' done\n'

    txt += ' ls -agGhrt\n'
    txt += ' echo "WARNING: output files are too big in dimension: can not put in the output_sandbox."\n'
    txt += ' job_exit_code=70000\n'
    txt += 'else\n'
    txt += ' echo "Total Output dimension $sum is fine."\n'
    txt += 'fi\n'
    txt += 'echo "Ending output sandbox limit check"\n'
    return txt