ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.160.2.1
Committed: Thu Mar 20 10:43:08 2008 UTC (17 years, 1 month ago) by fanzago
Content type: text/x-python
Branch: CRAB_2_1_1_pre2
Changes since 1.160: +5 -2 lines
Log Message:
patch for publication (store directory in PFN not compulsory)

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5 fanzago 1.115 from BlackWhiteListParser import BlackWhiteListParser
6 slacapra 1.1 import common
7     import Scram
8 fanzago 1.160.2.1 from LFNBaseName import *
9 slacapra 1.1
10 slacapra 1.105 import os, string, glob
11 slacapra 1.1
12     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs):
    """
    Configure a CMSSW job type from the user configuration.

    cfg_params: mapping with the user configuration; read through
                has_key(), get() and item access. The keys used here are
                'CMSSW.*', 'USER.*' and 'EDG.maxtarballsize'.
    ncjobs:     number of jobs requested to be created (may be the string
                'all', see jobSplittingByBlocks); stored in self.ncjobs.

    Besides storing the configuration, the constructor performs data
    discovery (when a dataset is given), prepares the tarball, splits the
    task into jobs and, when a PSet is used, writes the modified PSet.

    Raises CrabException on arch/version mismatch, on missing mandatory
    parameters (datasetpath, pset), on missing files, on an inconsistent
    splitting configuration and on failures while editing the PSet.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug(3,'CMSSW::__init__')

    self.argsList = []

    self._params = {}
    self.cfg_params = cfg_params
    # init BlackWhiteListParser
    self.blackWhiteListParser = BlackWhiteListParser(cfg_params)

    self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',9.5))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    log = common.logger

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.additional_tgz_name = 'additional.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''          # may become None in the script use case
    self.datasetPath = ''   # may become None in the script use case

    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()

    #
    # Try to block creation in case of arch/version mismatch
    #
    versionFields = string.split(self.version, "_")
    majorVersion = int(versionFields[1])
    minorVersion = int(versionFields[2])

    if majorVersion == 1 and (minorVersion < 5 and self.executable_arch.find('slc4') == 0):
        msg = "Warning: You are using %s version of CMSSW  with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
        common.logger.message(msg)
    if majorVersion == 1 and (minorVersion >= 5 and self.executable_arch.find('slc3') == 0):
        msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
        raise CrabException(msg)

    common.taskDB.setDict('codeVersion',self.version)
    self.setParam_('application', self.version)

    ### collect Data cards

    if not cfg_params.has_key('CMSSW.datasetpath'):
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)
    tmp = cfg_params['CMSSW.datasetpath']
    log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
    if string.lower(tmp)=='none':
        # no input dataset (e.g. generation or script use case)
        self.datasetPath = None
        self.selectNoInput = 1
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0

    # ML monitoring
    # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
    if not self.datasetPath:
        self.setParam_('dataset', 'None')
        self.setParam_('owner', 'None')
    else:
        try:
            datasetpath_split = self.datasetPath.split("/")
            # standard style
            self.setParam_('datasetFull', self.datasetPath)
            self.setParam_('dataset', datasetpath_split[1])
            self.setParam_('owner', datasetpath_split[2])
        except IndexError:
            # path with fewer than three '/'-separated fields:
            # report the whole string for both parameters
            self.setParam_('dataset', self.datasetPath)
            self.setParam_('owner', self.datasetPath)

    self.setParam_('taskId', common.taskDB.dict('taskId'))

    self.dataTiers = []

    ## now the application
    self.executable = cfg_params.get('CMSSW.executable','cmsRun')
    self.setParam_('exe', self.executable)
    log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)

    if not cfg_params.has_key('CMSSW.pset'):
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
    if self.pset.lower() != 'none' :
        if (not os.path.exists(self.pset)):
            raise CrabException("User defined PSet file "+self.pset+" does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    tmp = cfg_params.get('CMSSW.output_file',None)
    if tmp :
        tmpOutFiles = string.split(tmp,',')
        log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
        for outFile in tmpOutFiles:
            self.output_file.append(string.strip(outFile))
    else:
        log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")

    # script_exe file as additional file in inputSandbox
    self.scriptExe = cfg_params.get('USER.script_exe',None)
    if self.scriptExe :
        if not os.path.isfile(self.scriptExe):
            msg ="ERROR. file "+self.scriptExe+" not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(string.strip(self.scriptExe))

    # Without dataset and without PSet a user script is mandatory.
    # scriptExe is None (not '') when USER.script_exe is missing, so test
    # its truth value instead of comparing with the empty string.
    if self.datasetPath is None and self.pset is None and not self.scriptExe:
        msg ="Error. script_exe  not defined"
        raise CrabException(msg)

    ## additional input files
    if cfg_params.has_key('USER.additional_input_files'):
        tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
        for pattern in tmpAddFiles:
            pattern = string.strip(pattern)
            if not pattern:
                # empty entry (e.g. trailing comma): nothing to add
                continue
            dirname = ''
            if not pattern[0]=="/": dirname = "."
            files = []
            if string.find(pattern,"*")>-1:
                files = glob.glob(os.path.join(dirname, pattern))
                if len(files)==0:
                    raise CrabException("No additional input file found with this pattern: "+pattern)
            else:
                files.append(pattern)
            for fileName in files:
                if not os.path.exists(fileName):
                    raise CrabException("Additional input file not found: "+fileName)
                self.additional_inbox_files.append(string.strip(fileName))
        common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))

    ## Events per job
    if cfg_params.has_key('CMSSW.events_per_job'):
        self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
        self.selectEventsPerJob = 1
    else:
        self.eventsPerJob = -1
        self.selectEventsPerJob = 0

    ## number of jobs
    if cfg_params.has_key('CMSSW.number_of_jobs'):
        self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
        self.selectNumberOfJobs = 1
    else:
        self.theNumberOfJobs = 0
        self.selectNumberOfJobs = 0

    ## total number of events
    if cfg_params.has_key('CMSSW.total_number_of_events'):
        self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
        self.selectTotalNumberEvents = 1
    else:
        self.total_number_of_events = 0
        self.selectTotalNumberEvents = 0

    if self.pset is not None:
        if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
            msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
            raise CrabException(msg)
    else:
        if (self.selectNumberOfJobs == 0):
            msg = 'Must specify  number_of_jobs.'
            raise CrabException(msg)

    ## New method of dealing with seeds
    self.incrementSeeds = []
    self.preserveSeeds = []
    if cfg_params.has_key('CMSSW.preserve_seeds'):
        for seedName in cfg_params['CMSSW.preserve_seeds'].split(','):
            # str.strip() returns a new string: store the stripped value
            self.preserveSeeds.append(seedName.strip())
    if cfg_params.has_key('CMSSW.increment_seeds'):
        for seedName in cfg_params['CMSSW.increment_seeds'].split(','):
            self.incrementSeeds.append(seedName.strip())

    ## Old method of dealing with seeds
    ## FUTURE: This is for old CMSSW and old CRAB. Can throw exceptions after a couple of CRAB releases and then
    ## remove
    # (deprecated parameter, attribute keeping its value, name added to increment_seeds)
    deprecatedSeeds = [
        ('pythia_seed', 'sourceSeed',    'sourceSeed'),
        ('vtx_seed',    'sourceSeedVtx', 'VtxSmeared'),
        ('g4_seed',     'sourceSeedG4',  'g4SimHits'),
        ('mix_seed',    'sourceSeedMix', 'mix'),
    ]
    for paramName, attrName, seedName in deprecatedSeeds:
        seedValue = cfg_params.get('CMSSW.'+paramName,None)
        setattr(self, attrName, seedValue)
        if seedValue:
            # reported through the logger like the other user messages of this method
            common.logger.message(paramName+" is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\nAdded to increment_seeds.")
            self.incrementSeeds.append(seedName)

    self.firstRun = cfg_params.get('CMSSW.first_run',None)

    if self.pset is not None:
        import PsetManipulator as pp
        PsetEdit = pp.PsetManipulator(self.pset)

    # Copy/return

    self.copy_data = int(cfg_params.get('USER.copy_data',0))
    self.return_data = int(cfg_params.get('USER.return_data',0))

    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination=[]  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end

    self.tgzNameWithPath = self.getTarBall(self.executable)

    ## Select Splitting
    if self.selectNoInput:
        if self.pset is None:
            self.jobSplittingForScript()
        else:
            self.jobSplittingNoInput()
    else:
        self.jobSplittingByBlocks(blockSites)

    # modify Pset
    if self.pset is not None:
        try:
            # Add FrameworkJobReport to parameter-set, set max events.
            # Reset later for data jobs by writeCFG which does all modifications
            PsetEdit.addCrabFJR(self.fjrFileName)
            PsetEdit.maxEvent(self.eventsPerJob)
            PsetEdit.psetWriter(self.configFilename())
        except:
            msg='Error while manipuliating ParameterSet: exiting...'
            raise CrabException(msg)
288 gutsche 1.3
289 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
290    
291 slacapra 1.86 import DataDiscovery
292     import DataLocation
293 gutsche 1.3 common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
294    
295     datasetPath=self.datasetPath
296    
297 slacapra 1.1 ## Contact the DBS
298 gutsche 1.92 common.logger.message("Contacting Data Discovery Services ...")
299 slacapra 1.1 try:
300 slacapra 1.137 self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
301 slacapra 1.1 self.pubdata.fetchDBSInfo()
302    
303 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
304 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
305     raise CrabException(msg)
306 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
307 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
308     raise CrabException(msg)
309 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
310 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
311 slacapra 1.1 raise CrabException(msg)
312    
313 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
314 mkirn 1.37 self.eventsbyblock=self.pubdata.getEventsPerBlock()
315     self.eventsbyfile=self.pubdata.getEventsPerFile()
316 gutsche 1.3
317 slacapra 1.1 ## get max number of events
318 ewv 1.131 self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py
319 slacapra 1.1
320     ## Contact the DLS and build a list of sites hosting the fileblocks
321     try:
322 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
323 gutsche 1.6 dataloc.fetchDLSInfo()
324 slacapra 1.41 except DataLocation.DataLocationError , ex:
325 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
326     raise CrabException(msg)
327 ewv 1.131
328 slacapra 1.1
329 gutsche 1.35 sites = dataloc.getSites()
330     allSites = []
331     listSites = sites.values()
332 slacapra 1.63 for listSite in listSites:
333     for oneSite in listSite:
334 gutsche 1.35 allSites.append(oneSite)
335     allSites = self.uniquelist(allSites)
336 gutsche 1.3
337 gutsche 1.92 # screen output
338     common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
339    
340 gutsche 1.35 return sites
341 ewv 1.131
def setArgsList(self, argsList):
    """
    Store argsList in self.argsList (the object is kept as is, not copied).
    """
    self.argsList = argsList
344    
def jobSplittingByBlocks(self, blockSites):
    """
    Perform job splitting. Jobs run over an integer number of files
    and no more than one block.
    ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
    REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
              self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
              self.maxEvents, self.filesbyblock, self.eventsbyblock, self.eventsbyfile,
              self.ncjobs
    SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
          self.total_number_of_jobs - Total # of jobs (also stored in self.ncjobs)
          self.list_of_args - File(s) job will run on (a list of lists); each entry is
                              [file list string, events to process ('-1' = rest of block), events to skip]
    """

    # ---- Handle the possible job splitting configurations ---- #
    if (self.selectTotalNumberEvents):
        totalEventsRequested = self.total_number_of_events
    if (self.selectEventsPerJob):
        eventsPerJobRequested = self.eventsPerJob
        if (self.selectNumberOfJobs):
            totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob

    # If user requested all the events in the dataset
    if (totalEventsRequested == -1):
        eventsRemaining=self.maxEvents
    # If user requested more events than are in the dataset
    elif (totalEventsRequested > self.maxEvents):
        eventsRemaining = self.maxEvents
        # report the effective request: self.total_number_of_events is 0 when
        # the request comes from number_of_jobs * events_per_job
        common.logger.message("Requested "+str(totalEventsRequested)+ " events, but only "+str(self.maxEvents)+" events are available.")
    # If user requested less events than are in the dataset
    else:
        eventsRemaining = totalEventsRequested

    # If user requested more events per job than are in the dataset
    if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
        eventsPerJobRequested = self.maxEvents

    # For user info at end
    totalEventCount = 0

    if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
        eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)

    if (self.selectNumberOfJobs):
        common.logger.message("May not create the exact number_of_jobs requested.")

    # upper limit on the number of jobs to create
    if ( self.ncjobs == 'all' ) :
        totalNumberOfJobs = 999999999
    else :
        totalNumberOfJobs = self.ncjobs

    blocks = blockSites.keys()
    blockCount = 0
    # Backup variable in case self.maxEvents counted events in a non-included block
    numBlocksInDataset = len(blocks)

    jobCount = 0
    list_of_lists = []

    # dictionary tracking which jobs belong to which block
    jobsOfBlock = {}

    # ---- Iterate over the blocks in the dataset until ---- #
    # ---- we've met the requested total # of events    ---- #
    while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
        block = blocks[blockCount]
        blockCount += 1
        if block not in jobsOfBlock :
            jobsOfBlock[block] = []

        if self.eventsbyblock.has_key(block) :
            numEventsInBlock = self.eventsbyblock[block]
            common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))

            files = self.filesbyblock[block]
            numFilesInBlock = len(files)
            if (numFilesInBlock <= 0):
                continue
            fileCount = 0

            # ---- New block => New job ---- #
            parString = ""
            # counter for number of events in files currently worked on
            filesEventCount = 0
            # flag if next while loop should touch new file
            newFile = 1
            # job event counter
            jobSkipEventCount = 0

            # ---- Iterate over the files in the block until we've met the requested ---- #
            # ---- total # of events or we've gone over all the files in this block  ---- #
            while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
                file = files[fileCount]
                if newFile :
                    try:
                        numEventsInFile = self.eventsbyfile[file]
                        common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
                        # increase filesEventCount
                        filesEventCount += numEventsInFile
                        # Add file to current job
                        parString += '\\\"' + file + '\\\"\,'
                        newFile = 0
                    except KeyError:
                        common.logger.message("File "+str(file)+" has unknown number of events: skipping")

                # if less events in file remain than eventsPerJobRequested
                if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
                    # if last file in block
                    if ( fileCount == numFilesInBlock-1 ) :
                        # end job using last file, use remaining events in block
                        # close job and touch new file
                        fullString = parString[:-2]   # drop the trailing separator
                        list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
                        self.jobDestination.append(blockSites[block])
                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                        # fill jobs of block dictionary
                        jobsOfBlock[block].append(jobCount+1)
                        # reset counter
                        jobCount = jobCount + 1
                        totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
                        eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
                        jobSkipEventCount = 0
                        # reset file
                        parString = ""
                        filesEventCount = 0
                        newFile = 1
                        fileCount += 1
                    else :
                        # go to next file
                        newFile = 1
                        fileCount += 1
                # if events in file equal to eventsPerJobRequested
                elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
                    # close job and touch new file
                    fullString = parString[:-2]
                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    jobsOfBlock[block].append(jobCount+1)
                    # reset counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    jobSkipEventCount = 0
                    # reset file
                    parString = ""
                    filesEventCount = 0
                    newFile = 1
                    fileCount += 1

                # if more events in file remain than eventsPerJobRequested
                else :
                    # close job but don't touch new file
                    fullString = parString[:-2]
                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
                    self.jobDestination.append(blockSites[block])
                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
                    jobsOfBlock[block].append(jobCount+1)
                    # increase counter
                    jobCount = jobCount + 1
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    # calculate skip events for last file
                    # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
                    jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
                    # remove all but the last file
                    filesEventCount = self.eventsbyfile[file]
                    parString = '\\\"' + file + '\\\"\,'
                pass # END if
            pass # END while (iterate over files in the block)
    pass # END while (iterate over blocks in the dataset)
    self.ncjobs = self.total_number_of_jobs = jobCount
    if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
        common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
    common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")

    # screen output
    screenOutput = "List of jobs and available destination sites:\n\n"

    # keep trace of block with no sites to print a warning at the end
    noSiteBlock = []
    bloskNoSite = []

    blockCounter = 0
    for block in blocks:
        if block in jobsOfBlock :
            blockCounter += 1
            # job ranges and black/white-list filtered sites are computed
            # once per block and reused for the listing and the warning
            jobRanges = spanRanges(jobsOfBlock[block])
            allowedSites = self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)
            screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,jobRanges,','.join(allowedSites))
            if len(allowedSites) == 0:
                noSiteBlock.append( jobRanges )
                bloskNoSite.append( blockCounter )

    common.logger.message(screenOutput)
    if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
        msg = 'WARNING: No sites are hosting any part of data for block:\n                '
        virgola = ""
        if len(bloskNoSite) > 1:
            virgola = ","
        for block in bloskNoSite:
            msg += ' ' + str(block) + virgola
        msg += '\n               Related jobs:\n                 '
        virgola = ""
        if len(noSiteBlock) > 1:
            virgola = ","
        for range_jobs in noSiteBlock:
            msg += str(range_jobs) + virgola
        msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
        if self.cfg_params.has_key('EDG.se_white_list'):
            msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
            msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
            msg += 'Please check if the dataset is available at this site!)\n'
        if self.cfg_params.has_key('EDG.ce_white_list'):
            msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
            msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
            msg += 'Please check if the dataset is available at this site!)\n'

        common.logger.message(msg)

    self.list_of_args = list_of_lists
    return
569    
def jobSplittingNoInput(self):
    """
    Perform job splitting based on number of event per job, for tasks
    without an input dataset.

    Uses the two quantities selected among self.total_number_of_events,
    self.eventsPerJob and self.theNumberOfJobs to derive the third one.
    SETS: self.total_number_of_jobs, self.eventsPerJob or
          self.total_number_of_events (whichever was not given),
          self.list_of_args (one list per job) and appends one
          destination per job to self.jobDestination.
    Raises CrabException when the total number of events is negative or
    when the splitting would require a division by zero.
    """
    common.logger.debug(5,'Splitting per events')

    if (self.selectEventsPerJob):
        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
    if (self.selectNumberOfJobs):
        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
    if (self.selectTotalNumberEvents):
        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')

    if (self.total_number_of_events < 0):
        msg='Cannot split jobs per Events with "-1" as total number of events'
        raise CrabException(msg)

    if (self.selectEventsPerJob):
        if (self.selectTotalNumberEvents):
            if self.eventsPerJob == 0:
                # would be a ZeroDivisionError below
                raise CrabException('Cannot split jobs per Events with "0" as events_per_job')
            self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
        elif(self.selectNumberOfJobs) :
            self.total_number_of_jobs =self.theNumberOfJobs
            self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)

    elif (self.selectNumberOfJobs) :
        if self.theNumberOfJobs == 0:
            # would be a ZeroDivisionError below
            raise CrabException('Cannot split jobs per Events with "0" as number_of_jobs')
        self.total_number_of_jobs = self.theNumberOfJobs
        self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)

    common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))

    # is there any remainder?
    check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)

    common.logger.debug(5,'Check  '+str(check))

    common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
    if check > 0:
        common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))

    # argument is seed number.$i
    self.list_of_args = []
    for i in range(self.total_number_of_jobs):
        ## Since there is no input, any site is good
        self.jobDestination.append([""]) #must be empty to write correctly the xml
        args=[]
        if (self.firstRun):
            ## pythia first run: first_run value followed by the job index (string concatenation)
            args.append(str(self.firstRun)+str(i))
        self.list_of_args.append(args)

    return
621    
622 spiga 1.42
def jobSplittingForScript(self):
    """
    Perform job splitting based on number of job: create exactly
    self.theNumberOfJobs jobs, each receiving its own index as argument.

    SETS: self.total_number_of_jobs, self.list_of_args; appends one
          destination per job to self.jobDestination.
    """
    common.logger.debug(5,'Splitting per job')
    common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')

    numJobs = self.theNumberOfJobs
    self.total_number_of_jobs = numJobs

    common.logger.debug(5,'N jobs  '+str(numJobs))
    common.logger.message(str(numJobs)+' jobs can be created')

    jobIndices = range(numJobs)
    ## Since there is no input, any site is good: one separate [""] per job
    self.jobDestination.extend([[""] for jobIndex in jobIndices])
    ## no random seed: the only argument is the job index
    self.list_of_args = [[str(jobIndex)] for jobIndex in jobIndices]
    return
645    
def split(self, jobParams):
    """
    Record arguments and destination of every job in common.jobDB.

    jobParams: list modified in place; one entry per job is appended and
               the first self.total_number_of_jobs entries are set to the
               job arguments taken from self.list_of_args.
    """
    common.jobDB.load()
    numJobs = self.total_number_of_jobs
    jobArgsList = self.list_of_args

    # create the empty structure
    for unused in range(numJobs):
        jobParams.append("")

    for job in range(numJobs):
        jobParams[job] = jobArgsList[job]
        common.jobDB.setArguments(job, jobParams[job])
        destination = self.jobDestination[job]
        common.logger.debug(5,"Job "+str(job)+" Destination: "+str(destination))
        common.jobDB.setDestination(job, destination)

    common.jobDB.save()
    return
666 ewv 1.131
def getJobTypeArguments(self, nj, sched):
    """
    Return the arguments stored in common.jobDB for job nj as one string,
    each argument followed by a single space. sched is not used.
    """
    return ''.join([str(arg) + " " for arg in common.jobDB.arguments(nj)])
672 ewv 1.131
def numberOfJobs(self):
    """
    Return self.total_number_of_jobs, as set by the job splitting methods.
    """
    return self.total_number_of_jobs
676    
def getTarBall(self, exe):
    """
    Return the path of the tarball with lib and exe.

    The path is stored in self.tgzNameWithPath. When no file exists at
    that path, buildTar_(exe) is called before returning it.
    """
    # path built from the work space tgz directory (kept relative for the Boss XML files)
    tarBallPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
    self.tgzNameWithPath = tarBallPath

    if not os.path.exists(tarBallPath):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return string.strip(self.tgzNameWithPath)

    # it already exists: just return it
    return self.tgzNameWithPath
694    
def buildTar_(self, executable):
    """
    Create the tarballs shipped with every job.

    The first archive (self.tgzNameWithPath) receives, when present: the
    user executable if it does not live in the release area, the 'lib'
    and 'module' directories of the user Scram area, every 'data'
    directory found below that area and the ProdCommon directory found
    under $CRABDIR. The second archive (self.MLtgzfile) receives a fixed
    list of helper scripts from $CRABDIR/python.

    Nothing is done when the working area is the release top or when no
    release top is known.

    Raises CrabException when the executable cannot be found, when an
    archive cannot be written, or when the first archive is larger than
    self.MaxTarBallSize megabytes.
    """
    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        return

    import tarfile
    try:  # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            if self.executable != '':
                exeWithPath = self.scram.findFile_(executable)
                if not exeWithPath:
                    raise CrabException('User executable ' + executable + ' not found')

                ## then check if it's private or not; an exe coming from
                ## the release will be found on the WN and is not shipped
                if exeWithPath.find(swReleaseTop) == -1:
                    common.logger.debug(5, "Exe " + exeWithPath + " to be tarred")
                    path = swArea + '/'
                    # distinguish case when script is in user project area or given by full path somewhere else
                    if exeWithPath.find(path) >= 0:
                        exe = exeWithPath.replace(path, '')
                        tar.add(path + exe, exe)
                    else:
                        tar.add(exeWithPath, os.path.basename(executable))

            ## Now get the libraries: only those in local working area
            libDir = 'lib'
            lib = swArea + '/' + libDir
            common.logger.debug(5, "lib " + lib + " to be tarred")
            if os.path.exists(lib):
                tar.add(lib, libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module, moduleDir)

            ## Now check if any data dir(s) is present
            swAreaLen = len(swArea)
            for root, dirs, files in os.walk(swArea):
                if "data" in dirs:
                    common.logger.debug(5, "data " + root + "/data" + " to be tarred")
                    tar.add(root + "/data", root[swAreaLen:] + "/data")

            ## Add ProdCommon dir to tar
            prodcommonDir = 'ProdCommon'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
            if os.path.isdir(prodcommonPath):
                tar.add(prodcommonPath, prodcommonDir)

            common.logger.debug(5, "Files added to " + self.tgzNameWithPath + " : " + str(tar.getnames()))
        finally:
            # Release the archive handle whether or not packing succeeded.
            tar.close()
    except CrabException:
        # Keep specific messages (e.g. executable not found) intact.
        raise
    except Exception:
        raise CrabException('Could not create tar-ball')

    ## check for tarball size
    tarballinfo = os.stat(self.tgzNameWithPath)
    if tarballinfo.st_size > self.MaxTarBallSize * 1024 * 1024:
        raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')

    ## create tar-ball with ML stuff
    self.MLtgzfile = common.work_space.pathForTgz() + 'share/MLfiles.tgz'
    try:
        tar = tarfile.open(self.MLtgzfile, "w:gz")
        try:
            path = os.environ['CRABDIR'] + '/python/'
            for helper in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py', 'writeCfg.py']:
                tar.add(path + helper, helper)
            common.logger.debug(5, "Files added to " + self.MLtgzfile + " : " + str(tar.getnames()))
        finally:
            tar.close()
    except Exception:
        raise CrabException('Could not create ML files tar-ball')

    return
790 ewv 1.131
def additionalInputFileTgz(self):
    """
    Put all additional files into a tar ball and return its name

    Each entry of self.additional_inbox_files is stored under the last
    '/'-separated component of its path. The archive is written below
    the 'share/' directory of the work space and is closed even when
    adding a file fails.
    """
    import tarfile
    tarName = common.work_space.pathForTgz() + 'share/' + self.additional_tgz_name
    tar = tarfile.open(tarName, "w:gz")
    try:
        for inbox_file in self.additional_inbox_files:
            tar.add(inbox_file, inbox_file.split('/')[-1])
        common.logger.debug(5, "Files added to " + self.additional_tgz_name + " : " + str(tar.getnames()))
    finally:
        tar.close()
    return tarName
803    
def wsSetupEnvironment(self, nj):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    The fragment sets up the LCG or OSG environment, creates the CMSSW
    project area for self.version, checks that at least as many arguments
    as found in self.argsList[nj] were passed, exports the dataset
    related variables and, when a parameter set is defined, copies it to
    pset.cfg and computes its hash.
    """
    # Prepare JobType-independent part
    txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
    txt += 'echo ">>> setup environment"\n'
    txt += 'if [ $middleware == LCG ]; then \n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' if [ ! $? == 0 ] ;then\n'
    txt += ' echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
    txt += ' echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' exit 1\n'
    txt += ' fi\n'
    txt += ' echo ">>> Created working directory: $WORKING_DIR"\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo ">>> specific cmssw setup environment:"\n'
    txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
    txt += scram+' project CMSSW '+self.version+'\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
    txt += ' echo "JOB_EXIT_STATUS = 10034"\n'
    txt += ' echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    # Report the release that was actually requested, not a fixed one.
    txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW '+self.version+' not found on `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
    txt += ' echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += ' exit 1 \n'
    txt += 'fi \n'
    txt += 'cd '+self.version+'\n'
    ########## FEDE FOR DBS2 ######################
    txt += 'SOFTWARE_DIR=`pwd`\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    ###############################################
    ### needed grep for bug in scramv1 ###
    txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber)\n"
    txt += "\n"
    txt += "if [ $nargs -lt "+str(len(self.argsList[nj].split()))+" ]\n"
    txt += "then\n"
    txt += " echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
    txt += ' echo "JOB_EXIT_STATUS = 50113"\n'
    txt += ' echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
    txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
    txt += ' echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += " exit 1\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    ### FEDE FOR DBS OUTPUT PUBLICATION
    if self.datasetPath:
        txt += '\n'
        txt += 'DatasetPath='+self.datasetPath+'\n'

        # The dataset path is split on '/'; components 1 and 2 are used.
        datasetpath_split = self.datasetPath.split("/")

        txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
        txt += 'DataTier='+datasetpath_split[2]+'\n'
        txt += 'ApplicationFamily=cmsRun\n'
    else:
        txt += 'DatasetPath=MCDataTier\n'
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'

    if self.pset is not None:  # CarlosDaniele
        pset = os.path.basename(job.configFilename())
        txt += '\n'
        txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
        if self.datasetPath:  # standard job
            txt += 'InputFiles=${args[1]}; export InputFiles\n'
            txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
            txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
            txt += 'echo "Inputfiles:<$InputFiles>"\n'
            txt += 'echo "MaxEvents:<$MaxEvents>"\n'
            txt += 'echo "SkipEvents:<$SkipEvents>"\n'
        else:  # pythia like job
            txt += 'PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n'
            txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
            txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
            txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
            if self.firstRun:
                txt += 'FirstRun=${args[1]}; export FirstRun\n'
                txt += 'echo "FirstRun: <$FirstRun>"\n'

        txt += 'mv -f '+pset+' pset.cfg\n'

    if len(self.additional_inbox_files) > 0:
        txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
        txt += ' tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
        txt += 'fi\n'

    if self.pset is not None:  # CarlosDaniele
        txt += '\n'
        txt += 'echo "***** cat pset.cfg *********"\n'
        txt += 'cat pset.cfg\n'
        txt += 'echo "****** end pset.cfg ********"\n'
        txt += '\n'
        ### FEDE FOR DBS OUTPUT PUBLICATION
        txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
        txt += 'echo "PSETHASH = $PSETHASH" \n'
        ##############
        txt += '\n'
    return txt
950    
def wsBuildExe(self, nj=0):
    """
    Put in the script the commands to build an executable
    or a library.

    When the tarball self.tgzNameWithPath exists locally, the returned
    fragment unpacks it on the worker node, aborts the job if that fails
    and adds $SOFTWARE_DIR/ProdCommon to PYTHONPATH. Without a tarball
    only the header comment is returned. 'nj' is not used.
    """
    txt = '\n#Written by cms_cmssw::wsBuildExe\n'

    if os.path.isfile(self.tgzNameWithPath):
        tarball = os.path.basename(self.tgzNameWithPath)
        txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+tarball+' :" \n'
        txt += 'tar xzvf $RUNTIME_AREA/'+tarball+'\n'
        txt += 'untar_status=$? \n'
        txt += 'if [ $untar_status -ne 0 ]; then \n'
        txt += ' echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
        txt += ' echo "JOB_EXIT_STATUS = $untar_status" \n'
        txt += ' echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' if [ $middleware == OSG ]; then \n'
        txt += ' cd $RUNTIME_AREA\n'
        txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
        txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
        txt += ' /bin/rm -rf $WORKING_DIR\n'
        txt += ' if [ -d $WORKING_DIR ] ;then\n'
        txt += ' echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
        txt += ' echo "JOB_EXIT_STATUS = 50999"\n'
        txt += ' echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
        txt += ' fi\n'
        txt += ' fi \n'
        txt += ' \n'
        txt += ' exit 1 \n'
        txt += 'else \n'
        txt += ' echo "Successful untar" \n'
        txt += 'fi \n'
        txt += '\n'
        #### Removed ProdAgent API dependencies
        txt += 'echo ">>> Include ProdCommon in PYTHONPATH:"\n'
        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
        #### FEDE FOR DBS OUTPUT PUBLICATION
        txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdCommon\n'
        txt += 'else\n'
        txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
        txt += 'fi\n'
        # Log the resulting path after the conditional so that it is
        # printed in both branches, not only when it was already set.
        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
        txt += '\n'

    return txt
1000    
def modifySteeringCards(self, nj):
    """
    Placeholder for modifying the card provided by the user and
    writing a new card into the share dir; this job type does nothing.
    """
    return None
1006 ewv 1.131
def executableName(self):
    """
    Return the command used to start the job: "sh " when a user script
    (self.scriptExe) is set, otherwise self.executable.
    """
    if not self.scriptExe:
        return self.executable
    return "sh "  # CarlosDaniele
1012 slacapra 1.1
def executableArgs(self):
    """
    Return the command-line arguments of the executable.

    With a user script (self.scriptExe) the arguments are the script
    name followed by " $NJob". Otherwise the options are chosen from the
    CMSSW version reported by scram: "-j <self.fjrFileName>" for
    releases 1_5 and later, and "-p pset.pycfg" from major release 2
    on, "-p pset.cfg" before.

    Raises CrabException when major and minor release numbers cannot be
    read from the version string.
    """
    # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
    if self.scriptExe:  # CarlosDaniele
        return self.scriptExe + " $NJob"

    version_array = self.scram.getSWVersion().split('_')
    try:
        major = int(version_array[1])
        minor = int(version_array[2])
    except (IndexError, ValueError):
        msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
        raise CrabException(msg)

    ex_args = ""

    # Framework job report: compare (major, minor) as a pair so that
    # e.g. 2_0 counts as newer than 1_5.
    if (major, minor) >= (1, 5):
        ex_args += " -j " + self.fjrFileName

    # Type of cfg file
    if major >= 2:
        ex_args += " -p pset.pycfg"
    else:
        ex_args += " -p pset.cfg"
    return ex_args
1040 slacapra 1.1
def inputSandbox(self, nj):
    """
    Return the list of file names to put in the JDL input sandbox.

    The list holds the two tarballs (only those present on disk), the
    job configuration file when a parameter set is defined, and the
    archive built by additionalInputFileTgz(). 'nj' is not used.
    """
    sandbox = []

    ## code
    for tarball in (self.tgzNameWithPath, self.MLtgzfile):
        if os.path.isfile(tarball):
            sandbox.append(tarball)

    ## config
    if self.pset is not None:
        sandbox.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())

    ## additional input files
    sandbox.append(self.additionalInputFileTgz())
    return sandbox
1060    
def outputSandbox(self, nj):
    """
    Return the list of file names to put in the JDL output sandbox.

    Every user declared output file (self.output_file followed by
    self.output_file_sandbox) is numbered with nj + 1.
    """
    ## User Declared output files
    declared = self.output_file + self.output_file_sandbox
    return [self.numberFile_(name, str(nj + 1)) for name in declared]
1072    
def prepareSteeringCards(self):
    """
    Placeholder for the initial modifications of the user's steering
    card file; this job type does nothing.
    """
    return None
1078    
def wsRenameOutput(self, nj):
    """
    Return the part of the job script which renames the produced files.

    Files listed in self.output_file_sandbox are moved to $RUNTIME_AREA
    under their numbered name. Files listed in self.output_file are
    renamed in place when self.copy_data is 1 and moved to $RUNTIME_AREA
    otherwise. In every case a link with the original name is created in
    $RUNTIME_AREA, and a missing file sets exit_status to 60302. For the
    CONDOR_G scheduler a dummy output file is prepared on OSG.
    """
    chunks = [
        '\n#Written by cms_cmssw::wsRenameOutput\n',
        'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n',
        'echo ">>> current directory content:"\n',
        'ls \n',
        '\n',
        'output_exit_status=0\n',
    ]
    emit = chunks.append

    for produced in self.output_file_sandbox:
        numbered = self.numberFile_(produced, '$NJob')
        emit('\n')
        emit('# check output file\n')
        emit('if [ -e ./' + produced + ' ] ; then\n')
        emit(' mv ' + produced + ' $RUNTIME_AREA/' + numbered + '\n')
        emit(' ln -s $RUNTIME_AREA/' + numbered + ' $RUNTIME_AREA/' + produced + '\n')
        emit('else\n')
        emit(' exit_status=60302\n')
        emit(' echo "ERROR: Output file ' + produced + ' not found"\n')
        if common.scheduler.name().upper() == 'CONDOR_G':
            emit(' if [ $middleware == OSG ]; then \n')
            emit(' echo "prepare dummy output file"\n')
            emit(' echo "Processing of job output failed" > $RUNTIME_AREA/' + numbered + '\n')
            emit(' fi \n')
        emit('fi\n')

    for produced in self.output_file:
        numbered = self.numberFile_(produced, '$NJob')
        emit('\n')
        emit('# check output file\n')
        emit('if [ -e ./' + produced + ' ] ; then\n')
        if self.copy_data == 1:
            # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
            emit(' mv ' + produced + ' ' + numbered + '\n')
            emit(' ln -s `pwd`/' + numbered + ' $RUNTIME_AREA/' + produced + '\n')
        else:
            emit(' mv ' + produced + ' $RUNTIME_AREA/' + numbered + '\n')
            emit(' ln -s $RUNTIME_AREA/' + numbered + ' $RUNTIME_AREA/' + produced + '\n')
        emit('else\n')
        emit(' exit_status=60302\n')
        emit(' echo "ERROR: Output file ' + produced + ' not found"\n')
        emit(' echo "JOB_EXIT_STATUS = $exit_status"\n')
        emit(' output_exit_status=$exit_status\n')
        if common.scheduler.name().upper() == 'CONDOR_G':
            emit(' if [ $middleware == OSG ]; then \n')
            emit(' echo "prepare dummy output file"\n')
            emit(' echo "Processing of job output failed" > $RUNTIME_AREA/' + numbered + '\n')
            emit(' fi \n')
        emit('fi\n')

    # Numbered names of the declared output files, exported as file_list.
    numbered_names = [self.numberFile_(produced, '$NJob') for produced in self.output_file]

    emit('file_list="' + ' '.join(numbered_names) + '"\n')
    emit('\n')
    emit('echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n')
    emit('echo ">>> current directory content:"\n')
    emit('ls \n')
    emit('\n')
    emit('cd $RUNTIME_AREA\n')
    emit('echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n')
    return ''.join(chunks)
1144    
def numberFile_(self, file, txt):
    """
    append _'txt' before last extension of a file

    'file' is split on '.'; with at least one dot the result is
    <everything before the last dot>_<txt>.<last extension>, without a
    dot it is <file>_<txt>.
    """
    parts = file.split(".")
    if len(parts) > 1:
        # Re-join everything except the last extension, then add "_txt".
        return ".".join(parts[:-1]) + '_' + txt + "." + parts[-1]
    return parts[0] + '_' + txt
1162    
def getRequirements(self, nj=None):
    """
    return job requirements to add to jdl files

    The clauses are joined with ' && ': software version and
    architecture tags (each only when set), the outbound connectivity
    check and, for the "glitecoll" scheduler, the production status of
    the CE. 'nj' is not used.
    """
    software_env = 'other.GlueHostApplicationSoftwareRunTimeEnvironment'
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + '", ' + software_env + ')')
    ## SL add requirement for OS version only if SL4
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + '", ' + software_env + ')')

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')
    if common.scheduler.name() == "glitecoll":
        clauses.append('other.GlueCEStateStatus == "Production" ')

    # Joining avoids a dangling leading ' && ' when no version is set.
    return ' && '.join(clauses)
1184 gutsche 1.3
def configFilename(self):
    """ return the config filename: self.name() followed by '.cfg' """
    base = self.name()
    return base + '.cfg'
1188    
def wsSetupCMSOSGEnvironment_(self):
    """
    Return the part of a job script which prepares the execution
    environment on OSG and which is common for all CMS jobs.

    The fragment exports SCRAM_ARCH and sources
    $OSG_APP/cmssoft/cms/cmsset_default.sh for self.version; when that
    file is missing the job cleans up and exits.
    """
    arch = self.executable_arch
    script = [
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh ' + self.version + '\n',
        ' else\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        '\n',
        ' cd $RUNTIME_AREA\n',
        ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n',
        ' echo ">>> Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10017"\n',
        ' echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' fi\n',
        '\n',
        ' exit 1\n',
        ' fi\n',
        '\n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(script)
1226 ewv 1.131
1227 gutsche 1.3 ### OLI_DANIELE
def wsSetupCMSLCGEnvironment_(self):
    """
    Return the part of a job script which prepares the execution
    environment on LCG and which is common for all CMS jobs.

    The fragment exports SCRAM_ARCH and BUILD_ARCH and sources
    $VO_CMS_SW_DIR/cmsset_default.sh; the job exits when the software
    directory or that file is missing, or when sourcing it fails.
    """
    arch = self.executable_arch
    script = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' export BUILD_ARCH=' + arch + '\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n',
        ' echo "JOB_EXIT_STATUS = 10031" \n',
        ' echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' echo "JOB_EXIT_STATUS = 10032"\n',
        ' echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
    ]
    return ''.join(script)
1267 gutsche 1.5
1268 ewv 1.131 ### FEDE FOR DBS OUTPUT PUBLICATION
def modifyReport(self, nj):
    """
    Return the part of the script that modifies the FrameworkJob Report.

    When the 'USER.publish_data' parameter is 1, the fragment runs
    ModifyJobReport.py from ProdCommon on crab_fjr_$NJob.xml and
    replaces the report with the new one on success. In any other case
    (parameter missing included) it only prints that no publication is
    required. 'nj' is not used.
    """
    header = '\n#Written by cms_cmssw::modifyReport\n'
    publish_data = int(self.cfg_params.get('USER.publish_data', 0))

    if publish_data != 1:
        return header + 'echo "no data publication required"\n'

    processedDataset = self.cfg_params['USER.publish_data_name']
    modify_cmd = ('$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py'
                  ' crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier'
                  ' $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION'
                  ' $PSETHASH $SE $SE_PATH')

    script = [
        header,
        'echo ">>> Modify Job Report:" \n',
        ################ FEDE FOR DBS2 #############################################
        'chmod a+x $SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py\n',
        'if [ -z "$SE" ]; then\n',
        ' SE="" \n',
        'fi \n',
        'if [ -z "$SE_PATH" ]; then\n',
        ' SE_PATH="" \n',
        'fi \n',
        'echo "SE = $SE"\n',
        'echo "SE_PATH = $SE_PATH"\n',
        'ProcessedDataset=' + processedDataset + '\n',
        'if [ "$SE_PATH" == "" ]; then\n',
        #### FEDE: added slash in LFN ##############
        ' FOR_LFN=/copy_problems/ \n',
        'else \n',
        #### LFN is <LFNBaseName>_PSETHASH
        # NOTE(review): LFNBaseName comes from a star import; if it is a
        # callable and not a string this formats its repr -- confirm.
        ' FOR_LFN=%s_${PSETHASH}/\n' % (LFNBaseName),
        'fi \n',
        'echo "ProcessedDataset = $ProcessedDataset"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
        'echo "' + modify_cmd + '"\n',
        modify_cmd + '\n',
        'modifyReport_result=$?\n',
        'echo modifyReport_result = $modifyReport_result\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' exit_status=1\n',
        ' echo "ERROR: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n',
        'fi\n',
    ]
    return ''.join(script)
1325 fanzago 1.99
def cleanEnv(self):
    """
    Return the part of the job script which, on OSG, goes back to
    $RUNTIME_AREA and removes $WORKING_DIR, reporting exit code 60999
    when the directory is still there afterwards.
    """
    script = (
        '\n#Written by cms_cmssw::cleanEnv\n',
        'if [ $middleware == OSG ]; then\n',
        ' cd $RUNTIME_AREA\n',
        ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n',
        ' echo ">>> Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n',
        ' echo "JOB_EXIT_STATUS = 60999"\n',
        ' echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' fi\n',
        'fi\n',
        '\n',
    )
    return ''.join(script)
1342 fanzago 1.93
def setParam_(self, param, value):
    """ store 'value' under the key 'param' in self._params """
    params = self._params
    params[param] = value
1345    
def getParams(self):
    """ return the self._params dictionary itself (not a copy) """
    params = self._params
    return params
1348 gutsche 1.8
def uniquelist(self, old):
    """
    remove duplicates from a list

    Returns a new list holding each element of 'old' once, in order of
    first appearance. Elements must be hashable.
    """
    seen = {}
    unique = []
    for element in old:
        if element not in seen:
            seen[element] = 0
            unique.append(element)
    return unique
1357 mcinquil 1.121
1358    
def checkOut(self, limit):
    """
    Build the shell fragment that checks the size of the output files.

    The files examined are the job stdout/stderr (located with
    ``ls *stdout`` / ``ls *stderr``) plus the numbered names of
    self.output_file_sandbox and, when self.return_data == 1, of
    self.output_file as well.

    The generated code sums the size column of ``ls -gG`` for every file
    that exists.  If the sum exceeds the limit it walks the list again,
    deletes each file that is larger than the limit on its own or that
    pushes the running total over the limit, and sets exit_status=70000.

    limit -- value written verbatim into the script as the maximum
             allowed total size.
    Returns the shell code as a string.
    """
    txt = '\n#Written by cms_cmssw::checkOut\n'
    txt += 'echo ">>> Starting output sandbox limit check :"\n'
    txt += 'stdoutFile=`ls *stdout` \n'
    txt += 'stderrFile=`ls *stderr` \n'

    # Regular output files travel in the sandbox only when return_data is set.
    if self.return_data == 1:
        sandboxed = self.output_file + self.output_file_sandbox
    else:
        sandboxed = self.output_file_sandbox
    listOutFiles = [self.numberFile_(name, '$NJob') for name in sandboxed]
    listOutFiles.append('$stdoutFile')
    listOutFiles.append('$stderrFile')
    fileNames = ' '.join(listOutFiles)

    txt += 'echo "OUTPUT files: ' + fileNames + '"\n'
    txt += 'filesToCheck="' + fileNames + '"\n'
    txt += 'ls -gGhrta;\n'

    # First pass: total size of the files that are actually present.
    txt += 'sum=0;\n'
    txt += 'for file in $filesToCheck ; do\n'
    txt += ' if [ -e $file ]; then\n'
    txt += ' tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
    txt += ' sum=`expr $sum + $tt`\n'
    txt += ' else\n'
    txt += ' echo "WARNING: output file $file not found!"\n'
    txt += ' fi\n'
    txt += 'done\n'
    txt += 'echo "Total Output dimension: $sum";\n'
    txt += 'limit=' + str(limit) + ';\n'
    txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'

    # Second pass (only when over the limit): decide file by file what to drop.
    txt += 'if [ $limit -lt $sum ]; then\n'
    txt += ' echo "WARNING: output files have to big size - something will be lost;"\n'
    txt += ' echo " checking the output file sizes..."\n'
    txt += ' tot=0;\n'
    txt += ' for filefile in $filesToCheck ; do\n'
    # Missing files have no size: skip them so that expr and test never
    # receive an empty operand.
    txt += ' if [ ! -e $filefile ]; then\n'
    txt += ' continue\n'
    txt += ' fi\n'
    txt += ' dimFile=`ls -gGrta $filefile | awk \'{ print $3 }\';`\n'
    # The running total must grow by the size of the current file
    # ($dimFile), not by $tt, which is a leftover of the first loop.
    txt += ' tot=`expr $tot + $dimFile`;\n'
    txt += ' if [ $limit -lt $dimFile ]; then\n'
    txt += ' echo "deleting file: $filefile";\n'
    txt += ' rm -f $filefile\n'
    txt += ' elif [ $limit -lt $tot ]; then\n'
    txt += ' echo "deleting file: $filefile";\n'
    txt += ' rm -f $filefile\n'
    txt += ' else\n'
    txt += ' echo "saving file: $filefile"\n'
    txt += ' fi\n'
    txt += ' done\n'

    txt += ' ls -agGhrt;\n'
    txt += ' echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
    txt += ' echo "JOB_EXIT_STATUS = 70000";\n'
    txt += ' exit_status=70000;\n'
    # Explicit newline: the keyword no longer relies on the next literal
    # starting with a blank.
    txt += 'else\n'
    txt += ' echo "Total Output dimension $sum is fine.";\n'
    txt += 'fi\n'
    txt += 'echo "Ending output sandbox limit check"\n'
    return txt