ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.12
Committed: Mon Jun 26 13:39:31 2006 UTC (18 years, 10 months ago) by fanzago
Content type: text/x-python
Branch: MAIN
Changes since 1.11: +19 -2 lines
Log Message:
changed common.logger.message for total_number_of_events

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5     import common
6 gutsche 1.3 import PsetManipulator
7 slacapra 1.1
8 gutsche 1.3 import DBSInfo_EDM
9     import DataDiscovery_EDM
10     import DataLocation_EDM
11 slacapra 1.1 import Scram
12    
13     import os, string, re
14    
15     class Cmssw(JobType):
16     def __init__(self, cfg_params):
17     JobType.__init__(self, 'CMSSW')
18     common.logger.debug(3,'CMSSW::__init__')
19    
20     self.analisys_common_info = {}
21 gutsche 1.3 # Marco.
22     self._params = {}
23     self.cfg_params = cfg_params
24 slacapra 1.1 log = common.logger
25    
26     self.scram = Scram.Scram(cfg_params)
27     scramArea = ''
28     self.additional_inbox_files = []
29     self.scriptExe = ''
30     self.executable = ''
31     self.tgz_name = 'default.tgz'
32    
33 gutsche 1.3
34 slacapra 1.1 self.version = self.scram.getSWVersion()
35 gutsche 1.5 self.setParam_('application', self.version)
36 slacapra 1.1 common.analisys_common_info['sw_version'] = self.version
37 gutsche 1.3 ### FEDE
38     common.analisys_common_info['copy_input_data'] = 0
39     common.analisys_common_info['events_management'] = 1
40 slacapra 1.1
41     ### collect Data cards
42     try:
43 slacapra 1.9 tmp = cfg_params['CMSSW.datasetpath']
44     log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
45     if string.lower(tmp)=='none':
46     self.datasetPath = None
47     else:
48     self.datasetPath = tmp
49 slacapra 1.1 except KeyError:
50 gutsche 1.3 msg = "Error: datasetpath not defined "
51 slacapra 1.1 raise CrabException(msg)
52 gutsche 1.5
53     # ML monitoring
54     # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
55 slacapra 1.9 if not self.datasetPath:
56     self.setParam_('dataset', 'None')
57     self.setParam_('owner', 'None')
58     else:
59     datasetpath_split = self.datasetPath.split("/")
60     self.setParam_('dataset', datasetpath_split[1])
61     self.setParam_('owner', datasetpath_split[-1])
62    
63 gutsche 1.8 self.setTaskid_()
64     self.setParam_('taskId', self.cfg_params['taskId'])
65 gutsche 1.5
66 slacapra 1.1 self.dataTiers = []
67    
68     ## now the application
69     try:
70     self.executable = cfg_params['CMSSW.executable']
71 gutsche 1.5 self.setParam_('exe', self.executable)
72 slacapra 1.1 log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
73     msg = "Default executable cmsRun overridden. Switch to " + self.executable
74     log.debug(3,msg)
75     except KeyError:
76     self.executable = 'cmsRun'
77 gutsche 1.5 self.setParam_('exe', self.executable)
78 slacapra 1.1 msg = "User executable not defined. Use cmsRun"
79     log.debug(3,msg)
80     pass
81    
82     try:
83     self.pset = cfg_params['CMSSW.pset']
84     log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
85     if (not os.path.exists(self.pset)):
86     raise CrabException("User defined PSet file "+self.pset+" does not exist")
87     except KeyError:
88     raise CrabException("PSet file missing. Cannot run cmsRun ")
89    
90     # output files
91     try:
92     self.output_file = []
93    
94     tmp = cfg_params['CMSSW.output_file']
95     if tmp != '':
96     tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
97     log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
98     for tmp in tmpOutFiles:
99     tmp=string.strip(tmp)
100     self.output_file.append(tmp)
101     pass
102     else:
103     log.message("No output file defined: only stdout/err will be available")
104     pass
105     pass
106     except KeyError:
107     log.message("No output file defined: only stdout/err will be available")
108     pass
109    
110     # script_exe file as additional file in inputSandbox
111     try:
112 slacapra 1.10 self.scriptExe = cfg_params['USER.script_exe']
113     self.additional_inbox_files.append(self.scriptExe)
114     if self.scriptExe != '':
115     if not os.path.isfile(self.scriptExe):
116     msg ="WARNING. file "+self.scriptExe+" not found"
117     raise CrabException(msg)
118 slacapra 1.1 except KeyError:
119     pass
120    
121     ## additional input files
122     try:
123     tmpAddFiles = string.split(cfg_params['CMSSW.additional_input_files'],',')
124     for tmp in tmpAddFiles:
125 gutsche 1.3 if not os.path.exists(tmp):
126     raise CrabException("Additional input file not found: "+tmp)
127 slacapra 1.1 tmp=string.strip(tmp)
128     self.additional_inbox_files.append(tmp)
129     pass
130     pass
131     except KeyError:
132     pass
133    
134 slacapra 1.9 # files per job
135 slacapra 1.1 try:
136 gutsche 1.3 self.filesPerJob = int(cfg_params['CMSSW.files_per_jobs']) #Daniele
137 slacapra 1.9 self.selectFilesPerJob = 1
138 gutsche 1.3 except KeyError:
139 slacapra 1.9 self.filesPerJob = 0
140     self.selectFilesPerJob = 0
141 gutsche 1.3
142 slacapra 1.9 ## Events per job
143 gutsche 1.3 try:
144 slacapra 1.10 self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
145 slacapra 1.9 self.selectEventsPerJob = 1
146 gutsche 1.3 except KeyError:
147 slacapra 1.9 self.eventsPerJob = -1
148     self.selectEventsPerJob = 0
149    
150 slacapra 1.10 # To be implemented
151     # ## number of jobs
152     # try:
153     # self.numberOfJobs =int( cfg_params['CMSSW.number_of_job'])
154     # self.selectNumberOfJobs = 1
155     # except KeyError:
156     # self.selectNumberOfJobs = 0
157    
158 slacapra 1.9 if (self.selectFilesPerJob == self.selectEventsPerJob):
159 slacapra 1.10 msg = 'Must define either files_per_jobs or events_per_job'
160 slacapra 1.9 raise CrabException(msg)
161    
162 slacapra 1.10 if (self.selectEventsPerJob and not self.datasetPath == None):
163     msg = 'Splitting according to events_per_job available only with None as datasetpath'
164     raise CrabException(msg)
165    
166 gutsche 1.3 try:
167 slacapra 1.1 self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
168     except KeyError:
169 gutsche 1.3 msg = 'Must define total_number_of_events'
170 slacapra 1.1 raise CrabException(msg)
171    
172     CEBlackList = []
173     try:
174     tmpBad = string.split(cfg_params['EDG.ce_black_list'],',')
175     for tmp in tmpBad:
176     tmp=string.strip(tmp)
177     CEBlackList.append(tmp)
178     except KeyError:
179     pass
180    
181     self.reCEBlackList=[]
182     for bad in CEBlackList:
183     self.reCEBlackList.append(re.compile( bad ))
184    
185     common.logger.debug(5,'CEBlackList: '+str(CEBlackList))
186    
187     CEWhiteList = []
188     try:
189     tmpGood = string.split(cfg_params['EDG.ce_white_list'],',')
190     for tmp in tmpGood:
191     tmp=string.strip(tmp)
192     CEWhiteList.append(tmp)
193     except KeyError:
194     pass
195    
196     #print 'CEWhiteList: ',CEWhiteList
197     self.reCEWhiteList=[]
198     for Good in CEWhiteList:
199     self.reCEWhiteList.append(re.compile( Good ))
200    
201     common.logger.debug(5,'CEWhiteList: '+str(CEWhiteList))
202    
203 gutsche 1.3 self.PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset
204    
205 slacapra 1.1 #DBSDLS-start
206     ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
207     self.maxEvents=0 # max events available ( --> check the requested nb. of evts in Creator.py)
208     self.DBSPaths={} # all dbs paths requested ( --> input to the site local discovery script)
209     ## Perform the data location and discovery (based on DBS/DLS)
210 slacapra 1.9 ## SL: Don't if NONE is specified as input (pythia use case)
211     common.analisys_common_info['sites']=None
212     if self.datasetPath:
213     self.DataDiscoveryAndLocation(cfg_params)
214 slacapra 1.1 #DBSDLS-end
215    
216     self.tgzNameWithPath = self.getTarBall(self.executable)
217    
218 slacapra 1.10 # modify Pset
219     if (self.datasetPath): # standard job
220     self.PsetEdit.maxEvent(self.eventsPerJob) #Daniele
221     self.PsetEdit.inputModule("INPUT") #Daniele
222    
223     else: # pythia like job
224     self.PsetEdit.maxEvent(self.eventsPerJob) #Daniele
225     self.PsetEdit.pythiaSeed("INPUT") #Daniele
226     try:
227     self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
228     except KeyError:
229     self.sourceSeed = 123456
230     common.logger.message("No seed given, will use "+str(self.sourceSeed))
231    
232     self.PsetEdit.psetWriter(self.configFilename())
233    
234 slacapra 1.9 ## Select Splitting
235     if self.selectFilesPerJob: self.jobSplittingPerFiles()
236     elif self.selectEventsPerJob: self.jobSplittingPerEvents()
237     else:
238     msg = 'Don\'t know how to split...'
239     raise CrabException(msg)
240 gutsche 1.5
241 gutsche 1.3
242 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
243    
244 gutsche 1.3 common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
245    
246     datasetPath=self.datasetPath
247    
248     ## TODO
249     dataTiersList = ""
250     dataTiers = dataTiersList.split(',')
251 slacapra 1.1
252     ## Contact the DBS
253     try:
254 afanfani 1.4 self.pubdata=DataDiscovery_EDM.DataDiscovery_EDM(datasetPath, dataTiers, cfg_params)
255 slacapra 1.1 self.pubdata.fetchDBSInfo()
256    
257 gutsche 1.3 except DataDiscovery_EDM.NotExistingDatasetError, ex :
258 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
259     raise CrabException(msg)
260    
261 gutsche 1.3 except DataDiscovery_EDM.NoDataTierinProvenanceError, ex :
262 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
263     raise CrabException(msg)
264 gutsche 1.3 except DataDiscovery_EDM.DataDiscoveryError, ex:
265 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS %s'%ex.getErrorMessage()
266     raise CrabException(msg)
267    
268     ## get list of all required data in the form of dbs paths (dbs path = /dataset/datatier/owner)
269 gutsche 1.3 ## self.DBSPaths=self.pubdata.getDBSPaths()
270     common.logger.message("Required data are :"+self.datasetPath)
271    
272     filesbyblock=self.pubdata.getFiles()
273     self.AllInputFiles=filesbyblock.values()
274     self.files = self.AllInputFiles
275    
276     ## TEMP
277     # self.filesTmp = filesbyblock.values()
278     # self.files = []
279     # locPath='rfio:cmsbose2.bo.infn.it:/flatfiles/SE00/cms/fanfani/ProdTest/'
280     # locPath=''
281     # tmp = []
282     # for file in self.filesTmp[0]:
283     # tmp.append(locPath+file)
284     # self.files.append(tmp)
285     ## END TEMP
286 slacapra 1.1
287     ## get max number of events
288 gutsche 1.3 #common.logger.debug(10,"number of events for primary fileblocks %i"%self.pubdata.getMaxEvents())
289 slacapra 1.1 self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py
290     common.logger.message("\nThe number of available events is %s"%self.maxEvents)
291    
292     ## Contact the DLS and build a list of sites hosting the fileblocks
293     try:
294 gutsche 1.6 dataloc=DataLocation_EDM.DataLocation_EDM(filesbyblock.keys(),cfg_params)
295     dataloc.fetchDLSInfo()
296 gutsche 1.3 except DataLocation_EDM.DataLocationError , ex:
297 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
298     raise CrabException(msg)
299    
300     allsites=dataloc.getSites()
301     common.logger.debug(5,"sites are %s"%allsites)
302     sites=self.checkBlackList(allsites)
303     common.logger.debug(5,"sites are (after black list) %s"%sites)
304     sites=self.checkWhiteList(sites)
305     common.logger.debug(5,"sites are (after white list) %s"%sites)
306    
307     if len(sites)==0:
308     msg = 'No sites hosting all the needed data! Exiting... '
309     raise CrabException(msg)
310 gutsche 1.3
311 slacapra 1.1 common.logger.message("List of Sites hosting the data : "+str(sites))
312     common.logger.debug(6, "List of Sites: "+str(sites))
313     common.analisys_common_info['sites']=sites ## used in SchedulerEdg.py in createSchScript
314 gutsche 1.5 self.setParam_('TargetCE', ','.join(sites))
315 slacapra 1.1 return
316 gutsche 1.3
317 slacapra 1.9 def jobSplittingPerFiles(self):
318     """
319     Perform job splitting based on number of files to be accessed per job
320 gutsche 1.3 """
321 slacapra 1.9 common.logger.debug(5,'Splitting per input files')
322 gutsche 1.3 common.logger.message('Required '+str(self.filesPerJob)+' files per job ')
323     common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
324    
325     ## TODO: SL need to have (from DBS) a detailed list of how many events per each file
326     n_tot_files = (len(self.files[0]))
327 fanzago 1.12 #print "n_tot_files = ", n_tot_files
328 gutsche 1.3 ## SL: this is wrong if the files have different number of events
329 fanzago 1.12 #print "self.maxEvents = ", self.maxEvents
330 gutsche 1.3 evPerFile = int(self.maxEvents)/n_tot_files
331 fanzago 1.12 #print "evPerFile = int(self.maxEvents)/n_tot_files = ", evPerFile
332    
333 gutsche 1.3 common.logger.debug(5,'Events per File '+str(evPerFile))
334    
335     ## if asked to process all events, do it
336     if self.total_number_of_events == -1:
337     self.total_number_of_events=self.maxEvents
338     self.total_number_of_jobs = int(n_tot_files)*1/int(self.filesPerJob)
339     common.logger.message(str(self.total_number_of_jobs)+' jobs will be created for all available events '+str(self.total_number_of_events)+' events')
340    
341     else:
342 fanzago 1.12 #print "self.total_number_of_events = ", self.total_number_of_events
343     #print "evPerFile = ", evPerFile
344 gutsche 1.3 self.total_number_of_files = int(self.total_number_of_events/evPerFile)
345 fanzago 1.12 #print "self.total_number_of_files = int(self.total_number_of_events/evPerFile) = " , self.total_number_of_files
346 gutsche 1.3 ## SL: if ask for less event than what is computed to be available on a
347     ## file, process the first file anyhow.
348     if self.total_number_of_files == 0:
349     self.total_number_of_files = self.total_number_of_files + 1
350 fanzago 1.12
351 gutsche 1.3
352     common.logger.debug(5,'N files '+str(self.total_number_of_files))
353    
354     check = 0
355    
356     ## Compute the number of jobs
357     #self.total_number_of_jobs = int(n_tot_files)*1/int(self.filesPerJob)
358 fanzago 1.12 #print "self.total_number_of_files = ", self.total_number_of_files
359     #print "self.filesPerJob = ", self.filesPerJob
360 gutsche 1.3 self.total_number_of_jobs = int(self.total_number_of_files/self.filesPerJob)
361 fanzago 1.12 #print "self.total_number_of_jobs = ", self.total_number_of_jobs
362 gutsche 1.3 common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs))
363    
364     ## is there any remainder?
365     check = int(self.total_number_of_files) - (int(self.total_number_of_jobs)*self.filesPerJob)
366    
367     common.logger.debug(5,'Check '+str(check))
368    
369     if check > 0:
370     self.total_number_of_jobs = self.total_number_of_jobs + 1
371     common.logger.message('Warning: last job will be created with '+str(check)+' files')
372    
373 fanzago 1.12 #common.logger.message(str(self.total_number_of_jobs)+' jobs will be created for a total of '+str((self.total_number_of_jobs-1)*self.filesPerJob*evPerFile + check*evPerFile)+' events')
374     common.logger.message(str(self.total_number_of_jobs)+' jobs will be created for a total of '+str((self.total_number_of_jobs)*self.filesPerJob*evPerFile + check*evPerFile)+' events')
375 gutsche 1.3 pass
376    
377     list_of_lists = []
378     for i in xrange(0, int(n_tot_files), self.filesPerJob):
379 slacapra 1.9 parString = "\\{"
380    
381     params = self.files[0][i: i+self.filesPerJob]
382     for i in range(len(params) - 1):
383     parString += '\\\"' + params[i] + '\\\"\,'
384    
385     parString += '\\\"' + params[len(params) - 1] + '\\\"\\}'
386     list_of_lists.append(parString)
387     pass
388    
389     self.list_of_args = list_of_lists
390 fanzago 1.11 #print self.list_of_args
391 slacapra 1.9 return
392    
393     def jobSplittingPerEvents(self):
394     """
395     Perform job splitting based on number of event per job
396     """
397     common.logger.debug(5,'Splitting per events')
398     common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
399     common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
400    
401 slacapra 1.10 if (self.total_number_of_events < 0):
402     msg='Cannot split jobs per Events with "-1" as total number of events'
403     raise CrabException(msg)
404    
405 slacapra 1.9 self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
406 fanzago 1.12
407     print "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
408     print "self.total_number_of_events = ", self.total_number_of_events
409     print "self.eventsPerJob = ", self.eventsPerJob
410     print "self.total_number_of_jobs = ", self.total_number_of_jobs
411     print "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
412 slacapra 1.9
413     common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs))
414    
415     # is there any remainder?
416     check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)
417    
418     common.logger.debug(5,'Check '+str(check))
419    
420     if check > 0:
421     common.logger.message('Warning: asked '+self.total_number_of_events+' but will do only '+(int(self.total_number_of_jobs)*self.eventsPerJob))
422    
423     common.logger.message(str(self.total_number_of_jobs)+' jobs will be created for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
424    
425 slacapra 1.10 # argument is seed number.$i
426 slacapra 1.9 self.list_of_args = []
427     for i in range(self.total_number_of_jobs):
428 slacapra 1.10 self.list_of_args.append(int(str(self.sourceSeed)+str(i)))
429 slacapra 1.9 print self.list_of_args
430 gutsche 1.3
431     return
432    
433     def split(self, jobParams):
434    
435     common.jobDB.load()
436     #### Fabio
437     njobs = self.total_number_of_jobs
438 slacapra 1.9 arglist = self.list_of_args
439 gutsche 1.3 # create the empty structure
440     for i in range(njobs):
441     jobParams.append("")
442    
443     for job in range(njobs):
444 slacapra 1.9 jobParams[job] = str(arglist[job])
445 gutsche 1.3 common.jobDB.setArguments(job, jobParams[job])
446    
447     common.jobDB.save()
448     return
449    
450     def getJobTypeArguments(self, nj, sched):
451 slacapra 1.9 return common.jobDB.arguments(nj)
452 gutsche 1.3
453     def numberOfJobs(self):
454     # Fabio
455     return self.total_number_of_jobs
456    
457 slacapra 1.1 def checkBlackList(self, allSites):
458     if len(self.reCEBlackList)==0: return allSites
459     sites = []
460     for site in allSites:
461     common.logger.debug(10,'Site '+site)
462     good=1
463     for re in self.reCEBlackList:
464     if re.search(site):
465     common.logger.message('CE in black list, skipping site '+site)
466     good=0
467     pass
468     if good: sites.append(site)
469     if len(sites) == 0:
470     common.logger.debug(3,"No sites found after BlackList")
471     return sites
472    
473 gutsche 1.3 def checkWhiteList(self, allSites):
474 slacapra 1.1
475 gutsche 1.3 if len(self.reCEWhiteList)==0: return allSites
476 slacapra 1.1 sites = []
477 gutsche 1.3 for site in allSites:
478 slacapra 1.1 good=0
479     for re in self.reCEWhiteList:
480     if re.search(site):
481     common.logger.debug(5,'CE in white list, adding site '+site)
482     good=1
483     if not good: continue
484     sites.append(site)
485     if len(sites) == 0:
486     common.logger.message("No sites found after WhiteList\n")
487     else:
488     common.logger.debug(5,"Selected sites via WhiteList are "+str(sites)+"\n")
489     return sites
490    
491     def getTarBall(self, exe):
492     """
493     Return the TarBall with lib and exe
494     """
495    
496     # if it exist, just return it
497     self.tgzNameWithPath = common.work_space.shareDir()+self.tgz_name
498     if os.path.exists(self.tgzNameWithPath):
499     return self.tgzNameWithPath
500    
501     # Prepare a tar gzipped file with user binaries.
502     self.buildTar_(exe)
503    
504     return string.strip(self.tgzNameWithPath)
505    
506     def buildTar_(self, executable):
507    
508     # First of all declare the user Scram area
509     swArea = self.scram.getSWArea_()
510     #print "swArea = ", swArea
511     swVersion = self.scram.getSWVersion()
512     #print "swVersion = ", swVersion
513     swReleaseTop = self.scram.getReleaseTop_()
514     #print "swReleaseTop = ", swReleaseTop
515    
516     ## check if working area is release top
517     if swReleaseTop == '' or swArea == swReleaseTop:
518     return
519    
520     filesToBeTarred = []
521     ## First find the executable
522     if (self.executable != ''):
523     exeWithPath = self.scram.findFile_(executable)
524     # print exeWithPath
525     if ( not exeWithPath ):
526     raise CrabException('User executable '+executable+' not found')
527    
528     ## then check if it's private or not
529     if exeWithPath.find(swReleaseTop) == -1:
530     # the exe is private, so we must ship
531     common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
532     path = swArea+'/'
533     exe = string.replace(exeWithPath, path,'')
534     filesToBeTarred.append(exe)
535     pass
536     else:
537     # the exe is from release, we'll find it on WN
538     pass
539    
540     ## Now get the libraries: only those in local working area
541     libDir = 'lib'
542     lib = swArea+'/' +libDir
543     common.logger.debug(5,"lib "+lib+" to be tarred")
544     if os.path.exists(lib):
545     filesToBeTarred.append(libDir)
546    
547 gutsche 1.3 ## Now check if module dir is present
548     moduleDir = 'module'
549     if os.path.isdir(swArea+'/'+moduleDir):
550     filesToBeTarred.append(moduleDir)
551    
552 slacapra 1.1 ## Now check if the Data dir is present
553     dataDir = 'src/Data/'
554     if os.path.isdir(swArea+'/'+dataDir):
555     filesToBeTarred.append(dataDir)
556    
557     ## Create the tar-ball
558     if len(filesToBeTarred)>0:
559     cwd = os.getcwd()
560     os.chdir(swArea)
561     tarcmd = 'tar zcvf ' + self.tgzNameWithPath + ' '
562     for line in filesToBeTarred:
563     tarcmd = tarcmd + line + ' '
564     cout = runCommand(tarcmd)
565     if not cout:
566     raise CrabException('Could not create tar-ball')
567     os.chdir(cwd)
568     else:
569     common.logger.debug(5,"No files to be to be tarred")
570    
571     return
572    
573     def wsSetupEnvironment(self, nj):
574     """
575     Returns part of a job script which prepares
576     the execution environment for the job 'nj'.
577     """
578     # Prepare JobType-independent part
579 gutsche 1.3 txt = ''
580    
581     ## OLI_Daniele at this level middleware already known
582    
583     txt += 'if [ $middleware == LCG ]; then \n'
584     txt += self.wsSetupCMSLCGEnvironment_()
585     txt += 'elif [ $middleware == OSG ]; then\n'
586     txt += ' time=`date -u +"%s"`\n'
587     txt += ' WORKING_DIR=$OSG_WN_TMP/cms_$time\n'
588     txt += ' echo "Creating working directory: $WORKING_DIR"\n'
589     txt += ' /bin/mkdir -p $WORKING_DIR\n'
590     txt += ' if [ ! -d $WORKING_DIR ] ;then\n'
591 gutsche 1.7 txt += ' echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
592     txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
593     txt += ' echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
594     txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
595 gutsche 1.3 txt += ' exit 1\n'
596     txt += ' fi\n'
597     txt += '\n'
598     txt += ' echo "Change to working directory: $WORKING_DIR"\n'
599     txt += ' cd $WORKING_DIR\n'
600     txt += self.wsSetupCMSOSGEnvironment_()
601     txt += 'fi\n'
602 slacapra 1.1
603     # Prepare JobType-specific part
604     scram = self.scram.commandName()
605     txt += '\n\n'
606     txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
607     txt += scram+' project CMSSW '+self.version+'\n'
608     txt += 'status=$?\n'
609     txt += 'if [ $status != 0 ] ; then\n'
610 gutsche 1.7 txt += ' echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
611 gutsche 1.3 txt += ' echo "JOB_EXIT_STATUS = 10034"\n'
612 gutsche 1.7 txt += ' echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
613 slacapra 1.1 txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
614 gutsche 1.3 ## OLI_Daniele
615     txt += ' if [ $middleware == OSG ]; then \n'
616     txt += ' echo "Remove working directory: $WORKING_DIR"\n'
617     txt += ' cd $RUNTIME_AREA\n'
618     txt += ' /bin/rm -rf $WORKING_DIR\n'
619     txt += ' if [ -d $WORKING_DIR ] ;then\n'
620 gutsche 1.7 txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
621     txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
622     txt += ' echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
623     txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
624 gutsche 1.3 txt += ' fi\n'
625     txt += ' fi \n'
626     txt += ' exit 1 \n'
627 slacapra 1.1 txt += 'fi \n'
628     txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
629     txt += 'cd '+self.version+'\n'
630     ### needed grep for bug in scramv1 ###
631     txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
632    
633     # Handle the arguments:
634     txt += "\n"
635 gutsche 1.7 txt += "## number of arguments (first argument always jobnumber)\n"
636 slacapra 1.1 txt += "\n"
637     txt += "narg=$#\n"
638 gutsche 1.3 txt += "if [ $narg -lt 2 ]\n"
639 slacapra 1.1 txt += "then\n"
640     txt += " echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$narg+ \n"
641 gutsche 1.3 txt += ' echo "JOB_EXIT_STATUS = 50113"\n'
642 gutsche 1.7 txt += ' echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
643 slacapra 1.1 txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
644 gutsche 1.3 ## OLI_Daniele
645     txt += ' if [ $middleware == OSG ]; then \n'
646     txt += ' echo "Remove working directory: $WORKING_DIR"\n'
647     txt += ' cd $RUNTIME_AREA\n'
648     txt += ' /bin/rm -rf $WORKING_DIR\n'
649     txt += ' if [ -d $WORKING_DIR ] ;then\n'
650 gutsche 1.7 txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
651     txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
652     txt += ' echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
653     txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
654 gutsche 1.3 txt += ' fi\n'
655     txt += ' fi \n'
656 slacapra 1.1 txt += " exit 1\n"
657     txt += "fi\n"
658     txt += "\n"
659    
660     # Prepare job-specific part
661     job = common.job_list[nj]
662     pset = os.path.basename(job.configFilename())
663     txt += '\n'
664 slacapra 1.10 if (self.datasetPath): # standard job
665     txt += 'InputFiles=$2\n'
666     txt += 'echo "Inputfiles:<$InputFiles>"\n'
667     txt += 'sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' > pset.cfg\n'
668     else: # pythia like job
669     txt += 'Seed=$2\n'
670     txt += 'echo "Seed: <$Seed>"\n'
671     txt += 'sed "s#INPUT#$Seed#" $RUNTIME_AREA/'+pset+' > pset.cfg\n'
672 slacapra 1.1
673     if len(self.additional_inbox_files) > 0:
674     for file in self.additional_inbox_files:
675     txt += 'if [ -e $RUNTIME_AREA/'+file+' ] ; then\n'
676     txt += ' cp $RUNTIME_AREA/'+file+' .\n'
677     txt += ' chmod +x '+file+'\n'
678     txt += 'fi\n'
679     pass
680    
681     txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
682    
683     txt += '\n'
684     txt += 'echo "***** cat pset.cfg *********"\n'
685     txt += 'cat pset.cfg\n'
686     txt += 'echo "****** end pset.cfg ********"\n'
687 gutsche 1.3 txt += '\n'
688     # txt += 'echo "***** cat pset1.cfg *********"\n'
689     # txt += 'cat pset1.cfg\n'
690     # txt += 'echo "****** end pset1.cfg ********"\n'
691     return txt
692    
693     def wsBuildExe(self, nj):
694     """
695     Put in the script the commands to build an executable
696     or a library.
697     """
698    
699     txt = ""
700    
701     if os.path.isfile(self.tgzNameWithPath):
702     txt += 'echo "tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'"\n'
703     txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
704     txt += 'untar_status=$? \n'
705     txt += 'if [ $untar_status -ne 0 ]; then \n'
706     txt += ' echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
707     txt += ' echo "JOB_EXIT_STATUS = $untar_status" \n'
708 gutsche 1.7 txt += ' echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
709 gutsche 1.3 txt += ' if [ $middleware == OSG ]; then \n'
710     txt += ' echo "Remove working directory: $WORKING_DIR"\n'
711     txt += ' cd $RUNTIME_AREA\n'
712     txt += ' /bin/rm -rf $WORKING_DIR\n'
713     txt += ' if [ -d $WORKING_DIR ] ;then\n'
714 gutsche 1.7 txt += ' echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
715     txt += ' echo "JOB_EXIT_STATUS = 50999"\n'
716     txt += ' echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
717     txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
718 gutsche 1.3 txt += ' fi\n'
719     txt += ' fi \n'
720     txt += ' \n'
721 gutsche 1.7 txt += ' exit 1 \n'
722 gutsche 1.3 txt += 'else \n'
723     txt += ' echo "Successful untar" \n'
724     txt += 'fi \n'
725     pass
726    
727 slacapra 1.1 return txt
728    
729     def modifySteeringCards(self, nj):
730     """
731     modify the card provided by the user,
732     writing a new card into share dir
733     """
734    
735     def executableName(self):
736     return self.executable
737    
738     def executableArgs(self):
739 gutsche 1.3 return " -p pset.cfg"
740 slacapra 1.1
741     def inputSandbox(self, nj):
742     """
743     Returns a list of filenames to be put in JDL input sandbox.
744     """
745     inp_box = []
746     # dict added to delete duplicate from input sandbox file list
747     seen = {}
748     ## code
749     if os.path.isfile(self.tgzNameWithPath):
750     inp_box.append(self.tgzNameWithPath)
751     ## config
752     inp_box.append(common.job_list[nj].configFilename())
753     ## additional input files
754 gutsche 1.3 #for file in self.additional_inbox_files:
755     # inp_box.append(common.work_space.cwdDir()+file)
756 slacapra 1.1 return inp_box
757    
758     def outputSandbox(self, nj):
759     """
760     Returns a list of filenames to be put in JDL output sandbox.
761     """
762     out_box = []
763    
764     stdout=common.job_list[nj].stdout()
765     stderr=common.job_list[nj].stderr()
766    
767     ## User Declared output files
768     for out in self.output_file:
769     n_out = nj + 1
770     out_box.append(self.numberFile_(out,str(n_out)))
771     return out_box
772     return []
773    
774     def prepareSteeringCards(self):
775     """
776     Make initial modifications of the user's steering card file.
777     """
778     return
779    
def wsRenameOutput(self, nj):
    """
    Build the part of the job script which renames the produced files.

    For every name in self.output_file the fragment lists the file.
    If that fails it reports JobExitCode=60302 (and, when the scheduler
    is condor_g, writes a dummy output file on OSG); otherwise it copies
    the file to $RUNTIME_AREA under its numbered name (numberFile_ with
    '$NJob'). Afterwards the fragment changes to $RUNTIME_AREA, sets
    file_list to the numbered names and, on OSG, removes $WORKING_DIR.

    nj is not used. The fragment is returned as one string.
    """
    script = [
        '\n',
        '# directory content\n',
        'ls \n',
    ]
    numbered_names = []
    for produced in self.output_file:
        target = self.numberFile_(produced, '$NJob')
        numbered_names.append(target)
        script.extend([
            '\n',
            '# check output file\n',
            'ls ' + produced + '\n',
            'exe_result=$?\n',
            'if [ $exe_result -ne 0 ] ; then\n',
            ' echo "ERROR: No output file to manage"\n',
            ' echo "JOB_EXIT_STATUS = $exe_result"\n',
            ' echo "JobExitCode=60302" | tee -a $RUNTIME_AREA/$repo\n',
            ' dumpStatus $RUNTIME_AREA/$repo\n',
        ])
        ### OLI_DANIELE
        if common.scheduler.boss_scheduler_name == 'condor_g':
            script.extend([
                ' if [ $middleware == OSG ]; then \n',
                ' echo "prepare dummy output file"\n',
                ' echo "Processing of job output failed" > $RUNTIME_AREA/' + target + '\n',
                ' fi \n',
            ])
        script.extend([
            'else\n',
            ' cp ' + produced + ' $RUNTIME_AREA/' + target + '\n',
            'fi\n',
        ])

    script.append('cd $RUNTIME_AREA\n')
    # Space separated list of the numbered names, no trailing blank.
    script.append('file_list="' + ' '.join(numbered_names) + '"\n')
    ### OLI_DANIELE
    script.extend([
        'if [ $middleware == OSG ]; then\n',
        ' cd $RUNTIME_AREA\n',
        ' echo "Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n',
        ' echo "JOB_EXIT_STATUS = 60999"\n',
        ' echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' fi\n',
        'fi\n',
        '\n',
    ])
    return ''.join(script)
828    
def numberFile_(self, file, txt):
    """
    Insert '_' + txt before the last extension of a file name.

    file -- file name; the text after its last '.' is the extension
    txt  -- string to insert

    Returns name_txt.ext, or name_txt when the name contains no '.'.
    """
    # The str method replaces the string-module function string.split,
    # which newer Python versions no longer provide.
    pieces = file.split(".")
    if len(pieces) > 1:
        # everything in front of the last '.' stays untouched
        stem = ".".join(pieces[:-1])
        return stem + '_' + txt + "." + pieces[-1]
    return file + '_' + txt
848    
def getRequirements(self):
    """
    Return the job requirements expression to add to the jdl files.

    The expression is assembled from common.analisys_common_info:
      'sw_version' -- when set, a Member("VO-cms-<version>", ...) clause
      'sites'      -- when not empty, a parenthesised '||' list of
                      other.GlueCEInfoHostName comparisons

    The clauses are combined with ' && '. Joining them (instead of
    always prefixing the site clause with ' && ') keeps the expression
    well formed when only the site clause is present. An empty string
    is returned when neither clause applies.
    """
    info = common.analisys_common_info
    clauses = []

    sw_version = info['sw_version']
    if sw_version:
        clauses.append('Member("VO-cms-' + sw_version +
                       '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')

    sites = info['sites']
    if sites:
        hosts = ['other.GlueCEInfoHostName == "' + site + '"'
                 for site in sites]
        clauses.append('(' + ' || '.join(hosts) + ')')

    return ' && '.join(clauses)
869 gutsche 1.3
def configFilename(self):
    """Return the config filename: the value of self.name() plus '.cfg'."""
    base = self.name()
    return base + '.cfg'
873    
874     ### OLI_DANIELE
def wsSetupCMSOSGEnvironment_(self):
    """
    Return the part of a job script which prepares the execution
    environment on OSG and which is common for all CMS jobs.

    The fragment sources cmsset_default.sh, passing self.version, from
    $GRID3_APP_DIR/cmssoft or, failing that, from $OSG_APP/cmssoft.
    When neither file exists it reports JobExitCode=10020, removes
    $WORKING_DIR (reporting 10017 if the directory survives) and exits
    with status 1.
    """
    version = self.version
    script = [
        '\n',
        ' echo "### SETUP CMS OSG ENVIRONMENT ###"\n',
        ' if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n',
        ' # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n',
        ' source $GRID3_APP_DIR/cmssoft/cmsset_default.sh ' + version + '\n',
        ' elif [ -f $OSG_APP/cmssoft/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cmsset_default.sh ' + version + '\n',
        ' else\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        # No 'exit 1' at this point: leaving the script here would make
        # the working directory cleanup below unreachable. The script
        # exits once the cleanup is done.
        '\n',
        ' echo "Remove working directory: $WORKING_DIR"\n',
        ' cd $RUNTIME_AREA\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10017"\n',
        ' echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' fi\n',
        '\n',
        ' exit 1\n',
        ' fi\n',
        '\n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
        ' echo " END SETUP CMS OSG ENVIRONMENT "\n',
    ]
    return ''.join(script)
912    
913     ### OLI_DANIELE
def wsSetupCMSLCGEnvironment_(self):
    """
    Return the part of a job script which prepares the execution
    environment on LCG and which is common for all CMS jobs.

    The fragment checks $VO_CMS_SW_DIR (error 10031), checks and sources
    $VO_CMS_SW_DIR/cmsset_default.sh (errors 10020 and 10032) and then
    inspects /etc/redhat-release: SCRAM_ARCH is exported as
    slc3_ia32_gcc323 for the *Enterprise* / *cientific* patterns, left
    as it is for *alhalla*, and any other content ends the job with
    error 10033. The text does not depend on any instance state.
    """
    script_lines = (
        ' \n',
        ' echo " ### SETUP CMS LCG ENVIRONMENT ### "\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n',
        ' echo "JOB_EXIT_STATUS = 10031" \n',
        ' echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' echo "JOB_EXIT_STATUS = 10032"\n',
        ' echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' string=`cat /etc/redhat-release`\n',
        ' echo $string\n',
        ' if [[ $string = *alhalla* ]]; then\n',
        ' echo "SCRAM_ARCH= $SCRAM_ARCH"\n',
        ' elif [[ $string = *Enterprise* ]] || [[ $string = *cientific* ]]; then\n',
        ' export SCRAM_ARCH=slc3_ia32_gcc323\n',
        ' echo "SCRAM_ARCH= $SCRAM_ARCH"\n',
        ' else\n',
        ' echo "SET_CMS_ENV 10033 ==> ERROR OS unknown, LCG environment not initialized"\n',
        ' echo "JOB_EXIT_STATUS = 10033"\n',
        ' echo "JobExitCode=10033" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
        ' echo "### END SETUP CMS LCG ENVIRONMENT ###"\n',
    )
    return ''.join(script_lines)
965 gutsche 1.5
def setParam_(self, param, value):
    """Store value under the key param in self._params."""
    params = self._params
    params[param] = value
968    
def getParams(self):
    """Return the parameter dictionary self._params itself (no copy)."""
    params = self._params
    return params
971 gutsche 1.8
def setTaskid_(self):
    """Copy cfg_params['taskId'] into self._taskId."""
    task_id = self.cfg_params['taskId']
    self._taskId = task_id
974    
def getTaskid(self):
    """Return the task id held in self._taskId."""
    task_id = self._taskId
    return task_id