ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitProd/Processing/bin/submit.py
Revision: 1.17
Committed: Tue Feb 28 11:54:36 2012 UTC (13 years, 2 months ago) by paus
Content type: text/x-python
Branch: MAIN
CVS Tags: Mit_025e, Mit_025d
Changes since 1.16: +104 -64 lines
Log Message:
Last updates.

File Contents

# User Rev Content
1 paus 1.2 #!/usr/bin/env python
2     #---------------------------------------------------------------------------------------------------
3     # Script to test, create and submit one complete production task
4     #
5     # Complete refers here to the proper preparation of the ultimate storage location (a storage element
6     # with a given storage path etc.), the submission of a test job to evaluate timing and data output
7     # size and finally the creation of the job configurations and the submission of the jobs to the grid
8     # sites. The grid is accessed via crab tools.
9     #
10     # While the script is pretty complete it has become a bit long and messy. Some cleanup will
11     # hopefully soon be performed.
12     #
13     # Author: C.Paus (July 1, 2008)
14     #---------------------------------------------------------------------------------------------------
15     import os,sys,getopt,re,string
16 paus 1.3 import task,translator
17 paus 1.2
18     #===================================================================================================
def domain():
    # Return the domain part of the local node name, i.e. everything after the first '.' of
    # os.uname()[1]. A node name without any '.' gives the empty string.
    pieces = os.uname()[1].split('.', 1)
    if len(pieces) < 2:
        return ''
    return pieces[1]
23    
def storage(seFile):
    # Look up the default storage target for the place we are running at.
    #
    # The site tag is derived from the local domain: 'mit.edu' -> T2_US_MIT, 'cern.ch' ->
    # T0_CH_CERN, anything else falls back to T2_US_MIT. The table in seFile is scanned for lines
    # starting with that tag; all blanks are removed and the line is split at ':'. Fields 1, 2
    # and 3 are returned joined as '<storageEle>:<storagePath>:<userRemoteDir>'. If several lines
    # start with the tag the last one wins.
    #
    # Raises RuntimeError when no line for the tag exists or the matching line has fewer than
    # four ':' separated fields (instead of failing later on an unbound variable / bad index).
    dom = domain()
    storageTag = 'T2_US_MIT'
    if 'mit.edu' in dom:
        storageTag = 'T2_US_MIT'
    elif 'cern.ch' in dom:
        storageTag = 'T0_CH_CERN'

    fields = None
    table = open(seFile,'r')
    try:
        for line in table:
            if line.startswith(storageTag):
                fields = line.rstrip('\n').replace(' ','').split(':')
    finally:
        table.close()

    if fields is None:
        raise RuntimeError("No entry for %s found in storage element table: %s"
                           % (storageTag,seFile))
    if len(fields) < 4:
        raise RuntimeError("Malformed entry for %s in storage element table: %s"
                           % (storageTag,seFile))

    storageEle, storagePath, userRemoteDir = fields[1], fields[2], fields[3]
    return storageEle + ':' + storagePath + ':' + userRemoteDir
42    
43     #===================================================================================================
def getFiles(mitCfg,version):
    # Prepare file based processing input: copy run.sh, writeCfg.py and storageTarget from
    # $MIT_PROD_DIR/<mitCfg>/<version>/ into the current directory (in that order).
    #
    # Raises RuntimeError as soon as one of the files does not exist; files checked earlier have
    # already been copied at that point. The exit status of 'cp' itself is not inspected.
    wanted = [('run.sh',        "Run file not found: %s"),
              ('writeCfg.py',   "Write Cfg file not found: %s"),
              ('storageTarget', "Storage file not found: %s")]
    for fileName, complaint in wanted:
        source = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + fileName
        if not os.path.exists(source):
            raise RuntimeError(complaint % source)
        os.system('cp ' + source + ' ./')
65 paus 1.12
66     #===================================================================================================
def makeLfnFile(mitCfg,version,mitDataset,dbs,useExistingLfns):
    # Make sure the list of logical file names <mitCfg>/<version>/<mitDataset>.lfns exists
    # (path relative to the current directory) and return that path.
    #
    # An existing file is kept only if useExistingLfns is set; otherwise it is removed and
    # regenerated with input.py (sorted, duplicates dropped).
    # NOTE: the dataset queried is the module level variable cmsDataset, not a parameter.
    lfnFile = '/'.join([mitCfg, version, mitDataset + '.lfns'])

    if os.path.exists(lfnFile):
        print("\n INFO -- Lfn file found: %s. Someone already worked on this dataset.\n" % lfnFile)
        if not useExistingLfns:
            os.system('rm ' + lfnFile)

    # recreate if requested or not existing
    keepExisting = useExistingLfns and os.path.exists(lfnFile)
    if not keepExisting:
        cmd = 'input.py --dbs=' + dbs + ' --option=lfn --dataset=' + cmsDataset
        cmd += ' | sort -u > ' + lfnFile
        print(' Input: ' + cmd + '\n')
        os.system(cmd)

    return lfnFile
83    
84     #===================================================================================================
def makeSiteFile(mitCfg,version,mitDataset,dbs,useExistingSites):
    # Make sure the site list <mitCfg>/<version>/<mitDataset>.sites exists (path relative to the
    # current directory) and return that path.
    #
    # An existing file is kept only if useExistingSites is set; otherwise it is removed and
    # regenerated with sitesList.py.
    # NOTE: the dataset queried is the module level variable cmsDataset, not a parameter.
    siteFile = '/'.join([mitCfg, version, mitDataset + '.sites'])

    if os.path.exists(siteFile):
        print("\n INFO -- Site file found: %s. Someone already worked on this dataset.\n" % siteFile)
        if not useExistingSites:
            os.system('rm ' + siteFile)

    # recreate if requested or not existing
    keepExisting = useExistingSites and os.path.exists(siteFile)
    if not keepExisting:
        cmd = 'sitesList.py --dbs=' + dbs + ' --dataset=' + cmsDataset + ' > ' + siteFile
        print(' Sites: ' + cmd + '\n')
        os.system(cmd)

    return siteFile
100    
101     #===================================================================================================
def searchReplace(line,mitCfg,version,mitDataset,storage,
                  cmsDataset='X',cmsswPy='X',dbs='X',sched='X',blacklist='X',skpEvts='X'):
    # Replace all XX-<NAME>-XX placeholders in line with the given values and return the result.
    #
    # storage is the '<storageEle>:<storagePath>:<userRemoteDir>' string; its first three ':'
    # separated fields fill XX-MITSE-XX, XX-MITSPATH-XX and XX-MITRDIR-XX.
    #
    # The placeholders are fixed literals, so plain string replacement is used: values are
    # inserted verbatim (a regex substitution would interpret backslashes in the values).
    # XX-SKIPEVTS-XX is substituted as well; before, skpEvts was accepted but silently ignored.
    #
    # Raises ValueError if storage has fewer than three fields.
    st = storage.split(':')
    if len(st) < 3:
        raise ValueError("storage must look like '<element>:<path>:<remoteDir>', got: %r"
                         % storage)

    # the order is kept as is: a value may itself contain a placeholder that is filled later
    substitutions = [('XX-CMSDATASET-XX', cmsDataset),
                     ('XX-MITDATASET-XX', mitDataset),
                     ('XX-CMSSWPY-XX',    cmsswPy),
                     ('XX-MITCFG-XX',     mitCfg),
                     ('XX-MITVERSION-XX', version),
                     ('XX-MITSE-XX',      st[0]),
                     ('XX-MITSPATH-XX',   st[1]),
                     ('XX-MITRDIR-XX',    st[2]),
                     ('XX-DBS-XX',        dbs),
                     ('XX-SCHED-XX',      sched),
                     ('XX-BLACKLIST-XX',  blacklist),
                     ('XX-SKIPEVTS-XX',   skpEvts)]
    for placeholder, value in substitutions:
        line = line.replace(placeholder, value)
    return line
132    
133     #===================================================================================================
def adjustCfg(line,nevents,crabId):
    # Fill the per-subtask placeholders of line: XX-NEVENTS-XX gets str(nevents) and
    # XX-CRABID-XX gets crabId. Returns the modified line.
    for placeholder, value in (('XX-NEVENTS-XX', str(nevents)),
                               ('XX-CRABID-XX',  crabId)):
        line = re.sub(placeholder, value, line)
    return line
142    
143     #===================================================================================================
def findStoragePath(mitCfg,version,mitDataset,seFile):
    # Find the foreseen storage place for this dataset.
    #
    # Reads the 'storage_path' and 'user_remote_dir' lines of
    # $MIT_PROD_DIR/<mitCfg>/<version>/crab.cfg, fills their placeholders via searchReplace and
    # returns <storage_path value without whitespace> + <last blank separated token of the
    # user_remote_dir line>. If a key appears several times the last storage_path line wins and
    # every user_remote_dir line is appended.
    #
    # Raises RuntimeError if crab.cfg contains no storage_path line (before, this ended in an
    # unbound variable error).
    crabCfg = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/crab.cfg'
    # the storage target does not change between lines: look it up once
    target = storage(seFile)

    storagePath = None
    for entry in os.popen('grep ^storage_path ' + crabCfg).readlines():
        line = searchReplace(entry[:-1],mitCfg,version,mitDataset,target)
        # the value is everything after the first '=' (it may contain further '=')
        value = "=".join(line.split("=")[1:])
        storagePath = re.sub(r"\s", "", value)
    if storagePath is None:
        raise RuntimeError("No storage_path entry found in: %s" % crabCfg)

    for entry in os.popen('grep ^user_remote_dir ' + crabCfg).readlines():
        line = searchReplace(entry[:-1],mitCfg,version,mitDataset,target)
        storagePath += line.split(" ")[-1]
    return storagePath
165    
166     #===================================================================================================
def createDirCern(storagePath):
    # Make sure the castor directory storagePath exists (rfdir / rfmkdir -p) and open its
    # permissions with rfchmod 777. Terminates the script with exit code 1 if the permissions
    # cannot be set; a failing rfmkdir is not treated as fatal by itself.
    if os.system('rfdir ' + storagePath + ' >& /dev/null') == 0:
        print(' Castor directory exists: ' + storagePath + '\n --> Moving on.')
    else:
        print(' Castor directory needs to be created.')
        if os.system('rfmkdir -p ' + storagePath) == 0:
            print(' --> Created: ' + storagePath)

    # always set the permissions
    if os.system('rfchmod 777 ' + storagePath) != 0:
        print(' --> Setting permissions failed. EXIT now.\n')
        sys.exit(1)
    print(' --> Set permissions: 777\n')
190    
191     #===================================================================================================
def create(path):
    # Create a directory through ssh on paus@cgate, but only for paths that match
    # '/pnfs/cmsaf.mit.edu/t2bat'. The directory is whatever follows the last '=' in path.
    #
    # Returns -1 if the path does not match, otherwise the exit status of the 'chmod 777'
    # command (the status of the preceding 'mkdir -p' is not reported).
    if not re.search('/pnfs/cmsaf.mit.edu/t2bat',path):
        return -1
    target = path.split('=')[-1]
    os.system('ssh -x paus@cgate mkdir -p ' + target)
    return os.system('ssh -x paus@cgate chmod 777 ' + target)
202    
203     #===================================================================================================
def createDirGeneral(storageEle,storagePath,fastCreate):
    # Create the storage directory storagePath on the storage element storageEle.
    #
    # First create() is tried; if it reports success nothing else is done. Otherwise the
    # directory is looked up with srmls and, if missing, created with srmmkdir. Unless
    # fastCreate is non-zero the three levels of parent directories are created first (outermost
    # first). Finally the permissions are checked; the script exits with code 1 if srmls shows
    # no 'UserPermission ... RWX' line.
    levels = storagePath.split('/')
    # parent directories, ordered from three levels up to the direct parent
    parents = ["/".join(levels[:-up]) for up in (3, 2, 1)]

    if create(storagePath) == 0:
        print(' ')
        print(' Directory was created at MIT.\n')
        return

    # set the storage URL
    srmHost    = 'srm://' + storageEle + ':8443'
    storageUrl = srmHost + storagePath

    # create it only if missing
    if os.system('srmls ' + storageUrl + ' >& /dev/null') == 0:
        print(' ')
        print(' Directory already found.... moving on.')
    else:
        # parents first, then the main storage directory itself
        targets = []
        if fastCreate == 0:
            targets.extend(parents)
        else:
            print(' Fast create activated. ')
        targets.append(storagePath)

        status = -1
        for target in targets:
            cmd = 'srmmkdir ' + srmHost + target + ' >& /dev/null'
            print(' srmmkdir: ' + cmd)
            status = os.system(cmd)
            print(' srmmkdir: status %d'%(status))

        # only the status of the last srmmkdir (the main directory) decides
        if status == 0:
            print(' ')
            print(' Directory was created.')
        else:
            print(' ')
            print(' ')
            print(' ')
            print(' ERROR - failed to create the Storage Area.')
            print(' ')
            print(' ')

    print(' Check permissions with: srmls -l -count=1 ' + storageUrl + '\n')
    cmd = 'srmls -l -count=1 ' + storageUrl + ' | grep UserPermission | grep RWX'
    permissionsOk = False
    for answer in os.popen(cmd).readlines():
        print(" Permissions? " + answer[:-1])
        permissionsOk = True
    print(' ')

    if not permissionsOk:
        print(' --> Directory creation or permissions failed. EXIT now.\n')
        sys.exit(1)
275    
276     #===================================================================================================
277     # Main starts here
278     #===================================================================================================
# Define string to explain usage of the script. Every option accepted by the command line
# parser is listed, including --useExistingLfns, --useExistingSites and --test.
usage = "".join([
    "Usage: submit.py --cmsDataset=<name> | --mitDataset=<name>\n",
    " --cmssw=<name>\n",
    " --mitCfg=<name>\n",
    " --version=<version>\n",
    " --dbs=<name>\n",
    " --sched=<name>\n",
    " --blacklist=<name>\n",
    " --nSubmit=<submittedJobs>\n",
    " --skipEvents=<'nRunX:nEventY','nRunXX:nEventYY',...>\n",
    " --fixSites=<siteList>\n",
    " --complete\n",
    " --useExistingLfns\n",
    " --useExistingSites\n",
    " --testJob\n",
    " --noTestJob\n",
    " --test\n",
    " --help\n",
])
294    
# Define the valid options which can be specified and check out the command line.
# Options taking a value get a trailing '=', plain switches do not.
valid  = [name + '=' for name in ('cmsDataset','mitDataset','cmssw','mitCfg','version','dbs',
                                  'sched','blacklist','nSubmit','skipEvents','fixSites')]
valid += ['complete','useExistingLfns','useExistingSites','testJob','noTestJob','test','help']
try:
    opts, args = getopt.getopt(sys.argv[1:], "", valid)
except getopt.GetoptError:
    # unknown option or missing value: explain and give up
    ex = sys.exc_info()[1]
    print(usage)
    print(str(ex))
    sys.exit(1)
305    
306     # --------------------------------------------------------------------------------------------------
307     # Get all parameters for the production
308     # --------------------------------------------------------------------------------------------------
# crab id: time stamp based tag (crab_0_<yymmdd>_<HHMMSS>, local time) identifying this
# submission. It is formatted in-process, so it no longer depends on an external 'date' command.
import time
crabId = time.strftime("crab_0_%y%m%d_%H%M%S")
print("\n This job will be CrabId: " + crabId + "\n")
# other non command line parameters
fastCreate = 0

# Set defaults for each command line parameter/option
cmsDataset = mitDataset = None
cmssw      = "cmssw"
mitCfg     = "filefi"
version    = os.environ['MIT_VERS']
# alternative: "https://cmsdbsprod.cern.ch:8443/cms_dbs_prod_global/servlet/DBSServlet"
dbs        = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
sched      = "glite"
blacklist  = ""
nSubmit    = -1
skpEvts    = ''
fixSites   = ''
complete   = 0
useExistingLfns  = False
useExistingSites = False
noTestJob = testJob = test = 0

# Read new values from the command line (each option name matches exactly one branch)
for opt, arg in opts:
    if   opt == "--help":
        print(usage)
        sys.exit(0)
    elif opt == "--cmsDataset":
        cmsDataset = arg
    elif opt == "--mitDataset":
        mitDataset = arg
    elif opt == "--cmssw":
        cmssw = arg
    elif opt == "--mitCfg":
        mitCfg = arg
    elif opt == "--version":
        version = arg
    elif opt == "--dbs":
        dbs = arg
    elif opt == "--sched":
        sched = arg
    elif opt == "--blacklist":
        blacklist = arg
    elif opt == "--nSubmit":
        nSubmit = arg
    elif opt == "--skipEvents":
        skpEvts = arg
    elif opt == "--fixSites":
        fixSites = arg
    elif opt == "--complete":
        complete = 1
    elif opt == "--useExistingLfns":
        useExistingLfns = True
    elif opt == "--useExistingSites":
        useExistingSites = True
    elif opt == "--noTestJob":
        # the two test job switches exclude each other, the last one given wins
        noTestJob = 1
        testJob   = 0
    elif opt == "--testJob":
        testJob   = 1
        noTestJob = 0
    elif opt == "--test":
        test = 1
378 paus 1.2
# Deal with obvious problems: a dataset has to be named and the storage element table, the crab
# template and the cmssw configuration of this <mitCfg>/<version> have to exist.
if cmsDataset is None and mitDataset is None:
    raise RuntimeError("--cmsDataset | --mitDataset options not provided. One of them is required.")
seFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/'+ version + '/seTable'
if not os.path.exists(seFile):
    raise RuntimeError("Storage element table not found: %s" % seFile)
crabFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'crab.cfg'
if not os.path.exists(crabFile):
    raise RuntimeError("Crab file not found: %s" % crabFile)
cmsswFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + cmssw + '.py'
if not os.path.exists(cmsswFile):
    # keep the name of the missing file in the message (it used to be overwritten and lost)
    raise RuntimeError(" XXXX ERROR no valid configuration found XXXX\n"
                       + "Cmssw file not found: %s" % cmsswFile)
# name of the cmssw configuration written for this particular submission
cmsswPy = cmssw + '_' + crabId + '.py'
397 paus 1.2
# Prepare the ce/se translator from the ceTable, seTable and preferredSites files of this
# <mitCfg>/<version>
trans = translator.Translator(
    '%s/%s/%s/ceTable'        % (os.environ['MIT_PROD_DIR'],mitCfg,version),
    '%s/%s/%s/seTable'        % (os.environ['MIT_PROD_DIR'],mitCfg,version),
    '%s/%s/%s/preferredSites' % (os.environ['MIT_PROD_DIR'],mitCfg,version))

# Create the corresponding crab task
crabTask = task.Task(crabId,cmsDataset,mitDataset,mitCfg,version,cmssw)
if 'undefined' in (crabTask.mitDataset, crabTask.cmsDataset):
    print("ERROR - dataset not defined.")
    sys.exit(1)

# from here on the names and the dbs instance known to the task are used
mitDataset = crabTask.mitDataset
cmsDataset = crabTask.cmsDataset
dbs        = crabTask.dbs

# Deal with obvious problems
if cmsDataset is None or mitDataset is None:
    raise RuntimeError("--cmsDataset & --mitDataset "
                       + "Have to be defined now (do you have the right database (Productions.<cmssw>)?")
418    
# Collect the input for this submission: copy the helper files, build the lfn list, determine the
# storage path and let the task work out which lfns are still missing.
419 paus 1.12 getFiles(mitCfg,version)
420 paus 1.17 lfnFile = makeLfnFile (mitCfg,version,mitDataset,dbs,useExistingLfns)
421 paus 1.8
422 paus 1.12 crabTask.storagePath = findStoragePath(mitCfg,version,mitDataset,seFile)
423 paus 1.2 crabTask.loadAllLfns(lfnFile)
424     crabTask.loadCompletedLfns()
# the missing lfns go to a second file named <lfnFile>_<task tag>
425     crabTask.createMissingLfns(lfnFile,lfnFile + '_' + crabTask.tag)
426 paus 1.17 # decide whether there are more jobs (lfns=files) to be submitted
427 paus 1.4 if crabTask.nLfnMissing == 0:
428     print ' All requested LFNs are available. EXIT now.'
429     sys.exit()
430 paus 1.17 # decide whether we always need to create the full directory tree
# (fastCreate is handed to createDirGeneral, which then skips creating the parent directories)
431     if crabTask.nLfnDone != 0:
432     fastCreate = 1
433    
434     # ok, looks like we will be submitting
435     siteFile = makeSiteFile(mitCfg,version,mitDataset,dbs,useExistingSites)
436 paus 1.4
437 paus 1.2 crabTask.createSubTasks(lfnFile + '_' + crabTask.tag)
# bring the per-subtask lfn files (<lfnFile>_<task tag>_*) into the working directory
438     cmd = 'cp ' + lfnFile + '_' + crabTask.tag + '_*' + ' ./'
439     os.system(cmd)
440    
# NOTE: despite its name nevents is the number of lfns of the task, not a number of events
441     nevents = len(crabTask.lfns)
442    
443     # Say what we do now
444     print ' Preparing dataset: ' + cmsDataset + ' [MIT: ' + mitDataset + ' with ' + str(nevents) + \
445     ' total jobs(=lfns), nEvtsTotal: %d]' % crabTask.nTotalEvts
446    
447     # --------------------------------------------------------------------------------------------------
448     # Prepare the config files
449     # --------------------------------------------------------------------------------------------------
450     # Cleaning up
# remove left-overs of the crab configuration, its template and the cmssw configuration
451 paus 1.3 cmd = "rm -f crab_" + crabTask.tag + ".cfg crab_" + crabTask.tag + ".cfg-Template " + cmsswPy
452 paus 1.2 os.system(cmd)
453    
454 paus 1.12 # Storage
455    
456 paus 1.2 # Parse template input and write the crab configuration file
# Output is crab_<tag>.cfg-Template: lines not starting with '#' get their XX-...-XX placeholders
# filled by searchReplace. storage(seFile) is evaluated again for every such line.
# NOTE(review): the indentation is lost in this listing, so it cannot be told from here whether
# the write below is inside the 'if' (comment lines dropped) or after it (copied unchanged).
457     fileInput = open(crabFile,'r')
458 paus 1.3 fileOutput = open("crab_" + crabTask.tag + ".cfg-Template",'w')
459 paus 1.2 line = fileInput.readline()
460     while (line != ''):
461     if line[0] != '#':
462 paus 1.12 line = searchReplace(line,mitCfg,version,mitDataset,storage(seFile), \
463 paus 1.2 cmsDataset,cmsswPy,dbs,sched,blacklist,skpEvts)
464     fileOutput.write(line)
465     line = fileInput.readline()
466     fileInput .close()
467     fileOutput.close()
468    
469     # Parse template input and write the crab configuration file
# Same procedure for the cmssw python configuration: cmsswFile is the template, cmsswPy the output.
470     fileInput = open(cmsswFile,'r')
471     fileOutput = open(cmsswPy,'w')
472     line = fileInput.readline()
473     while (line != ''):
474     if line[0] != '#':
475 paus 1.12 line = searchReplace(line,mitCfg,version,mitDataset,storage(seFile), \
476 paus 1.2 cmsDataset,cmsswPy,dbs,sched,blacklist,skpEvts)
477     fileOutput.write(line)
478     line = fileInput.readline()
479     fileInput .close()
480     fileOutput.close()
481    
482     # --------------------------------------------------------------------------------------------------
483     # Job creation and submission
484     # --------------------------------------------------------------------------------------------------
# Job creation and submission: for every subtask of the crab task the crab configuration is
# finalized, the storage area is prepared, the jobs are created with 'crab -create' and then
# submitted block by block with 'crab -submit'.
# NOTE(review): the indentation is lost in this listing; the nesting described in the added
# comments is inferred from the statements and should be confirmed against the real file.
485     print '\n Using CMSSW version: ' + os.environ['CMSSW_VERSION']
486     print ' Using CRAB version: ' + os.environ['CRAB_VERS'] + '\n'
487    
# text fragments looked for in the output of 'crab -create'
488     pattern1 = 'working directory'
489 paus 1.5 pattern2 = 'Total of '
490 paus 1.2
491     # Go through the crabTask and submit each subtask to the grid
492     for subTask in crabTask.subTasks:
493    
494     tag = crabTask.tag + '_' + subTask.tag()
495     print ' Working on subtask: ' + tag
496    
497     nJobsTotal = 0
498     crabIdCheck = ''
499    
500     # adjust crab config
# crab_<tag>.cfg-Template -> crab_<tag>.cfg with XX-NEVENTS-XX / XX-CRABID-XX filled per subtask
501 paus 1.3 fileInput = open("crab_" + crabTask.tag + ".cfg-Template",'r')
502     fileOutput = open("crab_" + crabTask.tag + ".cfg",'w')
503 paus 1.2 line = fileInput.readline()
504     while (line != ''):
505     if line[0] != '#':
506     line = adjustCfg(line,subTask.nSubTaskLfn,tag)
507     fileOutput.write(line)
508     line = fileInput.readline()
509     fileInput .close()
510     fileOutput.close()
511    
512     # ----------------------------------------------------------------------------------------------
513     # Deal with storage element area
514     # ----------------------------------------------------------------------------------------------
515     # find the forseen storage place
# storageEle, storagePath and storageUrl are only assigned if the matching line exists in the
# template; the value is the text after '=' with all whitespace removed
516 paus 1.3 cmd = 'grep ^storage_element crab_' + crabTask.tag + '.cfg-Template'
517 paus 1.2 for file in os.popen(cmd).readlines(): # run command
518     line = file[:-1] # strip '\n'
519     # decode the storage element name
520     names = line.split("=") # splitting every '='
521     storageEle = names.pop()
522     storageEle = re.sub("\s", "",storageEle)
523 paus 1.3 cmd = 'grep ^storage_path crab_' + crabTask.tag + '.cfg-Template'
524 paus 1.2 for file in os.popen(cmd).readlines(): # run command
525     line = file[:-1] # strip '\n'
526     # decode the storage directory name
527     names = line.split("=") # splitting every '='
528     names = names[1:]
529     storagePath = "=".join(names)
530     storagePath = re.sub("\s", "",storagePath)
531 paus 1.3 cmd = 'grep ^user_remote_dir crab_' + crabTask.tag + '.cfg-Template'
532 paus 1.2 for file in os.popen(cmd).readlines(): # run command
533     line = file[:-1] # strip '\n'
534     # decode the storage directory name
# here the line is split at blanks (not at '=') and the last token is appended to the path
535     names = line.split(" ") # splitting every '='
536     storagePath += names[-1]
537     storageUrl = 'srm://' + storageEle + ':8443' + storagePath
538    
539     storagePath = adjustCfg(storagePath,subTask.nSubTaskLfn,tag)
540     storageUrl = adjustCfg(storageUrl, subTask.nSubTaskLfn,tag)
541    
542     print ' StorageUrl: ' + storageUrl
543    
544     # Create storage area
545     if test == 0: # for testing we do not create the directories
546     if (storageEle == 'srm.cern.ch'):
547     createDirCern(storagePath)
548     else:
549 paus 1.17 createDirGeneral(storageEle,storagePath,fastCreate)
550 paus 1.2
551     # cmd = "crab -create -debug 3 -USER.ui_working_dir=" + tag + " | tee forDaniele "
552 paus 1.3 cmd = "crab -create -cfg crab_" + crabTask.tag + ".cfg -USER.ui_working_dir=" + tag
553 paus 1.2 print ' -> ' + cmd
# with --test the crab command is only echoed, not executed
554     if test != 0:
555     cmd = 'echo ' + cmd
556     print ' ' + cmd
# scan the crab output: crabIdCheck is taken from the 'working directory' line, nJobsTotal
# (kept as a string) from the 'Total of ' line
557     for line in os.popen(cmd).readlines(): # run command
558     line = line[:-1] # strip '\n'
559     print ' CRAB: ' + line
560     if re.search(pattern1,line):
561     f = line.split("/")
562     crabIdCheck = f[-2]
563     if re.search(pattern2,line):
564     f = line.split(" ")
565 paus 1.5 nJobsTotal = f[4]
566 paus 1.2 if nJobsTotal == '':
567     nJobsTotal = f[2]
568 paus 1.3 # report
569 paus 1.2 print ' --> %s jobs created (%s).\n'%(nJobsTotal,tag)
570 paus 1.3 # and cleanup the temporary file for the subtask
571     cmd = "rm -f " + mitDataset + ".lfns_" + tag
572     os.system(cmd)
573 paus 1.15
574     # test something useful is going to happen
575     if int(nJobsTotal) > 0:
576 paus 1.17 print ' More than zero (%d) jobs got created, go ahead and submit (%s).\n'%(int(nJobsTotal),crabId)
577 paus 1.15 else:
578     print ' Zero or less jobs got created: cleanup and EXIT.'
579     cmd = 'rm -rf `echo *' + crabId + '*`'
580     print ' Cleanup: ' + cmd + '\n'
# the cleanup command is only printed: its execution is commented out
581     #os.system(cmd)
582     sys.exit(0)
583    
584 paus 1.3
585 paus 1.2 # adjust arguments
586     cmd = 'input.py --db=' + lfnFile + '_' + tag + ' --option=xml --dataset=' + cmsDataset + \
587     ' > ' + tag + '/share/arguments.xml'
588     print ' update arguments: ' + cmd
589     if test == 0:
590     os.system(cmd)
591    
592     # loop through the file and determine the submission parameters
# every non-comment line of <lfnFile>_<tag> is read as '<block> <lfn> <nEvents>'; for each run of
# consecutive lines with the same block the first and last 1-based line index are recorded
593     block = ''
594     blocks = []
595     idx = 0
596     minIdxs = []
597     maxIdxs = []
598    
599     fileInput = open(lfnFile + '_' + tag,'r')
600     line = fileInput.readline()
601     while (line != ''):
602     line = line[:-1]
603     if line[0] != '#':
604     idx += 1
605     f = line.split()
606     block = f[0]
607     lfn = f[1]
608     nEvents = f[2]
609     if len(blocks) == 0 or block != blocks[-1]:
610     # new block found
611     blocks .append(block)
612     minIdxs.append(idx)
613     maxIdxs.append(idx)
614     else:
615     maxIdxs[-1] = idx
616     # keep the reading going
617     line = fileInput.readline()
618     fileInput .close()
619    
620     # merge blocks together if they use the same sites
621     lastSites = ''
622     idx = 0
623     mergedIdx = 0
624     mergedBlocks = []
625     mergedSites = []
626     mergedMinIdxs = []
627     mergedMaxIdxs = []
628     print '\n Show the unmerged blocks:'
629     for block in blocks:
630 paus 1.5
631 paus 1.12
# sites come from --fixSites if given, else from the site file (or sites.py if that file is
# missing); 'sites' keeps the last line the command printed
632     if fixSites != '':
633     sites = fixSites
634     else:
635 paus 1.17 if not os.path.exists(siteFile):
636     cmd = "sites.py --block=" + block + " --dbs=" + dbs
637     else:
638     cmd = "cat " + siteFile + "| grep " + block + " | cut -d ':' -f2 | tr -d ' '"
639     print 'CMD: ' + cmd
640 paus 1.12 for line in os.popen(cmd).readlines(): # run command
641     line = line[:-1]
642     sites = line
643    
# NOTE: the result of translateSes is immediately replaced by the result of selectPreferred()
644     sites = trans.translateSes(sites)
645     sites = trans.selectPreferred()
646 paus 1.5
647 paus 1.17 if sites == '':
648     print ' ERROR - no sites for this data block, do not submit.'
649     else:
650     print ' Block ' + block + ' process: %d to %d'%(minIdxs[idx],maxIdxs[idx]) + \
651     ' at\n > ' + sites
652 paus 1.2 # block with different sites found
653     if sites != lastSites:
654     mergedSites .append(sites)
655 paus 1.6 mergedBlocks .append(blocks [idx]) # only the first block with these sites is stored
656 paus 1.2 mergedMinIdxs.append(minIdxs[idx])
657     mergedMaxIdxs.append(maxIdxs[idx])
658 paus 1.6 lastSites = sites
659     mergedIdx += 1
660 paus 1.2 else:
# same sites as the previous block: just extend the index range of the last merged block
661     mergedMaxIdxs[mergedIdx-1] = maxIdxs[idx]
662 paus 1.6
663 paus 1.2 # last action in the loop: increment the unmerged blocks
664     idx += 1
665    
666     # Show already what we will do
667     idx = 0
668     print '\n Show the merged blocks:'
669     for block in mergedBlocks:
670     print ' Merged Block ' + block + ' process: %d to %d'\
671     %(mergedMinIdxs[idx],mergedMaxIdxs[idx]) + \
672     ' at\n > ' + mergedSites[idx]
673     # last action in the loop: increment the merged blocks
674     idx += 1
675    
676     # perfrom the submission block by block (using the merged blocks of course)
677     nSubmission = len(mergedBlocks)
678     idx = 0
679     print '\n Submit the merged blocks:'
680     for block in mergedBlocks:
681     print ' Merged Block ' + block + ' process: %d to %d'\
682     %(mergedMinIdxs[idx],mergedMaxIdxs[idx]) + \
683     ' at\n > ' + mergedSites[idx]
684    
# nSubmit is rebuilt here as the job range '<min>-<max>' of the merged block; a value given with
# --nSubmit is overwritten. A single job is passed as '<n>,100000000'.
685     nSubmit = '%d-%d'%(mergedMinIdxs[idx],mergedMaxIdxs[idx])
686     if mergedMinIdxs[idx] == mergedMaxIdxs[idx]:
687     nSubmit = '%d,%d'%(mergedMinIdxs[idx],100000000)
688 paus 1.5 cmd = 'crab -submit %s -continue %s -GRID.ce_white_list=%s'%(nSubmit,tag,mergedSites[idx])
689     print ' ' + cmd + '\n'
690 paus 1.12 status = os.system(cmd)
# a failed submission is repeated exactly once: retry is set on the first pass through the loop
691 paus 1.17 retry = False
692     while status != 0 and not retry:
693     retry = True
694     print ' Submission failed (%s) --> retry once!'%(cmd)
695 paus 1.3 status = os.system(cmd)
696    
697 paus 1.2 # last action in the loop: increment the merged blocks
698     idx += 1
699    
700     print ' Number of blocks submitted: %d' % nSubmission
701    
702 paus 1.3 # and cleanup the temporary file for the task
703     cmd = "rm -f crab_" + crabTask.tag + ".cfg crab_" + crabTask.tag + ".cfg-Template " \
704 paus 1.11 + cmsswPy + ' ' + cmsswPy + 'c'
705 paus 1.3 os.system(cmd)
706    
707 paus 1.2 print ' Done... keep watching it...'
# NOTE(review): if this exit is at the top level of the script (presumably it is, confirm) none of
# the statements following it are ever executed
708     sys.exit(0)
709    
710     # --------------------------------------------------------------------------------------------------
711     # Run a test job to test the configuration and measure the expected output size
712     # --------------------------------------------------------------------------------------------------
# Test job: unless --noTestJob was given, find (or download) one input file of the dataset, run
# cmsRun on a copy of the cmssw configuration pointing to that file and report timing and output
# size per event.
# NOTE(review): this section follows an unconditional-looking sys.exit(0); with the indentation
# lost in this listing it cannot be confirmed here whether it is still reachable.
713     if noTestJob == 0:
714     #-----------------------------------------------------------------------------------------------
715     # use a specific file for test
716     #-----------------------------------------------------------------------------------------------
717     # first check whether we already have a local file
718     print '\n Try to find an existing local file using "find"'
# cmsDataset is split at '/': fields 1, 2 and 3 are used as name, version and data tier
719     f = cmsDataset.split("/")
720     name = f[1]
721     vers = f[2]
722     tier = f[3]
723     file = ""
724     lfn = ""
725     cmd = 'find ./store/ -name ' + name
726     print 'Searching: ' + cmd
727     for line in os.popen(cmd).readlines(): # run command
728     file = line[:-1] # strip '\n'
729    
730     # looks like there could be a file we found a directory, confirm
731     if file != "":
732     print '\n We have a directory ' + file + ' .. confirming'
733     file = file + '/' + tier
734     cmd = 'find ' + file + ' -name \*.root'
735     for line in os.popen(cmd).readlines(): # run command
# NOTE(review): str.find returns -1 (true) when vers is absent and 0 (false) when the line starts
# with vers, so this test does not mean "line contains vers"; compare the '!= -1' test used for
# "/store" further down
736     if line.find(vers):
737     file = line[:-1] # strip '\n'
738     lfn = file[1:]
739    
740     if os.path.exists(file):
741     print ' moving on with locally found file: \n' + ' ' + file
742     else:
743     print ' no local file found'
744     file = ""
745     lfn = ""
746    
747     # now try to see whether we can find a file to download
748     if file == "":
749     cmd = './bin/findLfn.py --input=' + cmsDataset + ' | grep /store/'
750     print '\n Find an LFN to download: ' + cmd
751     for line in os.popen(cmd).readlines(): # run command
752     if line.find("/store") != -1:
753     lfn = line[:-1] # strip '\n'
754     break
755     if lfn == "":
756     print "\n WARNING: No file found, continue assuming it is a simulation job.\n\n"
757     else:
758     print ' --> LFN: ' + lfn
# the local copy is expected below the current directory at the lfn path
759     file = '.' + lfn
760    
761     if os.path.exists(file):
762     print ' --> File already exists: ' + file
763     else:
764     cmd = './bin/downloadLfn.py ' + lfn
765     print ' --> downloading: ' + cmd
766     status = os.system(cmd)
767     if status != 0:
768     print ' ERROR - failed to copy LFN. EXIT now!'
769     sys.exit(1)
770    
771     # Parse template input and adjust the input file to the newly copied lfn
# every line of cmsswPy containing "file:" is replaced by '"file:<lfn without leading slash>"'
772     fileInput = open(cmsswPy,'r')
773     fileOutput = open("test-"+cmsswPy,'w')
774     line = fileInput.readline()
775     while (line != ''):
776     if line.find("file:") != -1:
777     line = '"file:' + lfn[1:] + '"\n'
778     fileOutput.write(line)
779     line = fileInput.readline()
780     fileInput .close()
781     fileOutput.close()
782    
783     # Setting the number of events (hard coded in the file so far)
784     nTryEvts = 1000.
785    
786     print '\n --> Please wait, running a test job now! Should be short (trying %.0f'%nTryEvts + \
787     ' evts). Check log: cmssw.log'
# /usr/bin/time appends '<elapsed> <user> <system>' seconds as the last line of cmssw.log
788     cmd = 'rm -f cmssw.log; /usr/bin/time --format "%e %U %S" cmsRun test-' + cmsswPy + \
789     ' >& cmssw.log'
790     print ' CMD: ' + cmd
791     status = os.system(cmd)
792    
793     cmd = 'tail -1 cmssw.log'
794     for file in os.popen(cmd).readlines(): # run command
795     line = file[:-1] # strip '\n'
796     f = line.split() # splitting every blank
797     rtime = float(f[0]) # wall clock time
798     utime = float(f[1]) # user time
799     stime = float(f[2]) # system time
800    
# default of 1000 is kept if the log has no 'Begin processing' line
801     nEvtsTest = 1000
802     cmd = 'grep \'Begin processing\' cmssw.log | tail -1'
803     for file in os.popen(cmd).readlines(): # run command
804     line = file[:-1] # strip '\n'
805     # test whether there is a directory
806     f = line.split() # splitting every blank
807     nEvtsTest = f[3] # this is the number of records processed
808     nEvtsTest = int(nEvtsTest[:-2]) # strip 'th'
# sum the first column of 'ls -s' over all <mitDataset>*.root files, divided by 1000
809     cmd = 'ls -s ' + mitDataset + '*.root'
810     size = 0
811     for file in os.popen(cmd).readlines(): # run command
812     line = file[:-1] # strip '\n'
813     f = line.split() # splitting every blank
814     size += int(f[0])/1000. # size in MB
815    
# NOTE: the log tail is read a second time into 'names', which is not used again in this section
816     cmd = 'tail -1 cmssw.log'
817     for file in os.popen(cmd).readlines(): # run command
818     line = file[:-1] # strip '\n'
819     # get total, user and system times
820     names = line.split() # splitting every blank
821    
822     if nEvtsTest != nTryEvts:
823     print ' WARNING - Instead of %f did %d'%(nTryEvts,nEvtsTest)
824    
825     print ' '
826     print ' Number of test events produced: %d'%nEvtsTest
827     print ' File size for all events: %.2f MB'%size
828     print ' Processing time for all events: %.2f secs (u: %.2f s: %.2f)'%(rtime,utime,stime)
829     print ' '
830     print ' --> 1 event == %.2f secs'%(rtime/nEvtsTest)
831     print ' --> 1.00 GB == %d events'%(nEvtsTest/size*1024.)
# NOTE: nevents was set earlier to the number of lfns of the task (len(crabTask.lfns))
832     print ' --> %.2f GB == %d events'%(nevents/(nEvtsTest/size*1024.),nevents)
833     print ' '
834    
835     if testJob == 1:
836     print '\n Test job finished, stopping now.\n'
837     sys.exit(0)