ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitProd/Processing/bin/submit.py
Revision: 1.6
Committed: Tue Sep 21 20:09:09 2010 UTC (14 years, 7 months ago) by paus
Content type: text/x-python
Branch: MAIN
CVS Tags: Mit_017pre3, Mit_017pre2, Mit_017pre1, Mit_016, Mit_015b, Mit_015a, Mit_015, Mit_014e, Mit_014d, Mit_014c
Changes since 1.5: +4 -3 lines
Log Message:
Update before going to 014e tag and version 3_8_4.

File Contents

# User Rev Content
1 paus 1.2 #!/usr/bin/env python
2     #---------------------------------------------------------------------------------------------------
3     # Script to test, create and submit one complete production task
4     #
5     # Complete refers here to the proper preparation of the ultimate storage location (a storage element
6     # with a given storage path etc.), the submission of a test job to evaluate timing and data output
7     # size and finally the creation of the job configurations and the submission of the jobs to the grid
8     # sites. The grid is accessed via crab tools.
9     #
10     # While the script is pretty complete it has become a bit long and messy. Some cleanup will
11     # hopefully soon be performed.
12     #
13     # Author: C.Paus (July 1, 2008)
14     #---------------------------------------------------------------------------------------------------
15     import os,sys,getopt,re,string
16 paus 1.3 import task,translator
17 paus 1.2
18     #===================================================================================================
def searchReplace(line,mitCfg,version,mitDataset, \
                  cmsDataset='X',cmsswPy='X',dbs='X',sched='X',blacklist='X',skpEvts='X'):
    """Replace all XX-...-XX template placeholders in one line and return the result.

    line       -- text (typically one line of a crab.cfg or cmssw python template)
    mitCfg     -- value substituted for XX-MITCFG-XX
    version    -- value substituted for XX-MITVERSION-XX
    mitDataset -- value substituted for XX-MITDATASET-XX
    cmsDataset, cmsswPy, dbs, sched, blacklist, skpEvts
               -- values substituted for XX-CMSDATASET-XX, XX-CMSSWPY-XX, XX-DBS-XX,
                  XX-SCHED-XX, XX-BLACKLIST-XX and XX-SKIPEVTS-XX; each defaults to 'X'

    The values are handed to re.sub as replacement templates, so backslash sequences
    inside a value are interpreted by the re module.
    """
    # Placeholders are applied in this fixed order. XX-SKIPEVTS-XX used to be compiled
    # but never substituted, which made the skpEvts argument a no-op; it is now applied
    # like every other placeholder.
    substitutions = (
        ('XX-CMSDATASET-XX', cmsDataset),
        ('XX-MITDATASET-XX', mitDataset),
        ('XX-CMSSWPY-XX',    cmsswPy),
        ('XX-MITCFG-XX',     mitCfg),
        ('XX-MITVERSION-XX', version),
        ('XX-DBS-XX',        dbs),
        ('XX-SCHED-XX',      sched),
        ('XX-BLACKLIST-XX',  blacklist),
        ('XX-SKIPEVTS-XX',   skpEvts),
    )
    for placeholder, value in substitutions:
        line = re.sub(placeholder, value, line)
    return line
41    
42     #===================================================================================================
def adjustCfg(line,nevents,crabId):
    """Fill in the per-subtask placeholders of one template line and return it.

    XX-NEVENTS-XX is replaced by str(nevents) and XX-CRABID-XX by crabId.
    """
    withEvents = re.sub('XX-NEVENTS-XX', str(nevents), line)
    return re.sub('XX-CRABID-XX', crabId, withEvents)
51    
52     #===================================================================================================
def findStoragePath(mitCfg,version,mitDataset):
    """Return the storage path configured in $MIT_PROD_DIR/<mitCfg>/<version>/crab.cfg.

    The value of the 'storage_path' line (everything after the first '=', with all
    whitespace removed) is concatenated with the last blank-separated token of the
    'user_remote_dir' line. Placeholders in both lines are expanded with searchReplace.
    """
    crabCfg = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/crab.cfg'

    # base path: text following the first '=' of the storage_path entry
    for entry in os.popen('grep ^storage_path ' + crabCfg).readlines():
        expanded = searchReplace(entry[:-1],mitCfg,version,mitDataset)
        storagePath = re.sub(r"\s", "", expanded.partition("=")[2])

    # remote directory: last blank-separated token of the user_remote_dir entry
    for entry in os.popen('grep ^user_remote_dir ' + crabCfg).readlines():
        expanded = searchReplace(entry[:-1],mitCfg,version,mitDataset)
        storagePath += expanded.split(" ")[-1]

    return storagePath
74    
75     #===================================================================================================
def createDirCern(storagePath):
    """Make sure the Castor directory storagePath exists and has permissions 777.

    Uses the rfdir / rfmkdir / rfchmod command line tools. The script exits with
    status 1 when the permissions cannot be set.
    """
    # probe for the directory, create it only when the probe fails
    if os.system('rfdir ' + storagePath + ' >& /dev/null') == 0:
        print(' Castor directory exists: ' + storagePath + '\n --> Moving on.')
    else:
        print(' Castor directory needs to be created.')
        if os.system('rfmkdir -p ' + storagePath) == 0:
            print(' --> Created: ' + storagePath)

    # permissions are (re)applied on every call, whether or not the directory was new
    if os.system('rfchmod 777 ' + storagePath) != 0:
        print(' --> Setting permissions failed. EXIT now.\n')
        sys.exit(1)
    print(' --> Set permissions: 777\n')
99    
100     #===================================================================================================
def create(path):
    """Create path via ssh on cgate when it lies in the MIT /pnfs/cmsaf.mit.edu/t2bat area.

    Returns -1 without doing anything when path does not match that area; otherwise
    returns the exit status of the remote 'chmod 777' (the mkdir status is not kept).
    """
    if not re.search('/pnfs/cmsaf.mit.edu/t2bat',path):
        return -1

    # only the part after the last '=' is the directory on the remote file system
    remoteDir = path.split('=')[-1]
    os.system('ssh paus@cgate mkdir -p ' + remoteDir)
    return os.system('ssh paus@cgate chmod 777 ' + remoteDir)
114    
115     #===================================================================================================
116 paus 1.2 def createDirGeneral(storageEle,storagePath):
        # Make sure the directory storagePath exists on the storage element storageEle.
        #
        # First tries create(storagePath); when that returns 0 the function returns right away.
        # Otherwise the directory is probed with srmls on srm://<storageEle>:8443<storagePath>
        # and, if the probe fails, created with srmmkdir (three parent levels first, then the
        # directory itself). Finally the group permissions are checked; the script exits with
        # status 1 when 'srmls -l' shows no GroupPermission line containing RWX.
        #
        # NOTE(review): the indentation of this listing is lost; the permission check at the end
        # is assumed to run for both the 'already found' and the 'just created' case -- confirm.
117     # create all relevant subdirectories
118     f = storagePath.split('/') # splitting every '/'
        # storagePath2/1/0 are storagePath with its last 1/2/3 components removed
119     storagePath2 = "/".join(f[:-1])
120     storagePath1 = "/".join(f[:-2])
121     storagePath0 = "/".join(f[:-3])
122    
123 paus 1.5 if create(storagePath) == 0:
124     print ' '
125     print ' Directory was created at MIT.\n'
126     return
127    
128 paus 1.2 # set the storage URL
129     storageUrl = 'srm://' + storageEle + ':8443' + storagePath
130    
131     # check whether path exists
132     cmd = 'srmls ' + storageUrl + ' >& /dev/null'
133     status = os.system(cmd)
134    
135     # create it only if missing
136     if status == 0:
137     print ' '
138     print ' Directory already found.... moving on.'
139    
140     else:
141     # create all relevant directories
        # (outermost parent first; the status of these three calls is only printed, not checked)
142     cmd = 'srmmkdir srm://' + storageEle + ':8443' + storagePath0 + ' >& /dev/null'
143     print ' srmmkdir: ' + cmd
144     status = os.system(cmd)
145     print ' srmmkdir: status %d'%(status)
146     cmd = 'srmmkdir srm://' + storageEle + ':8443' + storagePath1 + ' >& /dev/null'
147     print ' srmmkdir: ' + cmd
148     status = os.system(cmd)
149     print ' srmmkdir: status %d'%(status)
150     cmd = 'srmmkdir srm://' + storageEle + ':8443' + storagePath2 + ' >& /dev/null'
151     print ' srmmkdir: ' + cmd
152     status = os.system(cmd)
153     print ' srmmkdir: status %d'%(status)
154    
155     # create the main storage directory
156     cmd = 'srmmkdir srm://' + storageEle + ':8443' + storagePath + ' >& /dev/null'
157     print ' srmmkdir: ' + cmd
158     status = os.system(cmd)
159     print ' srmmkdir: status %d'%(status)
160     if status == 0:
161     print ' '
162     print ' Directory was created.'
163     else:
164     print ' '
165     print ' '
166     print ' '
167     print ' ERROR - failed to create the Storage Area.'
168     print ' '
169     print ' '
170    
171     print ' Check permissions with: srmls -l -count=1 ' + storageUrl + '\n'
172     cmd = 'srmls -l -count=1 ' + storageUrl + ' | grep GroupPermission | grep RWX'
        # status stays -1 unless the grep pipeline prints at least one line
173     status = -1
174     for line in os.popen(cmd).readlines(): # run command
175     line = line[:-1]
176     print " Permissions? " + line
177     status = 0
178     #status = os.system(cmd)
179     print ' '
180    
181     if status != 0:
182     print ' --> Directory creation or permissions failed. EXIT now.\n'
183     sys.exit(1)
184    
185     #===================================================================================================
186     # Main starts here
187     #===================================================================================================
188     # Define string to explain usage of the script
        # (printed for --help and when the command line cannot be parsed)
189     usage = "Usage: submit.py --cmsDataset=<name> | --mitDataset=<name>\n"
190     usage += " --cmssw=<name>\n"
191     usage += " --mitCfg=<name>\n"
192     usage += " --version=<version>\n"
193     usage += " --dbs=<name>\n"
194     usage += " --sched=<name>\n"
195     usage += " --blacklist=<name>\n"
196     usage += " --nSubmit=<submittedJobs>\n"
197     usage += " --skipEvents=<'nRunX:nEventY','nRunXX:nEventYY',...>\n"
198     usage += " --complete\n"
199     usage += " --testJob\n"
200     usage += " --noTestJob\n"
201     usage += " --help\n"
202    
203     # Define the valid options which can be specified and check out the command line
        # (long options only; names ending in '=' take an argument)
        # NOTE(review): 'test' is accepted here but is not listed in the usage text above.
204     valid = ['cmsDataset=','mitDataset=','cmssw=','mitCfg=','version=','dbs=','sched=','blacklist=',
205     'nSubmit=','skipEvents=','complete','testJob','noTestJob','test','help']
206     try:
207     opts, args = getopt.getopt(sys.argv[1:], "", valid)
208     except getopt.GetoptError, ex:
        # unknown option or missing argument: show the usage and the parser's message, then give up
209     print usage
210     print str(ex)
211     sys.exit(1)
212    
213     # --------------------------------------------------------------------------------------------------
214     # Get all parameters for the production
215     # --------------------------------------------------------------------------------------------------
216     # Set defaults for each option
        # (every value below may be overridden by the matching command line option further down)
217     cmsDataset = None
218     mitDataset = None
        # the crab id is a time stamp produced by the shell 'date' command
219     cmd = "date +crab_0_%y%m%d_%H%M%S"
220     for line in os.popen(cmd).readlines(): # run command
221     line = line[:-1]
222     crabId = line
223     print "\n This job will be CrabId: " + crabId + "\n"
224    
225     cmssw = "cmssw"
226 paus 1.3 mitCfg = "filefi"
227     version = "014"
228 paus 1.2 #dbs = "https://cmsdbsprod.cern.ch:8443/cms_dbs_prod_global/servlet/DBSServlet"
229     dbs = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
230     sched = "glite"
231     blacklist = ""
232     nSubmit = -1
233     skpEvts = ''
        # flags are kept as 0/1 integers
234     complete = 0
235     noTestJob = 0
236     testJob = 0
237     test = 0
238    
239     # Read new values from the command line
240     for opt, arg in opts:
241     if opt == "--help":
242     print usage
243     sys.exit(0)
244     if opt == "--cmsDataset":
245     cmsDataset = arg
246     if opt == "--mitDataset":
247     mitDataset = arg
248     if opt == "--cmssw":
249     cmssw = arg
250     if opt == "--mitCfg":
251     mitCfg = arg
252     if opt == "--version":
253     version = arg
254     if opt == "--dbs":
255     dbs = arg
256     if opt == "--sched":
257     sched = arg
258     if opt == "--blacklist":
259     blacklist = arg
260     if opt == "--nSubmit":
        # kept as the raw string from the command line (the default above is the integer -1)
261     nSubmit = arg
262     if opt == "--skipEvents":
263     skpEvts = arg
264     if opt == "--complete":
        # NOTE(review): in this listing 'complete' is only read inside commented-out code -- confirm
265     complete = 1
        # --noTestJob and --testJob reset each other, so the one given last wins
266     if opt == "--noTestJob":
267     noTestJob = 1
268     testJob = 0
269     if opt == "--testJob":
270     testJob = 1
271     noTestJob = 0
272     if opt == "--test":
273     test = 1
274    
275     # Make sure we have the right 'database' and the right config file
        # (the database file is 'Productions', or 'Productions.<cmssw>' for a non-default --cmssw)
276     database = 'Productions'
        # name of the per-task cmssw python file generated further down
277 paus 1.3 cmsswPy = cmssw + '_' + crabId + '.py'
278 paus 1.2 if cmssw != 'cmssw':
279     database += '.' + cmssw
280    
281     # Deal with obvious problems
282     if cmsDataset == None and mitDataset == None:
283     cmd = "--cmsDataset | --mitDataset options not provided. One of them is required."
284     raise RuntimeError, cmd
285    
        # both templates are looked up under $MIT_PROD_DIR/<mitCfg>/<version>/
286 paus 1.3 crabFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'crab.cfg'
287 paus 1.2 if not os.path.exists(crabFile):
288     cmd = "Crab file not found: %s" % crabFile
289     raise RuntimeError, cmd
290 paus 1.3 cmsswFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + cmssw + '.py'
291 paus 1.2 if not os.path.exists(cmsswFile):
        # NOTE(review): the first message (which names the missing file) is overwritten by the
        # generic one on the next line before it is raised -- confirm whether that is intended
292     cmd = "Cmssw file not found: %s" % cmsswFile
293     cmd = " XXXX ERROR no valid configuration found XXXX"
294     raise RuntimeError, cmd
295    
296 paus 1.3
297     # Prepare the ce/se translator
        # NOTE(review): this rebinds the name 'translator' from the imported module to the instance
298     translator = translator.Translator(os.environ['MIT_PROD_DIR']+'/'+mitCfg+'/'+version+'/ceTable',
299     os.environ['MIT_PROD_DIR']+'/'+mitCfg+'/'+version+'/seTable')
300    
301 paus 1.2 # Resolve the other mitCfg parameters from the configuration file
        # The database file is read line by line; empty lines and lines starting with '#' are
        # skipped and a trailing backslash joins a line with the following one. The entry whose
        # first field equals cmsDataset or whose second field equals mitDataset supplies the
        # dataset names, the number of events and, optionally, a local path and a dbs instance.
302 paus 1.3 cmd = 'cat ' + os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + database
303 paus 1.2
        # join == 1 means the previous line ended in a backslash and is continued by this one
304     join = 0
305     fullLine = ""
306     bSlash = "\\";
307     for line in os.popen(cmd).readlines(): # run command
308     line = line[:-1]
309     # get rid of empty or commented lines
310     if line == '' or line[0] == '#':
311     continue
312    
313     # join lines
314     if join == 1:
315     fullLine += line
316     else:
317     fullLine = line
318    
319     # determine if finished or more is coming
320     if fullLine[-1] == bSlash:
321     join = 1
322     fullLine = fullLine[:-1]
323     else:
324     join = 0
325     # check whether this entry describes the requested dataset
326     names = fullLine.split() # splitting every blank
327     if names[0] == cmsDataset or names[1] == mitDataset:
328     cmsDataset = names[0] # CMS name of the dataset
329     mitDataset = names[1] # equivalent MIT name of the dataset
330     nevents = int(names[2]) # number of events to be used in the production
        # names[3] is not used here; names[4] is a local path unless it is '-'
        # NOTE(review): localPath is not read anywhere else in this listing
331     if names[4] != "-":
332     localPath = names[4]
333     print "\n Sample info from database %s for CMSSW config %s\n %s"\
334     %(database,cmsswPy,fullLine)
        # an optional sixth field selects the dbs instance and overrides the dbs URL
335     if len(names) == 6:
336     dbs = names[5]
337     dbs = 'https://cmsdbsprod.cern.ch:8443/cms_dbs_' + dbs + '/servlet/DBSServlet'
338     print ' dbs: ' + dbs + '\n'
339     else:
340     print ''
341    
        # both names are set only if a matching database entry was found (or both were given)
342     if mitDataset == None or cmsDataset == None:
343     print "ERROR - dataset not defined."
344     sys.exit(1)
345    
346     # Prepare file based processing input
        # (run.sh and writeCfg.py are copied from the configuration area into the current directory)
347 paus 1.3 runFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'run.sh'
348 paus 1.2 if not os.path.exists(runFile):
349     cmd = "Run file not found: %s" % runFile
350     raise RuntimeError, cmd
351     cmd = 'cp ' + runFile + ' ./'
352     os.system(cmd)
        # writeCfg.py is copied without an existence check; the cp status is not inspected
353 paus 1.3 writeCfgFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'writeCfg.py'
354     cmd = 'cp ' + writeCfgFile + ' ./'
355     os.system(cmd)
356 paus 1.2
        # the lfn list lives in a path relative to the current directory: <mitCfg>/<version>/
357     lfnFile = mitCfg + '/' + version + '/' + mitDataset + '.lfns'
358     if os.path.exists(lfnFile):
359 paus 1.5 print "\n INFO -- Lfn file found: %s. This means someone already worked on this dataset.\n" % lfnFile
360     cmd = 'rm ' + lfnFile
361 paus 1.2 os.system(cmd)
362    
363 paus 1.5 # always recreate, because the relevant copy still exists and is never touched
364     cmd = 'input.py --option=lfn --dataset=' + cmsDataset + ' > ' + lfnFile
365     print ' Input: ' + cmd + '\n'
366     os.system(cmd)
367    
368 paus 1.2 # Create the corresponding crab task
369     crabTask = task.Task(crabId,cmsDataset,mitCfg,version)
370     crabTask.storagePath = findStoragePath(mitCfg,version,mitDataset)
371     crabTask.loadAllLfns(lfnFile)
372     crabTask.loadCompletedLfns()
373     crabTask.createMissingLfns(lfnFile,lfnFile + '_' + crabTask.tag)
        # nothing left to produce: stop before any crab configuration is written
374 paus 1.4 if crabTask.nLfnMissing == 0:
375     print ' All requested LFNs are available. EXIT now.'
376     sys.exit()
377    
378 paus 1.2 crabTask.createSubTasks(lfnFile + '_' + crabTask.tag)
        # bring the per-subtask lfn files into the current directory
379     cmd = 'cp ' + lfnFile + '_' + crabTask.tag + '_*' + ' ./'
380     os.system(cmd)
381    
        # from here on nevents is the number of lfns of the task, replacing the database value
382     nevents = len(crabTask.lfns)
383    
384     # Say what we do now
385     print ' Preparing dataset: ' + cmsDataset + ' [MIT: ' + mitDataset + ' with ' + str(nevents) + \
386     ' total jobs(=lfns), nEvtsTotal: %d]' % crabTask.nTotalEvts
387    
388     # --------------------------------------------------------------------------------------------------
389     # Prepare the config files
390     # --------------------------------------------------------------------------------------------------
391     # Cleaning up
        # (remove left-overs of the three files that are generated below)
392 paus 1.3 cmd = "rm -f crab_" + crabTask.tag + ".cfg crab_" + crabTask.tag + ".cfg-Template " + cmsswPy
393 paus 1.2 os.system(cmd)
394    
395     # Parse template input and write the crab configuration file
        # (lines starting with '#' are copied unchanged; all others go through searchReplace;
        #  the output still contains the per-subtask placeholders and is therefore a template)
396     fileInput = open(crabFile,'r')
397 paus 1.3 fileOutput = open("crab_" + crabTask.tag + ".cfg-Template",'w')
398 paus 1.2 line = fileInput.readline()
399     while (line != ''):
400     if line[0] != '#':
401     line = searchReplace(line,mitCfg,version,mitDataset, \
402     cmsDataset,cmsswPy,dbs,sched,blacklist,skpEvts)
403     fileOutput.write(line)
404     line = fileInput.readline()
405     fileInput .close()
406     fileOutput.close()
407    
408     # Parse template input and write the cmssw python configuration file
        # (same treatment as for the crab template above)
409     fileInput = open(cmsswFile,'r')
410     fileOutput = open(cmsswPy,'w')
411     line = fileInput.readline()
412     while (line != ''):
413     if line[0] != '#':
414     line = searchReplace(line,mitCfg,version,mitDataset, \
415     cmsDataset,cmsswPy,dbs,sched,blacklist,skpEvts)
416     fileOutput.write(line)
417     line = fileInput.readline()
418     fileInput .close()
419     fileOutput.close()
420    
421     # --------------------------------------------------------------------------------------------------
422     # Job creation and submission
423     # --------------------------------------------------------------------------------------------------
424     print '\n Using CMSSW version: ' + os.environ['CMSSW_VERSION']
425     print ' Using CRAB version: ' + os.environ['CRAB_VERS'] + '\n'
426    
        # text fragments searched for in the output of 'crab -create' below
427     pattern1 = 'working directory'
428 paus 1.5 pattern2 = 'Total of '
429 paus 1.2
430     # Go through the crabTask and submit each subtask to the grid
        # For every subtask: write its crab config, make sure the storage area exists, run
        # 'crab -create', write the job arguments, group the lfns by block and by site list and
        # finally submit one range of jobs per group.
431     for subTask in crabTask.subTasks:
432    
433     tag = crabTask.tag + '_' + subTask.tag()
434     print ' Working on subtask: ' + tag
435    
436     nJobsTotal = 0
        # NOTE(review): crabIdCheck is filled from the crab output below but not read in this listing
437     crabIdCheck = ''
438    
439     # adjust crab config
        # (fill the per-subtask placeholders of the template written earlier)
440 paus 1.3 fileInput = open("crab_" + crabTask.tag + ".cfg-Template",'r')
441     fileOutput = open("crab_" + crabTask.tag + ".cfg",'w')
442 paus 1.2 line = fileInput.readline()
443     while (line != ''):
444     if line[0] != '#':
445     line = adjustCfg(line,subTask.nSubTaskLfn,tag)
446     fileOutput.write(line)
447     line = fileInput.readline()
448     fileInput .close()
449     fileOutput.close()
450    
451     # ----------------------------------------------------------------------------------------------
452     # Deal with storage element area
453     # ----------------------------------------------------------------------------------------------
454     # find the foreseen storage place
455 paus 1.3 cmd = 'grep ^storage_element crab_' + crabTask.tag + '.cfg-Template'
456 paus 1.2 for file in os.popen(cmd).readlines(): # run command
457     line = file[:-1] # strip '\n'
458     # decode the storage element name
459     names = line.split("=") # splitting every '='
460     storageEle = names.pop()
461     storageEle = re.sub("\s", "",storageEle)
462 paus 1.3 cmd = 'grep ^storage_path crab_' + crabTask.tag + '.cfg-Template'
463 paus 1.2 for file in os.popen(cmd).readlines(): # run command
464     line = file[:-1] # strip '\n'
465     # decode the storage directory name
        # (everything after the first '=', with all whitespace removed)
466     names = line.split("=") # splitting every '='
467     names = names[1:]
468     storagePath = "=".join(names)
469     storagePath = re.sub("\s", "",storagePath)
470 paus 1.3 cmd = 'grep ^user_remote_dir crab_' + crabTask.tag + '.cfg-Template'
471 paus 1.2 for file in os.popen(cmd).readlines(): # run command
472     line = file[:-1] # strip '\n'
473     # decode the storage directory name
474     names = line.split(" ") # splitting every ' '
475     storagePath += names[-1]
476     storageUrl = 'srm://' + storageEle + ':8443' + storagePath
477    
        # the values come from the template, so the per-subtask placeholders are filled in here
478     storagePath = adjustCfg(storagePath,subTask.nSubTaskLfn,tag)
479     storageUrl = adjustCfg(storageUrl, subTask.nSubTaskLfn,tag)
480    
481     print ' StorageUrl: ' + storageUrl
482    
483     # Create storage area
484     if test == 0: # for testing we do not create the directories
485     if (storageEle == 'srm.cern.ch'):
486     createDirCern(storagePath)
487     else:
488     createDirGeneral(storageEle,storagePath)
489    
490     # cmd = "crab -create -debug 3 -USER.ui_working_dir=" + tag + " | tee forDaniele "
491 paus 1.3 cmd = "crab -create -cfg crab_" + crabTask.tag + ".cfg -USER.ui_working_dir=" + tag
492 paus 1.2 print ' -> ' + cmd
        # in test mode the crab command is only echoed, not executed
493     if test != 0:
494     cmd = 'echo ' + cmd
495     print ' ' + cmd
496     for line in os.popen(cmd).readlines(): # run command
497     line = line[:-1] # strip '\n'
498     print ' CRAB: ' + line
499     if re.search(pattern1,line):
500     f = line.split("/")
501     crabIdCheck = f[-2]
502     if re.search(pattern2,line):
        # the job count is taken from a fixed word position of the 'Total of' line
503     f = line.split(" ")
504 paus 1.5 nJobsTotal = f[4]
505 paus 1.2 if nJobsTotal == '':
506     nJobsTotal = f[2]
507 paus 1.3 # report
508 paus 1.2 print ' --> %s jobs created (%s).\n'%(nJobsTotal,tag)
509 paus 1.3 # and cleanup the temporary file for the subtask
510     cmd = "rm -f " + mitDataset + ".lfns_" + tag
511     os.system(cmd)
512    
513 paus 1.2 # adjust arguments
        # (input.py rewrites share/arguments.xml inside the crab working directory <tag>)
514     cmd = 'input.py --db=' + lfnFile + '_' + tag + ' --option=xml --dataset=' + cmsDataset + \
515     ' > ' + tag + '/share/arguments.xml'
516     print ' update arguments: ' + cmd
517     if test == 0:
518     os.system(cmd)
519    
520     # loop through the file and determine the submission parameters
        # Each non-comment line holds block, lfn and number of events. Consecutive lines with the
        # same block form one entry of 'blocks'; minIdxs/maxIdxs hold the 1-based index of the
        # first and last line of each entry.
521     block = ''
522     blocks = []
523     idx = 0
524     minIdxs = []
525     maxIdxs = []
526    
527     fileInput = open(lfnFile + '_' + tag,'r')
528     line = fileInput.readline()
529     while (line != ''):
530     line = line[:-1]
        # NOTE(review): an empty line in the file would make line[0] raise IndexError -- confirm
531     if line[0] != '#':
532     idx += 1
533     f = line.split()
534     block = f[0]
535     lfn = f[1]
536     nEvents = f[2]
537     if len(blocks) == 0 or block != blocks[-1]:
538     # new block found
539     blocks .append(block)
540     minIdxs.append(idx)
541     maxIdxs.append(idx)
542     else:
543     maxIdxs[-1] = idx
544     # keep the reading going
545     line = fileInput.readline()
546     fileInput .close()
547    
548     # merge blocks together if they use the same sites
        # (only neighbouring blocks are compared: a block is merged into the previous group when
        #  its site string equals that of the block just before it)
549     lastSites = ''
550     idx = 0
551     mergedIdx = 0
552     mergedBlocks = []
553     mergedSites = []
554     mergedMinIdxs = []
555     mergedMaxIdxs = []
556     print '\n Show the unmerged blocks:'
557     for block in blocks:
558     cmd = "sites.py --block=" + block
        # the last line printed by sites.py is used as the site list of this block
559     for line in os.popen(cmd).readlines(): # run command
560     line = line[:-1]
561     sites = line
562 paus 1.5
        # NOTE(review): the return value of translateSes is overwritten immediately by
        # selectPreferred(); presumably translateSes stores its result in the translator -- confirm
563     sites = translator.translateSes(sites)
564     sites = translator.selectPreferred()
565    
566 paus 1.2 print ' Block ' + block + ' process: %d to %d'%(minIdxs[idx],maxIdxs[idx]) + \
567     ' at\n > ' + sites
568     # block with different sites found
569     if sites != lastSites:
570     mergedSites .append(sites)
571 paus 1.6 mergedBlocks .append(blocks [idx]) # only the first block with these sites is stored
572 paus 1.2 mergedMinIdxs.append(minIdxs[idx])
573     mergedMaxIdxs.append(maxIdxs[idx])
574 paus 1.6 lastSites = sites
575     mergedIdx += 1
576 paus 1.2 else:
        # same sites as the previous block: just extend the index range of the current group
577     mergedMaxIdxs[mergedIdx-1] = maxIdxs[idx]
578 paus 1.6
579 paus 1.2 # last action in the loop: increment the unmerged blocks
580     idx += 1
581    
582     # Show already what we will do
583     idx = 0
584     print '\n Show the merged blocks:'
585     for block in mergedBlocks:
586     print ' Merged Block ' + block + ' process: %d to %d'\
587     %(mergedMinIdxs[idx],mergedMaxIdxs[idx]) + \
588     ' at\n > ' + mergedSites[idx]
589     # last action in the loop: increment the merged blocks
590     idx += 1
591    
592     # perform the submission block by block (using the merged blocks of course)
593     nSubmission = len(mergedBlocks)
594     idx = 0
595     print '\n Submit the merged blocks:'
596     for block in mergedBlocks:
597     print ' Merged Block ' + block + ' process: %d to %d'\
598     %(mergedMinIdxs[idx],mergedMaxIdxs[idx]) + \
599     ' at\n > ' + mergedSites[idx]
600    
        # NOTE(review): nSubmit is reassigned here, so a value given with --nSubmit is discarded
601     nSubmit = '%d-%d'%(mergedMinIdxs[idx],mergedMaxIdxs[idx])
        # a group with a single job is passed as '<idx>,100000000' instead of a range
        # NOTE(review): presumably the large second number is a non-existent job id used to force
        # list syntax -- confirm against the crab documentation
602     if mergedMinIdxs[idx] == mergedMaxIdxs[idx]:
603     nSubmit = '%d,%d'%(mergedMinIdxs[idx],100000000)
604 paus 1.5 cmd = 'crab -submit %s -continue %s -GRID.ce_white_list=%s'%(nSubmit,tag,mergedSites[idx])
605     print ' ' + cmd + '\n'
606 paus 1.3 status = os.system(cmd)
        # NOTE(review): the submission is retried without any limit or delay until it succeeds
607     while status != 0:
608     print ' Submission failed (%s) --> retry'%(cmd)
609     status = os.system(cmd)
610    
611 paus 1.2 # last action in the loop: increment the merged blocks
612     idx += 1
613    
614     print ' Number of blocks submitted: %d' % nSubmission
615    
616 paus 1.3 # and cleanup the temporary file for the task
617     cmd = "rm -f crab_" + crabTask.tag + ".cfg crab_" + crabTask.tag + ".cfg-Template " \
618 paus 1.5 + cmsswPy + ' ' + cmsswPy + 'c' + ' writeCfg.py run.sh'
619 paus 1.3 os.system(cmd)
620    
621 paus 1.2 print ' Done... keep watching it...'
622     sys.exit(0)
623    
624    
625     ## if nSubmit != -1:
626     ## cmd = 'crab -continue %s -submit %s'%(crabId,nSubmit)
627     ## print 'SUBMIT.PY: ' + cmd
628     ## status = os.system(cmd)
629     ## if status == 0:
630     ## print ' --> job submitted\n'
631     ## else:
632     ## leftOver = int(nJobsTotal)
633     ## nSubmitted = 0
634     ## nSubBatch = 80
635     ## while (nSubmitted < int(nJobsTotal)):
636     ## if leftOver < nSubBatch:
637     ## nSubBatch = leftOver
638     ##
639     ## cmd = 'crab -continue %s -submit %d'%(crabId,nSubBatch)
640     ## print 'SUBMIT.PY: ' + cmd
641     ## status = os.system(cmd)
642     ## if status == 0:
643     ## print ' --> %d job submitted\n'%(nSubBatch)
644     ## leftOver = leftOver - nSubBatch
645     ## nSubmitted = nSubmitted + nSubBatch
646     ##
647     ## #print 'SUBMIT.PY: ' + cmd
648     ## #status = os.system(cmd)
649     ## #if status == 0:
650     ## # print ' --> job submitted\n'
651     ##
652    
653     # --------------------------------------------------------------------------------------------------
654     # Run a test job to test the configuration and measure the expected output size
655     # --------------------------------------------------------------------------------------------------
656     if noTestJob == 0:
        # NOTE(review): the script calls sys.exit(0) right after the submission loop above; if that
        # call is at top level, as it appears to be, this whole test job section never runs -- confirm
        #
        # Unless --noTestJob was given: find (or download) one input file of the dataset, run
        # cmsRun on a copy of the cmssw config pointing to that file, and report the run time
        # and the output size per event.
657     #-----------------------------------------------------------------------------------------------
658     # use a specific file for test
659     #-----------------------------------------------------------------------------------------------
660     # first check whether we already have a local file
661     print '\n Try to find an existing local file using "find"'
        # cmsDataset is split at '/'; fields 1, 2 and 3 are used as name, version and tier
662     f = cmsDataset.split("/")
663     name = f[1]
664     vers = f[2]
665     tier = f[3]
666     file = ""
667     lfn = ""
668     cmd = 'find ./store/ -name ' + name
669     print 'Searching: ' + cmd
670     for line in os.popen(cmd).readlines(): # run command
671     file = line[:-1] # strip '\n'
672    
673     # looks like there could be a file we found a directory, confirm
674     if file != "":
675     print '\n We have a directory ' + file + ' .. confirming'
676     file = file + '/' + tier
677     cmd = 'find ' + file + ' -name \*.root'
678     for line in os.popen(cmd).readlines(): # run command
        # NOTE(review): str.find returns -1 (true) when vers is absent and 0 (false) when the
        # line starts with vers, so this test does not mean 'line contains vers' -- confirm intent
679     if line.find(vers):
680     file = line[:-1] # strip '\n'
681     lfn = file[1:]
682    
683     if os.path.exists(file):
684     print ' moving on with locally found file: \n' + ' ' + file
685     else:
686     print ' no local file found'
687     file = ""
688     lfn = ""
689    
690     # now try to see whether we can find a file to download
691     if file == "":
692     cmd = './bin/findLfn.py --input=' + cmsDataset + ' | grep /store/'
693     print '\n Find an LFN to download: ' + cmd
        # the first output line containing '/store' is taken as the lfn
694     for line in os.popen(cmd).readlines(): # run command
695     if line.find("/store") != -1:
696     lfn = line[:-1] # strip '\n'
697     break
698     if lfn == "":
699     print "\n WARNING: No file found, continue assuming it is a simulation job.\n\n"
700     else:
701     print ' --> LFN: ' + lfn
        # the local copy is expected at the lfn path relative to the current directory
702     file = '.' + lfn
703    
704     if os.path.exists(file):
705     print ' --> File already exists: ' + file
706     else:
707     cmd = './bin/downloadLfn.py ' + lfn
708     print ' --> downloading: ' + cmd
709     status = os.system(cmd)
710     if status != 0:
711     print ' ERROR - failed to copy LFN. EXIT now!'
712     sys.exit(1)
713    
714     # Parse template input and adjust the input file to the newly copied lfn
        # (every line containing 'file:' is replaced by a quoted 'file:<lfn without leading slash>')
715     fileInput = open(cmsswPy,'r')
716     fileOutput = open("test-"+cmsswPy,'w')
717     line = fileInput.readline()
718     while (line != ''):
719     if line.find("file:") != -1:
720     line = '"file:' + lfn[1:] + '"\n'
721     fileOutput.write(line)
722     line = fileInput.readline()
723     fileInput .close()
724     fileOutput.close()
725    
726     # Setting the number of events (hard coded in the file so far)
727     nTryEvts = 1000.
728    
729     print '\n --> Please wait, running a test job now! Should be short (trying %.0f'%nTryEvts + \
730     ' evts). Check log: cmssw.log'
        # /usr/bin/time appends 'elapsed user system' seconds as the last line of cmssw.log
731     cmd = 'rm -f cmssw.log; /usr/bin/time --format "%e %U %S" cmsRun test-' + cmsswPy + \
732     ' >& cmssw.log'
733     print ' CMD: ' + cmd
734     status = os.system(cmd)
735    
736     cmd = 'tail -1 cmssw.log'
737     for file in os.popen(cmd).readlines(): # run command
738     line = file[:-1] # strip '\n'
739     f = line.split() # splitting every blank
740     rtime = float(f[0]) # wall clock time
741     utime = float(f[1]) # user time
742     stime = float(f[2]) # system time
743    
        # default in case no 'Begin processing' line is found in the log
744     nEvtsTest = 1000
745     cmd = 'grep \'Begin processing\' cmssw.log | tail -1'
746     for file in os.popen(cmd).readlines(): # run command
747     line = file[:-1] # strip '\n'
748     # extract the record counter from the last 'Begin processing' line
749     f = line.split() # splitting every blank
750     nEvtsTest = f[3] # this is the number of records processed
751     nEvtsTest = int(nEvtsTest[:-2]) # strip 'th'
752     cmd = 'ls -s ' + mitDataset + '*.root'
753     size = 0
754     for file in os.popen(cmd).readlines(): # run command
755     line = file[:-1] # strip '\n'
756     f = line.split() # splitting every blank
757     size += int(f[0])/1000. # size in MB
758    
759     cmd = 'tail -1 cmssw.log'
760     for file in os.popen(cmd).readlines(): # run command
761     line = file[:-1] # strip '\n'
762     # get total, user and system times
        # NOTE(review): 'names' is not read afterwards in this listing; the times were already
        # parsed into rtime/utime/stime above
763     names = line.split() # splitting every blank
764    
765     if nEvtsTest != nTryEvts:
766     print ' WARNING - Instead of %f did %d'%(nTryEvts,nEvtsTest)
767    
        # NOTE(review): the ratios below divide by size and nEvtsTest; if no output file was found
        # (size == 0) this raises ZeroDivisionError -- confirm
768     print ' '
769     print ' Number of test events produced: %d'%nEvtsTest
770     print ' File size for all events: %.2f MB'%size
771     print ' Processing time for all events: %.2f secs (u: %.2f s: %.2f)'%(rtime,utime,stime)
772     print ' '
773     print ' --> 1 event == %.2f secs'%(rtime/nEvtsTest)
774     print ' --> 1.00 GB == %d events'%(nEvtsTest/size*1024.)
775     print ' --> %.2f GB == %d events'%(nevents/(nEvtsTest/size*1024.),nevents)
776     print ' '
777    
        # --testJob: stop after the test job
778     if testJob == 1:
779     print '\n Test job finished, stopping now.\n'
780     sys.exit(0)
781    
782    
783     ## # are we just completing an existing production? and is there something to complete?
784     ## if complete == 1:
785     ## f = storagePath.split('=')
786     ## rfDir = f[-1]
787     ## #cmd = 'castorInventory.py --nJobs=%s %s | grep Missing'%(nJobsTotal,rfDir)
788     ## cmd = 'castorInventory.py --nJobs=%s %s | grep Missing'%(nJobsTotal,storageUrl)
789     ## #print ' CMD: ' + cmd
790     ## for line in os.popen(cmd).readlines(): # run command
791     ## line = line[:-1] # strip '\n'
792     ## f = line.split(':')
793     ## nSubmit = f[1].strip()
794     ## f = nSubmit.split(',')
795     ## if len(f) == 0 or nSubmit == '':
796     ## print ' No more jobs left it seems, nSubmit=' + nSubmit
797     ## cmd = 'rm -rf ' + crabId
798     ## print ' Cleanup: ' + cmd + '\n\n'
799     ## status = os.system(cmd)
800     ## sys.exit(0)
801     ## elif len(f) == 1:
802     ## nInvalid = str(int(nJobsTotal) + 1000)
803     ## print ' One more jobs left, careful, adjusted, nSubmit=' + nSubmit
804     ## nSubmit = nSubmit + ',' + nInvalid
805     ##
806     ## ### nSubmit = ",".join(f[:-1])
807     ## print ' Missing jobs are: ' + nSubmit
808     ##
809     sys.exit(0)