ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitProd/Processing/bin/submit.py
Revision: 1.8
Committed: Sun Dec 5 01:01:21 2010 UTC (14 years, 5 months ago) by paus
Content type: text/x-python
Branch: MAIN
CVS Tags: Mit_020pre1, Mit_018, Mit_017
Changes since 1.7: +45 -40 lines
Log Message:
Next iteration with improved downloading tool.

File Contents

# User Rev Content
1 paus 1.2 #!/usr/bin/env python
2     #---------------------------------------------------------------------------------------------------
3     # Script to test, create and submit one complete production task
4     #
5     # Complete refers here to the proper preparation of the ultimate storage location (a storage element
6     # with a given storage path etc.), the submission of a test job to evaluate timing and data output
7     # size and finally the creation of the job configurations and the submission of the jobs to the grid
8     # sites. The grid is accessed via crab tools.
9     #
10     # While the script is pretty complete it has become a bit long and messy. Some cleanup will
11     # hopefully soon be performed.
12     #
13     # Author: C.Paus (July 1, 2008)
14     #---------------------------------------------------------------------------------------------------
15     import os,sys,getopt,re,string
16 paus 1.3 import task,translator
17 paus 1.2
18     #===================================================================================================
def searchReplace(line,mitCfg,version,mitDataset, \
                  cmsDataset='X',cmsswPy='X',dbs='X',sched='X',blacklist='X',skpEvts='X'):
    """Expand the XX-...-XX template placeholders in one line of text.

    Parameters:
      line        text in which the placeholders are replaced
      mitCfg      value for XX-MITCFG-XX
      version     value for XX-MITVERSION-XX
      mitDataset  value for XX-MITDATASET-XX
      cmsDataset  value for XX-CMSDATASET-XX
      cmsswPy     value for XX-CMSSWPY-XX
      dbs         value for XX-DBS-XX
      sched       value for XX-SCHED-XX
      blacklist   value for XX-BLACKLIST-XX
      skpEvts     value for XX-SKIPEVTS-XX

    Returns the line with every placeholder replaced by its value.
    """
    # The placeholders are fixed strings, so a literal replace is used: the
    # values are inserted verbatim (re.sub would interpret backslashes in them).
    # The order is the one in which the substitutions have always been applied.
    substitutions = (
        ('XX-CMSDATASET-XX', cmsDataset),
        ('XX-MITDATASET-XX', mitDataset),
        ('XX-CMSSWPY-XX',    cmsswPy),
        ('XX-MITCFG-XX',     mitCfg),
        ('XX-MITVERSION-XX', version),
        ('XX-DBS-XX',        dbs),
        ('XX-SCHED-XX',      sched),
        ('XX-BLACKLIST-XX',  blacklist),
        # this substitution was prepared but never performed before
        ('XX-SKIPEVTS-XX',   skpEvts),
    )
    for placeholder, value in substitutions:
        line = line.replace(placeholder, value)
    return line
41    
42     #===================================================================================================
def adjustCfg(line,nevents,crabId):
    """Fill the per-subtask placeholders of one line.

    XX-NEVENTS-XX is replaced by str(nevents) and XX-CRABID-XX by crabId;
    the resulting line is returned.
    """
    # (pattern, replacement) pairs, applied one after the other
    replacements = [('XX-NEVENTS-XX', str(nevents)),
                    ('XX-CRABID-XX',  crabId)]
    for pattern, replacement in replacements:
        line = re.sub(pattern, replacement, line)
    return line
51    
52     #===================================================================================================
def findStoragePath(mitCfg,version,mitDataset):
    """Return the storage location configured in the crab.cfg of a production.

    The file $MIT_PROD_DIR/<mitCfg>/<version>/crab.cfg is read.  The base is
    the value of the last line starting with 'storage_path' (everything after
    the first '=', all whitespace removed).  The last blank-separated token of
    every line starting with 'user_remote_dir' is appended to it.  Placeholders
    in those lines are expanded with searchReplace() before decoding.

    Raises IOError if crab.cfg cannot be opened and RuntimeError if it has no
    'storage_path' line (this used to end in an unbound-variable error).
    """
    crabCfg = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/crab.cfg'

    # read the configuration once instead of running one grep per keyword
    cfgFile = open(crabCfg,'r')
    try:
        cfgLines = [ cfgLine.rstrip('\n') for cfgLine in cfgFile.readlines() ]
    finally:
        cfgFile.close()

    # find the foreseen storage place
    storagePath = None
    for cfgLine in cfgLines:
        if not cfgLine.startswith('storage_path'):
            continue
        line = searchReplace(cfgLine,mitCfg,version,mitDataset)
        # decode the storage directory name: keep what follows the first '='
        storagePath = re.sub(r"\s", "", "=".join(line.split("=")[1:]))
    if storagePath is None:
        raise RuntimeError("No storage_path entry found in: %s" % crabCfg)

    # append the user specific remote directory
    for cfgLine in cfgLines:
        if not cfgLine.startswith('user_remote_dir'):
            continue
        line = searchReplace(cfgLine,mitCfg,version,mitDataset)
        storagePath += line.split(" ")[-1]
    return storagePath
74    
75     #===================================================================================================
def createDirCern(storagePath):
    """Make sure storagePath exists (rfdir/rfmkdir) and set its permissions to 777.

    Progress is printed; the script is terminated with exit code 1 when
    rfchmod fails.  Nothing is returned.
    """
    # check whether path exists (the output of the probe is discarded)
    exists = os.system('rfdir ' + storagePath + ' >& /dev/null') == 0

    # create it if missing
    if exists:
        print(' Castor directory exists: ' + storagePath + '\n --> Moving on.')
    else:
        print(' Castor directory needs to be created.')
        if os.system('rfmkdir -p ' + storagePath) == 0:
            print(' --> Created: ' + storagePath)

    # always set the permissions; a failure here ends the script
    if os.system('rfchmod 777 ' + storagePath) != 0:
        print(' --> Setting permissions failed. EXIT now.\n')
        sys.exit(1)
    print(' --> Set permissions: 777\n')
99    
100     #===================================================================================================
def create(path):
    """Create a directory through ssh when path lies in /pnfs/cmsaf.mit.edu/t2bat.

    Returns -1 without doing anything for any other path; otherwise the exit
    status of the remote 'chmod 777' (the status of the preceding 'mkdir -p'
    is not kept).
    """
    # paths outside of the pnfs area are not handled here
    if not re.search('/pnfs/cmsaf.mit.edu/t2bat',path):
        return -1

    # only the part behind the last '=' is the directory itself
    target = path.split('=')[-1]
    os.system('ssh paus@cgate mkdir -p ' + target)
    return os.system('ssh paus@cgate chmod 777 ' + target)
114    
115     #===================================================================================================
def createDirGeneral(storageEle,storagePath):
    """Create storagePath on the storage element storageEle and check its permissions.

    create() is tried first; when it returns 0 the function is done.  Otherwise
    the directory is looked up with srmls and, if that fails, created with
    srmmkdir (three parent levels first, then the directory itself).  Finally
    the srmls listing is searched for a GroupPermission line containing RWX;
    without such a line the script is terminated with exit code 1.
    """
    # the three parent directories of the target, outermost first
    pieces = storagePath.split('/') # splitting every '/'
    parents = [ "/".join(pieces[:-depth]) for depth in (3,2,1) ]

    if create(storagePath) == 0:
        print(' ')
        print(' Directory was created at MIT.\n')
        return

    # set the storage URL
    urlPrefix = 'srm://' + storageEle + ':8443'
    storageUrl = urlPrefix + storagePath

    # create the directory only if the listing fails
    if os.system('srmls ' + storageUrl + ' >& /dev/null') == 0:
        print(' ')
        print(' Directory already found.... moving on.')
    else:
        # parents first, the main storage directory last; only the status of
        # the last srmmkdir decides which message is printed
        for directory in parents + [storagePath]:
            cmd = 'srmmkdir ' + urlPrefix + directory + ' >& /dev/null'
            print(' srmmkdir: ' + cmd)
            status = os.system(cmd)
            print(' srmmkdir: status %d'%(status))
        if status == 0:
            print(' ')
            print(' Directory was created.')
        else:
            for message in (' ',' ',' ',' ERROR - failed to create the Storage Area.',' ',' '):
                print(message)

    # the directory is accepted only if the listing shows group RWX permissions
    print(' Check permissions with: srmls -l -count=1 ' + storageUrl + '\n')
    cmd = 'srmls -l -count=1 ' + storageUrl + ' | grep GroupPermission | grep RWX'
    permissionLines = os.popen(cmd).readlines() # run command
    for line in permissionLines:
        print(" Permissions? " + line[:-1])
    print(' ')

    if not permissionLines:
        print(' --> Directory creation or permissions failed. EXIT now.\n')
        sys.exit(1)
184    
185     #===================================================================================================
186     # Main starts here
187     #===================================================================================================
188     # Define string to explain usage of the script
# Usage text, printed for --help and when the command line cannot be parsed.
# It has to list every entry of 'valid' below; --useExistingLfns and --test
# were accepted by the parser but missing here.
usage  = "Usage: submit.py --cmsDataset=<name> | --mitDataset=<name>\n"
usage += " --cmssw=<name>\n"
usage += " --mitCfg=<name>\n"
usage += " --version=<version>\n"
usage += " --dbs=<name>\n"
usage += " --sched=<name>\n"
usage += " --blacklist=<name>\n"
usage += " --nSubmit=<submittedJobs>\n"
usage += " --skipEvents=<'nRunX:nEventY','nRunXX:nEventYY',...>\n"
usage += " --complete\n"
usage += " --useExistingLfns\n"
usage += " --testJob\n"
usage += " --noTestJob\n"
usage += " --test\n"
usage += " --help\n"

# Define the valid options which can be specified and check out the command line
# (a trailing '=' marks an option that takes an argument)
valid = ['cmsDataset=','mitDataset=','cmssw=','mitCfg=','version=','dbs=','sched=','blacklist=',
         'nSubmit=','skipEvents=','complete','useExistingLfns','testJob','noTestJob','test','help']
# Parse the command line; only long options are accepted
try:
    opts, args = getopt.getopt(sys.argv[1:], "", valid)
except getopt.GetoptError:
    # unknown option or missing argument: show the usage, the reason and give up
    problem = sys.exc_info()[1]
    print(usage)
    print(str(problem))
    sys.exit(1)
212    
213     # --------------------------------------------------------------------------------------------------
214     # Get all parameters for the production
215     # --------------------------------------------------------------------------------------------------
# crab id: time stamp of the submission, same layout as 'date +crab_0_%y%m%d_%H%M%S'
# but without spawning a process (crabId stayed undefined when 'date' printed nothing)
import time
crabId = time.strftime("crab_0_%y%m%d_%H%M%S")
print("\n This job will be CrabId: " + crabId + "\n")
# Set defaults for each option
cmsDataset = None
mitDataset = None
cmssw = "cmssw"
mitCfg = "filefi"
version = "014"
#dbs = "https://cmsdbsprod.cern.ch:8443/cms_dbs_prod_global/servlet/DBSServlet"
dbs = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
sched = "glite"
blacklist = ""
nSubmit = -1
skpEvts = ''
complete = 0
useExistingLfns = False
noTestJob = 0
testJob = 0
test = 0
239 paus 1.2
# Overwrite the defaults with whatever was given on the command line
for opt, arg in opts:
    if opt == "--help":
        print(usage)
        sys.exit(0)
    elif opt == "--cmsDataset":
        cmsDataset = arg
    elif opt == "--mitDataset":
        mitDataset = arg
    elif opt == "--cmssw":
        cmssw = arg
    elif opt == "--mitCfg":
        mitCfg = arg
    elif opt == "--version":
        version = arg
    elif opt == "--dbs":
        dbs = arg
    elif opt == "--sched":
        sched = arg
    elif opt == "--blacklist":
        blacklist = arg
    elif opt == "--nSubmit":
        nSubmit = arg
    elif opt == "--skipEvents":
        skpEvts = arg
    elif opt == "--complete":
        complete = 1
    elif opt == "--useExistingLfns":
        useExistingLfns = True
    elif opt == "--noTestJob":
        # the two test job switches exclude each other, the last one given wins
        noTestJob = 1
        testJob = 0
    elif opt == "--testJob":
        testJob = 1
        noTestJob = 0
    elif opt == "--test":
        test = 1
277 paus 1.2
# Make sure we have the right 'database' and the right config file
database = 'Productions'
cmsswPy = cmssw + '_' + crabId + '.py'
if cmssw != 'cmssw':
    # a non default cmssw configuration has its own database file
    database += '.' + cmssw

# Deal with obvious problems
if cmsDataset is None and mitDataset is None:
    cmd = "--cmsDataset | --mitDataset options not provided. One of them is required."
    raise RuntimeError(cmd)

crabFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'crab.cfg'
if not os.path.exists(crabFile):
    cmd = "Crab file not found: %s" % crabFile
    raise RuntimeError(cmd)
cmsswFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + cmssw + '.py'
if not os.path.exists(cmsswFile):
    # keep the name of the missing file in the message (it used to be overwritten
    # by the generic text before the exception was raised)
    cmd = " XXXX ERROR no valid configuration found XXXX\n Cmssw file not found: %s" % cmsswFile
    raise RuntimeError(cmd)
298    
299 paus 1.3
# Prepare the ce/se translator from the two tables of this production.
# NOTE: from here on the name 'translator' refers to the Translator instance,
# no longer to the imported module.
_tableDir = os.environ['MIT_PROD_DIR']+'/'+mitCfg+'/'+version
translator = translator.Translator(_tableDir+'/ceTable',_tableDir+'/seTable')
303    
# Resolve the other mitCfg parameters from the configuration file (the 'database').
# A record is one logical line of blank separated fields; a trailing backslash
# continues the record on the next line.  Fields used here:
#   [0] CMS dataset name   [1] MIT dataset name   [2] number of events
#   [4] local path ('-' for none)   [5] dbs instance (optional)
cmd = 'cat ' + os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + database

join = 0
fullLine = ""
bSlash = "\\"
for line in os.popen(cmd).readlines(): # run command
    # remove the line end only; a last line without one must not lose a character
    line = line.rstrip('\n')
    # get rid of empty or commented lines
    if line.strip() == '' or line[0] == '#':
        continue

    # join lines
    if join == 1:
        fullLine += line
    else:
        fullLine = line

    # determine if finished or more is coming
    if fullLine[-1] == bSlash:
        join = 1
        fullLine = fullLine[:-1]
        continue
    join = 0

    # the record is complete: see whether it describes the requested dataset
    names = fullLine.split() # splitting every blank
    if len(names) < 2:
        # neither name can be compared, this cannot be the requested dataset
        continue
    if names[0] == cmsDataset or names[1] == mitDataset:
        if len(names) < 5:
            print("ERROR - incomplete database entry: " + fullLine)
            sys.exit(1)
        cmsDataset = names[0] # CMS name of the dataset
        mitDataset = names[1] # equivalent MIT name of the dataset
        nevents = int(names[2]) # number of events to be used in the production
        if names[4] != "-":
            localPath = names[4]
        print("\n Sample info from database %s for CMSSW config %s\n %s"\
              %(database,cmsswPy,fullLine))
        if len(names) == 6:
            # a sixth field selects a dbs instance different from the default
            dbs = names[5]
            dbs = 'https://cmsdbsprod.cern.ch:8443/cms_dbs_' + dbs + '/servlet/DBSServlet'
            print(' dbs: ' + dbs + '\n')
        else:
            print('')

if mitDataset is None or cmsDataset is None:
    print("ERROR - dataset not defined.")
    sys.exit(1)
348    
# Prepare file based processing input: the job wrapper and the config writer are
# copied from the production directory into the working directory
_prodDir = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/'
runFile = _prodDir + 'run.sh'
if not os.path.exists(runFile):
    raise RuntimeError("Run file not found: %s" % runFile)
os.system('cp ' + runFile + ' ./')
writeCfgFile = _prodDir + 'writeCfg.py'
os.system('cp ' + writeCfgFile + ' ./')

# list of lfns of the dataset (path is relative to the working directory)
lfnFile = mitCfg + '/' + version + '/' + mitDataset + '.lfns'
if os.path.exists(lfnFile):
    print("\n INFO -- Lfn file found: %s. This means someone already worked on this dataset.\n" % lfnFile)
    if not useExistingLfns:
        os.system('rm ' + lfnFile)

# recreate if requested or not existing
if not (useExistingLfns and os.path.exists(lfnFile)):
    cmd = 'input.py --option=lfn --dataset=' + cmsDataset + ' > ' + lfnFile
    print(' Input: ' + cmd + '\n')
    os.system(cmd)
372    
# Create the corresponding crab task and find out which lfns are still missing
crabTask = task.Task(crabId,cmsDataset,mitCfg,version)
crabTask.storagePath = findStoragePath(mitCfg,version,mitDataset)
crabTask.loadAllLfns(lfnFile)
crabTask.loadCompletedLfns()

def _taggedLfnFile():
    # name of the lfn list of this task; the tag is read each time it is needed
    return lfnFile + '_' + crabTask.tag

crabTask.createMissingLfns(lfnFile,_taggedLfnFile())
if crabTask.nLfnMissing == 0:
    print(' All requested LFNs are available. EXIT now.')
    sys.exit()

# split into subtasks and bring their lfn lists into the working directory
crabTask.createSubTasks(_taggedLfnFile())
os.system('cp ' + _taggedLfnFile() + '_*' + ' ./')

nevents = len(crabTask.lfns)

# Say what we do now
summary  = ' Preparing dataset: ' + cmsDataset + ' [MIT: ' + mitDataset
summary += ' with ' + str(nevents)
summary += ' total jobs(=lfns), nEvtsTotal: %d]' % crabTask.nTotalEvts
print(summary)
392    
393     # --------------------------------------------------------------------------------------------------
394     # Prepare the config files
395     # --------------------------------------------------------------------------------------------------
# Cleaning up leftovers of this task
os.system("rm -f crab_" + crabTask.tag + ".cfg crab_" + crabTask.tag + ".cfg-Template " + cmsswPy)

# Expand the placeholders of both templates: first the crab configuration
# (written as template for the subtasks), then the CMSSW python configuration.
# Lines starting with '#' are copied unchanged.
for templateName, outputName in ((crabFile, "crab_" + crabTask.tag + ".cfg-Template"),
                                 (cmsswFile, cmsswPy)):
    fileInput = open(templateName,'r')
    fileOutput = open(outputName,'w')
    for line in fileInput.readlines():
        if line[0] != '#':
            line = searchReplace(line,mitCfg,version,mitDataset, \
                                 cmsDataset,cmsswPy,dbs,sched,blacklist,skpEvts)
        fileOutput.write(line)
    fileInput .close()
    fileOutput.close()
425    
426     # --------------------------------------------------------------------------------------------------
427     # Job creation and submission
428     # --------------------------------------------------------------------------------------------------
print('\n Using CMSSW version: ' + os.environ['CMSSW_VERSION'])
print(' Using CRAB version: ' + os.environ['CRAB_VERS'] + '\n')

# markers searched for in the output of 'crab -create'
pattern1 = 'working directory'
pattern2 = 'Total of '

# Go through the crabTask and submit each subtask to the grid
for subTask in crabTask.subTasks:

    tag = crabTask.tag + '_' + subTask.tag()
    print(' Working on subtask: ' + tag)

    nJobsTotal = 0
    crabIdCheck = ''

    # adjust crab config: fill the subtask specific placeholders of the template
    fileInput = open("crab_" + crabTask.tag + ".cfg-Template",'r')
    fileOutput = open("crab_" + crabTask.tag + ".cfg",'w')
    for line in fileInput.readlines():
        if line[0] != '#':
            line = adjustCfg(line,subTask.nSubTaskLfn,tag)
        fileOutput.write(line)
    fileInput .close()
    fileOutput.close()

    # ----------------------------------------------------------------------------------------------
    # Deal with storage element area
    # ----------------------------------------------------------------------------------------------
    # find the foreseen storage place in the template
    template = 'crab_' + crabTask.tag + '.cfg-Template'
    for entry in os.popen('grep ^storage_element ' + template).readlines():
        # storage element name: what follows the last '=', whitespace removed
        storageEle = re.sub("\s", "",entry[:-1].split("=").pop())
    for entry in os.popen('grep ^storage_path ' + template).readlines():
        # storage directory name: what follows the first '=', whitespace removed
        storagePath = re.sub("\s", "","=".join(entry[:-1].split("=")[1:]))
    for entry in os.popen('grep ^user_remote_dir ' + template).readlines():
        # the last blank separated token is appended to the storage directory
        storagePath += entry[:-1].split(" ")[-1]
    # NOTE(review): taken to follow the loop above; the flattened source does not show the nesting
    storageUrl = 'srm://' + storageEle + ':8443' + storagePath

    storagePath = adjustCfg(storagePath,subTask.nSubTaskLfn,tag)
    storageUrl = adjustCfg(storageUrl, subTask.nSubTaskLfn,tag)

    print(' StorageUrl: ' + storageUrl)

    # Create storage area
    if test == 0: # for testing we do not create the directories
        if storageEle == 'srm.cern.ch':
            createDirCern(storagePath)
        else:
            createDirGeneral(storageEle,storagePath)

    # create the crab jobs and pick the job count out of the crab output
    cmd = "crab -create -cfg crab_" + crabTask.tag + ".cfg -USER.ui_working_dir=" + tag
    print(' -> ' + cmd)
    if test != 0:
        # in test mode the command is only echoed
        cmd = 'echo ' + cmd
        # NOTE(review): this print is taken to belong to the test branch -- confirm
        print(' ' + cmd)
    for line in os.popen(cmd).readlines(): # run command
        line = line[:-1] # strip '\n'
        print(' CRAB: ' + line)
        if re.search(pattern1,line):
            crabIdCheck = line.split("/")[-2]
        if re.search(pattern2,line):
            f = line.split(" ")
            nJobsTotal = f[4]
            if nJobsTotal == '':
                nJobsTotal = f[2]
    # report
    print(' --> %s jobs created (%s).\n'%(nJobsTotal,tag))
    # and cleanup the temporary file for the subtask
    os.system("rm -f " + mitDataset + ".lfns_" + tag)

    # adjust arguments
    cmd = 'input.py --db=' + lfnFile + '_' + tag + ' --option=xml --dataset=' + cmsDataset + \
          ' > ' + tag + '/share/arguments.xml'
    print(' update arguments: ' + cmd)
    if test == 0:
        os.system(cmd)

    # loop through the file and determine the submission parameters: consecutive
    # lfns of the same block form one range of job indices (counting from 1)
    block = ''
    blocks = []
    idx = 0
    minIdxs = []
    maxIdxs = []

    fileInput = open(lfnFile + '_' + tag,'r')
    for rawLine in fileInput.readlines():
        line = rawLine[:-1]
        if line[0] != '#':
            idx += 1
            f = line.split()
            block = f[0]
            lfn = f[1]
            nEvents = f[2]
            if len(blocks) == 0 or block != blocks[-1]:
                # new block found
                blocks .append(block)
                minIdxs.append(idx)
                maxIdxs.append(idx)
            else:
                maxIdxs[-1] = idx
    fileInput .close()

    # merge blocks together if they use the same sites
    lastSites = ''
    mergedIdx = 0
    mergedBlocks = []
    mergedSites = []
    mergedMinIdxs = []
    mergedMaxIdxs = []
    print('\n Show the unmerged blocks:')
    for idx, block in enumerate(blocks):
        for line in os.popen("sites.py --block=" + block).readlines(): # run command
            sites = line[:-1]

        # NOTE(review): the translation is taken to follow the loop above -- confirm
        sites = translator.translateSes(sites)
        #sites = translator.selectPreferred()

        print(' Block ' + block + ' process: %d to %d'%(minIdxs[idx],maxIdxs[idx]) + \
              ' at\n > ' + sites)
        if sites != lastSites:
            # block with different sites found
            mergedSites .append(sites)
            mergedBlocks .append(block) # only the first block with these sites is stored
            mergedMinIdxs.append(minIdxs[idx])
            mergedMaxIdxs.append(maxIdxs[idx])
            lastSites = sites
            mergedIdx += 1
        else:
            # same sites as the block before: extend the current merged range
            mergedMaxIdxs[mergedIdx-1] = maxIdxs[idx]

    # Show already what we will do
    merged = zip(mergedBlocks,mergedMinIdxs,mergedMaxIdxs,mergedSites)
    merged = list(merged)
    print('\n Show the merged blocks:')
    for block, firstIdx, lastIdx, whiteList in merged:
        print(' Merged Block ' + block + ' process: %d to %d'\
              %(firstIdx,lastIdx) + \
              ' at\n > ' + whiteList)

    # perform the submission block by block (using the merged blocks of course)
    nSubmission = len(mergedBlocks)
    print('\n Submit the merged blocks:')
    for block, firstIdx, lastIdx, whiteList in merged:
        print(' Merged Block ' + block + ' process: %d to %d'\
              %(firstIdx,lastIdx) + \
              ' at\n > ' + whiteList)

        if firstIdx == lastIdx:
            # a single job is given as a list of two job ids, the second one being 100000000
            nSubmit = '%d,%d'%(firstIdx,100000000)
        else:
            nSubmit = '%d-%d'%(firstIdx,lastIdx)
        cmd = 'crab -submit %s -continue %s -GRID.ce_white_list=%s'%(nSubmit,tag,whiteList)
        print(' ' + cmd + '\n')
        # the submission is repeated until crab returns 0
        status = os.system(cmd)
        while status != 0:
            print(' Submission failed (%s) --> retry'%(cmd))
            status = os.system(cmd)

    print(' Number of blocks submitted: %d' % nSubmission)

# and cleanup the temporary file for the task
cmd = "rm -f crab_" + crabTask.tag + ".cfg crab_" + crabTask.tag + ".cfg-Template " \
      + cmsswPy + ' ' + cmsswPy + 'c' + ' writeCfg.py run.sh'
os.system(cmd)

print(' Done... keep watching it...')
sys.exit(0)
628    
629    
630     ## if nSubmit != -1:
631     ## cmd = 'crab -continue %s -submit %s'%(crabId,nSubmit)
632     ## print 'SUBMIT.PY: ' + cmd
633     ## status = os.system(cmd)
634     ## if status == 0:
635     ## print ' --> job submitted\n'
636     ## else:
637     ## leftOver = int(nJobsTotal)
638     ## nSubmitted = 0
639     ## nSubBatch = 80
640     ## while (nSubmitted < int(nJobsTotal)):
641     ## if leftOver < nSubBatch:
642     ## nSubBatch = leftOver
643     ##
644     ## cmd = 'crab -continue %s -submit %d'%(crabId,nSubBatch)
645     ## print 'SUBMIT.PY: ' + cmd
646     ## status = os.system(cmd)
647     ## if status == 0:
648     ## print ' --> %d job submitted\n'%(nSubBatch)
649     ## leftOver = leftOver - nSubBatch
650     ## nSubmitted = nSubmitted + nSubBatch
651     ##
652     ## #print 'SUBMIT.PY: ' + cmd
653     ## #status = os.system(cmd)
654     ## #if status == 0:
655     ## # print ' --> job submitted\n'
656     ##
657    
658     # --------------------------------------------------------------------------------------------------
659     # Run a test job to test the configuration and measure the expected output size
660     # --------------------------------------------------------------------------------------------------
# NOTE(review): this section follows an unconditional sys.exit(0) further up in the
# script and is therefore not reached as the script stands.
if noTestJob == 0:
    #-----------------------------------------------------------------------------------------------
    # use a specific file for test
    #-----------------------------------------------------------------------------------------------
    # first check whether we already have a local file
    print('\n Try to find an existing local file using "find"')
    f = cmsDataset.split("/")
    name = f[1]
    vers = f[2]
    tier = f[3]
    testFile = ""
    lfn = ""
    cmd = 'find ./store/ -name ' + name
    print('Searching: ' + cmd)
    for line in os.popen(cmd).readlines(): # run command
        testFile = line[:-1] # strip '\n'

    # looks like we found a directory which could contain a file, confirm
    if testFile != "":
        print('\n We have a directory ' + testFile + ' .. confirming')
        testFile = testFile + '/' + tier
        cmd = 'find ' + testFile + ' -name \\*.root'
        for line in os.popen(cmd).readlines(): # run command
            # only files of the requested version qualify; the test used to be
            # 'if line.find(vers):' which is true when the version is NOT found
            if line.find(vers) != -1:
                testFile = line[:-1] # strip '\n'
                lfn = testFile[1:]

    # NOTE(review): this check is taken to apply also when no directory was found -- confirm
    if os.path.exists(testFile):
        print(' moving on with locally found file: \n' + ' ' + testFile)
    else:
        print(' no local file found')
        testFile = ""
        lfn = ""

    # now try to see whether we can find a file to download
    if testFile == "":
        cmd = './bin/findLfn.py --input=' + cmsDataset + ' | grep /store/'
        print('\n Find an LFN to download: ' + cmd)
        for line in os.popen(cmd).readlines(): # run command
            if line.find("/store") != -1:
                lfn = line[:-1] # strip '\n'
                break
        if lfn == "":
            print("\n WARNING: No file found, continue assuming it is a simulation job.\n\n")
        else:
            print(' --> LFN: ' + lfn)
            testFile = '.' + lfn

            if os.path.exists(testFile):
                print(' --> File already exists: ' + testFile)
            else:
                cmd = './bin/downloadLfn.py ' + lfn
                print(' --> downloading: ' + cmd)
                status = os.system(cmd)
                if status != 0:
                    print(' ERROR - failed to copy LFN. EXIT now!')
                    sys.exit(1)

    # Parse template input and adjust the input file to the newly copied lfn:
    # every line containing 'file:' is replaced by the local file name
    fileInput = open(cmsswPy,'r')
    fileOutput = open("test-"+cmsswPy,'w')
    for line in fileInput.readlines():
        if line.find("file:") != -1:
            line = '"file:' + lfn[1:] + '"\n'
        fileOutput.write(line)
    fileInput .close()
    fileOutput.close()

    # Setting the number of events (hard coded in the file so far)
    nTryEvts = 1000.

    print('\n --> Please wait, running a test job now! Should be short (trying %.0f'%nTryEvts + \
          ' evts). Check log: cmssw.log')
    cmd = 'rm -f cmssw.log; /usr/bin/time --format "%e %U %S" cmsRun test-' + cmsswPy + \
          ' >& cmssw.log'
    print(' CMD: ' + cmd)
    status = os.system(cmd)

    # the last line of the log holds the three times written by /usr/bin/time
    cmd = 'tail -1 cmssw.log'
    for logLine in os.popen(cmd).readlines(): # run command
        f = logLine[:-1].split() # splitting every blank
        rtime = float(f[0]) # wall clock time
        utime = float(f[1]) # user time
        stime = float(f[2]) # system time

    # number of events processed, from the last 'Begin processing' line of the log
    nEvtsTest = 1000
    cmd = 'grep \'Begin processing\' cmssw.log | tail -1'
    for logLine in os.popen(cmd).readlines(): # run command
        f = logLine[:-1].split() # splitting every blank
        nEvtsTest = f[3] # this is the number of records processed
        nEvtsTest = int(nEvtsTest[:-2]) # strip 'th'

    # total size of the output files as reported by 'ls -s'
    cmd = 'ls -s ' + mitDataset + '*.root'
    size = 0
    for logLine in os.popen(cmd).readlines(): # run command
        f = logLine[:-1].split() # splitting every blank
        size += int(f[0])/1000. # size in MB

    if nEvtsTest != nTryEvts:
        print(' WARNING - Instead of %f did %d'%(nTryEvts,nEvtsTest))

    print(' ')
    print(' Number of test events produced: %d'%nEvtsTest)
    print(' File size for all events: %.2f MB'%size)
    print(' Processing time for all events: %.2f secs (u: %.2f s: %.2f)'%(rtime,utime,stime))
    print(' ')
    print(' --> 1 event == %.2f secs'%(rtime/nEvtsTest))
    print(' --> 1.00 GB == %d events'%(nEvtsTest/size*1024.))
    print(' --> %.2f GB == %d events'%(nevents/(nEvtsTest/size*1024.),nevents))
    print(' ')

if testJob == 1:
    print('\n Test job finished, stopping now.\n')
    sys.exit(0)
786    
787    
788     ## # are we just completing an existing production? and is there something to complete?
789     ## if complete == 1:
790     ## f = storagePath.split('=')
791     ## rfDir = f[-1]
792     ## #cmd = 'castorInventory.py --nJobs=%s %s | grep Missing'%(nJobsTotal,rfDir)
793     ## cmd = 'castorInventory.py --nJobs=%s %s | grep Missing'%(nJobsTotal,storageUrl)
794     ## #print ' CMD: ' + cmd
795     ## for line in os.popen(cmd).readlines(): # run command
796     ## line = line[:-1] # strip '\n'
797     ## f = line.split(':')
798     ## nSubmit = f[1].strip()
799     ## f = nSubmit.split(',')
800     ## if len(f) == 0 or nSubmit == '':
801     ## print ' No more jobs left it seems, nSubmit=' + nSubmit
802     ## cmd = 'rm -rf ' + crabId
803     ## print ' Cleanup: ' + cmd + '\n\n'
804     ## status = os.system(cmd)
805     ## sys.exit(0)
806     ## elif len(f) == 1:
807     ## nInvalid = str(int(nJobsTotal) + 1000)
808     ## print ' One more jobs left, careful, adjusted, nSubmit=' + nSubmit
809     ## nSubmit = nSubmit + ',' + nInvalid
810     ##
811     ## ### nSubmit = ",".join(f[:-1])
812     ## print ' Missing jobs are: ' + nSubmit
813     ##
814     ## sys.exit(0)