ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitProd/Processing/bin/submit.py
Revision: 1.8
Committed: Sun Dec 5 01:01:21 2010 UTC (14 years, 5 months ago) by paus
Content type: text/x-python
Branch: MAIN
CVS Tags: Mit_020pre1, Mit_018, Mit_017
Changes since 1.7: +45 -40 lines
Log Message:
Next iteration with improved downloading tool.

File Contents

# Content
1 #!/usr/bin/env python
2 #---------------------------------------------------------------------------------------------------
3 # Script to test, create and submit one complete production task
4 #
5 # Complete refers here to the proper preparation of the ultimate storage location (a storage element
6 # with a given storage path etc.), the submission of a test job to evaluate timing and data output
7 # size and finally the creation of the job configurations and the submission of the jobs to the grid
8 # sites. The grid is accessed via crab tools.
9 #
10 # While the script is pretty complete it has become a bit long and messy. Some cleanup will
11 # hopefully soon be performed.
12 #
13 # Author: C.Paus (July 1, 2008)
14 #---------------------------------------------------------------------------------------------------
15 import os,sys,getopt,re,string
16 import task,translator
17
18 #===================================================================================================
def searchReplace(line,mitCfg,version,mitDataset, \
                  cmsDataset='X',cmsswPy='X',dbs='X',sched='X',blacklist='X',skpEvts='X'):
    """Replace the XX-...-XX placeholders in one line of a configuration template.

    Arguments:
      line        text in which the placeholders are replaced
      mitCfg      value for XX-MITCFG-XX
      version     value for XX-MITVERSION-XX
      mitDataset  value for XX-MITDATASET-XX
      cmsDataset  value for XX-CMSDATASET-XX
      cmsswPy     value for XX-CMSSWPY-XX
      dbs         value for XX-DBS-XX
      sched       value for XX-SCHED-XX
      blacklist   value for XX-BLACKLIST-XX
      skpEvts     value for XX-SKIPEVTS-XX

    Returns the line with every occurrence of each placeholder replaced. The values are inserted
    literally: backslashes or group references in a value are not interpreted.
    """
    # the order is kept fixed so that a placeholder contained in an inserted value is treated the
    # same way on every call
    substitutions = (
        ('XX-CMSDATASET-XX', cmsDataset),
        ('XX-MITDATASET-XX', mitDataset),
        ('XX-CMSSWPY-XX',    cmsswPy),
        ('XX-MITCFG-XX',     mitCfg),
        ('XX-MITVERSION-XX', version),
        ('XX-DBS-XX',        dbs),
        ('XX-SCHED-XX',      sched),
        ('XX-BLACKLIST-XX',  blacklist),
        # this placeholder was prepared but never substituted before
        ('XX-SKIPEVTS-XX',   skpEvts),
    )
    for placeholder, value in substitutions:
        line = line.replace(placeholder, value)
    return line
41
42 #===================================================================================================
def adjustCfg(line,nevents,crabId):
    """Fill the per-subtask placeholders of one configuration line.

    XX-NEVENTS-XX is replaced by str(nevents) and XX-CRABID-XX by crabId; the modified line is
    returned.
    """
    replacements = (('XX-NEVENTS-XX', str(nevents)),
                    ('XX-CRABID-XX',  crabId))
    for pattern, value in replacements:
        line = re.sub(pattern, value, line)
    return line
51
52 #===================================================================================================
def findStoragePath(mitCfg,version,mitDataset):
    """Return the storage location configured in the crab.cfg of one production setup.

    The file $MIT_PROD_DIR/<mitCfg>/<version>/crab.cfg is read once. The value of the last line
    starting with 'storage_path' (everything behind the first '=', all white space removed) forms
    the base; the last blank separated word of every line starting with 'user_remote_dir' is
    appended to it. Placeholders are filled in with searchReplace() first.

    Raises RuntimeError when the file contains no storage_path line (this used to surface as an
    unbound variable error). A missing file raises the IOError of open().
    """
    crabCfg = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/crab.cfg'
    cfgFile = open(crabCfg,'r')
    try:
        # strip only the line terminator, a last line without one must stay complete
        cfgLines = [ cfgLine.rstrip('\n') for cfgLine in cfgFile ]
    finally:
        cfgFile.close()

    # find the foreseen storage place
    storagePath = None
    for cfgLine in cfgLines:
        if cfgLine.startswith('storage_path'):
            cfgLine = searchReplace(cfgLine,mitCfg,version,mitDataset)
            # the value itself may contain '=' signs, only the key in front is dropped
            storagePath = re.sub(r"\s", "", "=".join(cfgLine.split("=")[1:]))
    if storagePath is None:
        raise RuntimeError("No storage_path entry found in: %s" % crabCfg)

    # append the remote directory
    for cfgLine in cfgLines:
        if cfgLine.startswith('user_remote_dir'):
            cfgLine = searchReplace(cfgLine,mitCfg,version,mitDataset)
            storagePath += cfgLine.split(" ")[-1]
    return storagePath
74
75 #===================================================================================================
def createDirCern(storagePath):
    """Make sure the given Castor directory exists and is open to everybody (mode 777).

    Uses the rfdir, rfmkdir and rfchmod commands. The script exits with status 1 when the
    permissions cannot be set.
    """
    # check whether path exists, create it if missing
    if os.system('rfdir ' + storagePath + ' >& /dev/null') == 0:
        print(' Castor directory exists: ' + storagePath + '\n --> Moving on.')
    else:
        print(' Castor directory needs to be created.')
        if os.system('rfmkdir -p ' + storagePath) == 0:
            print(' --> Created: ' + storagePath)

    # the permissions are set in any case, also for a directory that was already there
    if os.system('rfchmod 777 ' + storagePath) != 0:
        print(' --> Setting permissions failed. EXIT now.\n')
        sys.exit(1)
    print(' --> Set permissions: 777\n')
99
100 #===================================================================================================
def create(path):
    """Create a directory with mode 777 through ssh if the path points to the MIT storage area.

    Only paths matching '/pnfs/cmsaf.mit.edu/t2bat' are handled; of such a path the part behind the
    last '=' is created with 'mkdir -p' on paus@cgate and then opened with 'chmod 777'.

    Returns -1 when the path is not handled here, otherwise 0 on success or the exit status of the
    first remote command that failed. (The mkdir status used to be overwritten by the chmod status.)
    """
    if not re.search('/pnfs/cmsaf.mit.edu/t2bat',path):
        return -1

    path = path.split('=')[-1]
    status = os.system('ssh paus@cgate mkdir -p ' + path)
    if status != 0:
        # no point in changing permissions of a directory that could not be created
        return status
    return os.system('ssh paus@cgate chmod 777 ' + path)
114
115 #===================================================================================================
def createDirGeneral(storageEle,storagePath):
    """Make sure the storage directory exists on the given storage element and is group writable.

    The directory is first tried through create(); if that reports success nothing else is done.
    Otherwise the SRM tools (srmls, srmmkdir) are used on srm://<storageEle>:8443<storagePath>.
    The script exits with status 1 when the final permission check prints no matching line.
    """
    if create(storagePath) == 0:
        print(' ')
        print(' Directory was created at MIT.\n')
        return

    # set the storage URL
    srmBase    = 'srm://' + storageEle + ':8443'
    storageUrl = srmBase + storagePath

    # create the directory only if it is missing
    if os.system('srmls ' + storageUrl + ' >& /dev/null') == 0:
        print(' ')
        print(' Directory already found.... moving on.')
    else:
        # the three parent levels are tried first (outermost first), the main storage directory
        # comes last and only its status decides about success
        pieces  = storagePath.split('/')
        targets = [ "/".join(pieces[:-nLevels]) for nLevels in (3,2,1) ] + [ storagePath ]
        for target in targets:
            cmd = 'srmmkdir ' + srmBase + target + ' >& /dev/null'
            print(' srmmkdir: ' + cmd)
            status = os.system(cmd)
            print(' srmmkdir: status %d'%(status))
        if status == 0:
            print(' ')
            print(' Directory was created.')
        else:
            print(' ')
            print(' ')
            print(' ')
            print(' ERROR - failed to create the Storage Area.')
            print(' ')
            print(' ')

    # the check passes as soon as srmls shows at least one GroupPermission line containing RWX
    print(' Check permissions with: srmls -l -count=1 ' + storageUrl + '\n')
    cmd = 'srmls -l -count=1 ' + storageUrl + ' | grep GroupPermission | grep RWX'
    permissionsOk = False
    for line in os.popen(cmd).readlines():
        print(" Permissions? " + line[:-1])
        permissionsOk = True
    print(' ')

    if not permissionsOk:
        print(' --> Directory creation or permissions failed. EXIT now.\n')
        sys.exit(1)
184
185 #===================================================================================================
186 # Main starts here
187 #===================================================================================================
# Define string to explain usage of the script (every option accepted below is listed)
usage  = "Usage: submit.py --cmsDataset=<name> | --mitDataset=<name>\n"
usage += " --cmssw=<name>\n"
usage += " --mitCfg=<name>\n"
usage += " --version=<version>\n"
usage += " --dbs=<name>\n"
usage += " --sched=<name>\n"
usage += " --blacklist=<name>\n"
usage += " --nSubmit=<submittedJobs>\n"
usage += " --skipEvents=<'nRunX:nEventY','nRunXX:nEventYY',...>\n"
usage += " --complete\n"
usage += " --useExistingLfns\n"
usage += " --testJob\n"
usage += " --noTestJob\n"
usage += " --test\n"
usage += " --help\n"

# Define the valid options which can be specified and check out the command line
valid = ['cmsDataset=','mitDataset=','cmssw=','mitCfg=','version=','dbs=','sched=','blacklist=',
         'nSubmit=','skipEvents=','complete','useExistingLfns','testJob','noTestJob','test','help']
try:
    opts, args = getopt.getopt(sys.argv[1:], "", valid)
except getopt.GetoptError as ex:
    # unknown option or missing argument: explain the usage and give up
    print(usage)
    print(str(ex))
    sys.exit(1)
212
213 # --------------------------------------------------------------------------------------------------
214 # Get all parameters for the production
215 # --------------------------------------------------------------------------------------------------
# crab id: a time stamped tag as printed by the 'date' command
cmd = "date +crab_0_%y%m%d_%H%M%S"
for line in os.popen(cmd).readlines():  # run command
    crabId = line[:-1]
print("\n This job will be CrabId: " + crabId + "\n")

# Set defaults for each option
cmsDataset      = None
mitDataset      = None
cmssw           = "cmssw"
mitCfg          = "filefi"
version         = "014"
#dbs            = "https://cmsdbsprod.cern.ch:8443/cms_dbs_prod_global/servlet/DBSServlet"
dbs             = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
sched           = "glite"
blacklist       = ""
nSubmit         = -1
skpEvts         = ''
complete        = 0
useExistingLfns = False
noTestJob       = 0
testJob         = 0
test            = 0

# Read new values from the command line
for opt, arg in opts:
    if   opt == "--help":
        print(usage)
        sys.exit(0)
    elif opt == "--cmsDataset":
        cmsDataset = arg
    elif opt == "--mitDataset":
        mitDataset = arg
    elif opt == "--cmssw":
        cmssw = arg
    elif opt == "--mitCfg":
        mitCfg = arg
    elif opt == "--version":
        version = arg
    elif opt == "--dbs":
        dbs = arg
    elif opt == "--sched":
        sched = arg
    elif opt == "--blacklist":
        blacklist = arg
    elif opt == "--nSubmit":
        nSubmit = arg
    elif opt == "--skipEvents":
        skpEvts = arg
    elif opt == "--complete":
        complete = 1
    elif opt == "--useExistingLfns":
        useExistingLfns = True
    elif opt == "--noTestJob":
        # the two test job switches exclude each other, the last one given wins
        noTestJob = 1
        testJob   = 0
    elif opt == "--testJob":
        testJob   = 1
        noTestJob = 0
    elif opt == "--test":
        test = 1
277
# Make sure we have the right 'database' and the right config file: a cmssw configuration other
# than the default one comes with its own database file (Productions.<cmssw>)
database = 'Productions'
cmsswPy  = cmssw + '_' + crabId + '.py'
if cmssw != 'cmssw':
    database += '.' + cmssw

# Deal with obvious problems
if cmsDataset is None and mitDataset is None:
    raise RuntimeError("--cmsDataset | --mitDataset options not provided. One of them is required.")

crabFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'crab.cfg'
if not os.path.exists(crabFile):
    raise RuntimeError("Crab file not found: %s" % crabFile)
cmsswFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + cmssw + '.py'
if not os.path.exists(cmsswFile):
    # keep the generic marker but also say which file is missing (it used to be overwritten)
    raise RuntimeError(" XXXX ERROR no valid configuration found XXXX\n"
                       + " Cmssw file not found: %s" % cmsswFile)

# Prepare the ce/se translator
# (the name of the imported module is reused for the instance, so the module itself is no longer
# reachable under this name from here on)
translator = translator.Translator(os.environ['MIT_PROD_DIR']+'/'+mitCfg+'/'+version+'/ceTable',
                                   os.environ['MIT_PROD_DIR']+'/'+mitCfg+'/'+version+'/seTable')
303
# Resolve the other mitCfg parameters from the configuration file
# NOTE(review): the nesting below was reconstructed from a listing without indentation - confirm
cmd = 'cat ' + os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + database

pending = ""                            # text collected so far from lines ending in a backslash
for line in os.popen(cmd).readlines():  # run command
    line = line[:-1]
    # get rid of empty or commented lines
    if line == '' or line[0] == '#':
        continue

    # join with what is pending and see whether more is coming
    fullLine = pending + line
    if fullLine[-1] == "\\":
        pending = fullLine[:-1]
        continue
    pending = ""

    # the record is complete: is it the dataset we are looking for?
    names = fullLine.split()            # splitting every blank
    if names[0] != cmsDataset and names[1] != mitDataset:
        continue

    cmsDataset = names[0]               # CMS name of the dataset
    mitDataset = names[1]               # equivalent MIT name of the dataset
    nevents    = int(names[2])          # number of events to be used in the production
    if names[4] != "-":
        localPath = names[4]
    print("\n Sample info from database %s for CMSSW config %s\n %s"\
          %(database,cmsswPy,fullLine))
    # a sixth field selects the dbs instance and replaces the dbs url defined so far
    if len(names) == 6:
        dbs = 'https://cmsdbsprod.cern.ch:8443/cms_dbs_' + names[5] + '/servlet/DBSServlet'
        print(' dbs: ' + dbs + '\n')
    else:
        print('')

if mitDataset is None or cmsDataset is None:
    print("ERROR - dataset not defined.")
    sys.exit(1)
348
# Prepare file based processing input: run.sh and writeCfg.py are copied to the working directory
prodCfgDir = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/'
runFile    = prodCfgDir + 'run.sh'
if not os.path.exists(runFile):
    raise RuntimeError("Run file not found: %s" % runFile)
os.system('cp ' + runFile + ' ./')
writeCfgFile = prodCfgDir + 'writeCfg.py'
os.system('cp ' + writeCfgFile + ' ./')

# an existing lfn file is only kept if explicitly requested
lfnFile = mitCfg + '/' + version + '/' + mitDataset + '.lfns'
if os.path.exists(lfnFile):
    print("\n INFO -- Lfn file found: %s. This means someone already worked on this dataset.\n" % lfnFile)
    if not useExistingLfns:
        os.system('rm ' + lfnFile)

# recreate if requested or not existing
if not (useExistingLfns and os.path.exists(lfnFile)):
    cmd = 'input.py --option=lfn --dataset=' + cmsDataset + ' > ' + lfnFile
    print(' Input: ' + cmd + '\n')
    os.system(cmd)

# Create the corresponding crab task
crabTask             = task.Task(crabId,cmsDataset,mitCfg,version)
crabTask.storagePath = findStoragePath(mitCfg,version,mitDataset)
crabTask.loadAllLfns(lfnFile)
crabTask.loadCompletedLfns()
missingLfnFile       = lfnFile + '_' + crabTask.tag
crabTask.createMissingLfns(lfnFile,missingLfnFile)
if crabTask.nLfnMissing == 0:
    print(' All requested LFNs are available. EXIT now.')
    sys.exit()

crabTask.createSubTasks(missingLfnFile)
os.system('cp ' + missingLfnFile + '_*' + ' ./')

# from here on nevents counts the lfns of the task
nevents = len(crabTask.lfns)

# Say what we do now
print(' Preparing dataset: ' + cmsDataset + ' [MIT: ' + mitDataset + ' with ' + str(nevents) + \
      ' total jobs(=lfns), nEvtsTotal: %d]' % crabTask.nTotalEvts)
392
393 # --------------------------------------------------------------------------------------------------
394 # Prepare the config files
395 # --------------------------------------------------------------------------------------------------
# Cleaning up
os.system("rm -f crab_" + crabTask.tag + ".cfg crab_" + crabTask.tag + ".cfg-Template " + cmsswPy)

# Parse the template inputs and write the crab configuration template and the cmssw python file
# NOTE(review): the listing lost its indentation; every line is taken to be written, with lines
# starting with '#' copied unchanged, as in the test job loop further down - confirm
for templateName, outputName in ((crabFile,  "crab_" + crabTask.tag + ".cfg-Template"),
                                 (cmsswFile, cmsswPy)):
    fileInput  = open(templateName,'r')
    fileOutput = open(outputName,'w')
    for line in fileInput:
        if line[0] != '#':
            line = searchReplace(line,mitCfg,version,mitDataset, \
                                 cmsDataset,cmsswPy,dbs,sched,blacklist,skpEvts)
        fileOutput.write(line)
    fileInput .close()
    fileOutput.close()
425
426 # --------------------------------------------------------------------------------------------------
427 # Job creation and submission
428 # --------------------------------------------------------------------------------------------------
# For every subtask of the crab task: write its crab configuration, prepare the storage area,
# create the crab jobs, group the lfns by block and sites and submit the groups one by one.
# NOTE(review): this listing lost its indentation, so the nesting of the statements is not visible
# here; the comments below only describe the statements themselves.
429 print '\n Using CMSSW version: ' + os.environ['CMSSW_VERSION']
430 print ' Using CRAB version: ' + os.environ['CRAB_VERS'] + '\n'
431
# patterns searched for in the output of 'crab -create'
432 pattern1 = 'working directory'
433 pattern2 = 'Total of '
434
435 # Go through the crabTask and submit each subtask to the grid
436 for subTask in crabTask.subTasks:
437
438 tag = crabTask.tag + '_' + subTask.tag()
439 print ' Working on subtask: ' + tag
440
441 nJobsTotal = 0
442 crabIdCheck = ''
443
444 # adjust crab config
445 fileInput = open("crab_" + crabTask.tag + ".cfg-Template",'r')
446 fileOutput = open("crab_" + crabTask.tag + ".cfg",'w')
447 line = fileInput.readline()
448 while (line != ''):
449 if line[0] != '#':
450 line = adjustCfg(line,subTask.nSubTaskLfn,tag)
451 fileOutput.write(line)
452 line = fileInput.readline()
453 fileInput .close()
454 fileOutput.close()
455
456 # ----------------------------------------------------------------------------------------------
457 # Deal with storage element area
458 # ----------------------------------------------------------------------------------------------
459 # find the foreseen storage place
460 cmd = 'grep ^storage_element crab_' + crabTask.tag + '.cfg-Template'
461 for file in os.popen(cmd).readlines(): # run command
462 line = file[:-1] # strip '\n'
463 # decode the storage element name
464 names = line.split("=") # splitting every '='
465 storageEle = names.pop()
466 storageEle = re.sub("\s", "",storageEle)
467 cmd = 'grep ^storage_path crab_' + crabTask.tag + '.cfg-Template'
468 for file in os.popen(cmd).readlines(): # run command
469 line = file[:-1] # strip '\n'
470 # decode the storage directory name
471 names = line.split("=") # splitting every '='
472 names = names[1:]
473 storagePath = "=".join(names)
474 storagePath = re.sub("\s", "",storagePath)
475 cmd = 'grep ^user_remote_dir crab_' + crabTask.tag + '.cfg-Template'
476 for file in os.popen(cmd).readlines(): # run command
477 line = file[:-1] # strip '\n'
478 # decode the storage directory name
479 names = line.split(" ") # splitting every ' '
480 storagePath += names[-1]
481 storageUrl = 'srm://' + storageEle + ':8443' + storagePath
482
# path and url are read from the template, so the subtask placeholders are filled in here
483 storagePath = adjustCfg(storagePath,subTask.nSubTaskLfn,tag)
484 storageUrl = adjustCfg(storageUrl, subTask.nSubTaskLfn,tag)
485
486 print ' StorageUrl: ' + storageUrl
487
488 # Create storage area
489 if test == 0: # for testing we do not create the directories
490 if (storageEle == 'srm.cern.ch'):
491 createDirCern(storagePath)
492 else:
493 createDirGeneral(storageEle,storagePath)
494
495 # cmd = "crab -create -debug 3 -USER.ui_working_dir=" + tag + " | tee forDaniele "
496 cmd = "crab -create -cfg crab_" + crabTask.tag + ".cfg -USER.ui_working_dir=" + tag
497 print ' -> ' + cmd
# with --test the crab command is only echoed, not executed
498 if test != 0:
499 cmd = 'echo ' + cmd
500 print ' ' + cmd
501 for line in os.popen(cmd).readlines(): # run command
502 line = line[:-1] # strip '\n'
503 print ' CRAB: ' + line
504 if re.search(pattern1,line):
505 f = line.split("/")
506 crabIdCheck = f[-2]
507 if re.search(pattern2,line):
508 f = line.split(" ")
509 nJobsTotal = f[4]
510 if nJobsTotal == '':
511 nJobsTotal = f[2]
512 # report
513 print ' --> %s jobs created (%s).\n'%(nJobsTotal,tag)
514 # and cleanup the temporary file for the subtask
515 cmd = "rm -f " + mitDataset + ".lfns_" + tag
516 os.system(cmd)
517
518 # adjust arguments
519 cmd = 'input.py --db=' + lfnFile + '_' + tag + ' --option=xml --dataset=' + cmsDataset + \
520 ' > ' + tag + '/share/arguments.xml'
521 print ' update arguments: ' + cmd
522 if test == 0:
523 os.system(cmd)
524
525 # loop through the file and determine the submission parameters
# each line that is not a comment is read as '<block> <lfn> <nEvents>'; idx numbers these lines
# starting at 1 and minIdxs/maxIdxs hold the first and last idx of each run of equal blocks
526 block = ''
527 blocks = []
528 idx = 0
529 minIdxs = []
530 maxIdxs = []
531
532 fileInput = open(lfnFile + '_' + tag,'r')
533 line = fileInput.readline()
534 while (line != ''):
535 line = line[:-1]
536 if line[0] != '#':
537 idx += 1
538 f = line.split()
539 block = f[0]
540 lfn = f[1]
541 nEvents = f[2]
542 if len(blocks) == 0 or block != blocks[-1]:
543 # new block found
544 blocks .append(block)
545 minIdxs.append(idx)
546 maxIdxs.append(idx)
547 else:
548 maxIdxs[-1] = idx
549 # keep the reading going
550 line = fileInput.readline()
551 fileInput .close()
552
553 # merge blocks together if they use the same sites
554 lastSites = ''
555 idx = 0
556 mergedIdx = 0
557 mergedBlocks = []
558 mergedSites = []
559 mergedMinIdxs = []
560 mergedMaxIdxs = []
561 print '\n Show the unmerged blocks:'
562 for block in blocks:
563 cmd = "sites.py --block=" + block
# NOTE(review): 'sites' is only assigned when sites.py prints at least one line (the last line
# wins); with no output an earlier value would be reused, or the name is undefined - confirm
564 for line in os.popen(cmd).readlines(): # run command
565 line = line[:-1]
566 sites = line
567
568 sites = translator.translateSes(sites)
569 #sites = translator.selectPreferred()
570
571 print ' Block ' + block + ' process: %d to %d'%(minIdxs[idx],maxIdxs[idx]) + \
572 ' at\n > ' + sites
573 # block with different sites found
574 if sites != lastSites:
575 mergedSites .append(sites)
576 mergedBlocks .append(blocks [idx]) # only the first block with these sites is stored
577 mergedMinIdxs.append(minIdxs[idx])
578 mergedMaxIdxs.append(maxIdxs[idx])
579 lastSites = sites
580 mergedIdx += 1
581 else:
582 mergedMaxIdxs[mergedIdx-1] = maxIdxs[idx]
583
584 # last action in the loop: increment the unmerged blocks
585 idx += 1
586
587 # Show already what we will do
588 idx = 0
589 print '\n Show the merged blocks:'
590 for block in mergedBlocks:
591 print ' Merged Block ' + block + ' process: %d to %d'\
592 %(mergedMinIdxs[idx],mergedMaxIdxs[idx]) + \
593 ' at\n > ' + mergedSites[idx]
594 # last action in the loop: increment the merged blocks
595 idx += 1
596
597 # perform the submission block by block (using the merged blocks of course)
598 nSubmission = len(mergedBlocks)
599 idx = 0
600 print '\n Submit the merged blocks:'
601 for block in mergedBlocks:
602 print ' Merged Block ' + block + ' process: %d to %d'\
603 %(mergedMinIdxs[idx],mergedMaxIdxs[idx]) + \
604 ' at\n > ' + mergedSites[idx]
605
# nSubmit is rebuilt here as a job range, a value given with --nSubmit is overwritten;
# a range of one job is written as '<idx>,100000000' instead of '<idx>-<idx>'
606 nSubmit = '%d-%d'%(mergedMinIdxs[idx],mergedMaxIdxs[idx])
607 if mergedMinIdxs[idx] == mergedMaxIdxs[idx]:
608 nSubmit = '%d,%d'%(mergedMinIdxs[idx],100000000)
609 cmd = 'crab -submit %s -continue %s -GRID.ce_white_list=%s'%(nSubmit,tag,mergedSites[idx])
610 print ' ' + cmd + '\n'
611 status = os.system(cmd)
# NOTE(review): the same command is repeated for as long as it returns a non-zero status; there
# is no limit on the number of retries and no pause between them
612 while status != 0:
613 print ' Submission failed (%s) --> retry'%(cmd)
614 status = os.system(cmd)
615
616 # last action in the loop: increment the merged blocks
617 idx += 1
618
619 print ' Number of blocks submitted: %d' % nSubmission
620
621 # and cleanup the temporary file for the task
622 cmd = "rm -f crab_" + crabTask.tag + ".cfg crab_" + crabTask.tag + ".cfg-Template " \
623 + cmsswPy + ' ' + cmsswPy + 'c' + ' writeCfg.py run.sh'
624 os.system(cmd)
625
626 print ' Done... keep watching it...'
# NOTE(review): presumably at the top level of the script; if so nothing below this exit is run
627 sys.exit(0)
628
629
630 ## if nSubmit != -1:
631 ## cmd = 'crab -continue %s -submit %s'%(crabId,nSubmit)
632 ## print 'SUBMIT.PY: ' + cmd
633 ## status = os.system(cmd)
634 ## if status == 0:
635 ## print ' --> job submitted\n'
636 ## else:
637 ## leftOver = int(nJobsTotal)
638 ## nSubmitted = 0
639 ## nSubBatch = 80
640 ## while (nSubmitted < int(nJobsTotal)):
641 ## if leftOver < nSubBatch:
642 ## nSubBatch = leftOver
643 ##
644 ## cmd = 'crab -continue %s -submit %d'%(crabId,nSubBatch)
645 ## print 'SUBMIT.PY: ' + cmd
646 ## status = os.system(cmd)
647 ## if status == 0:
648 ## print ' --> %d job submitted\n'%(nSubBatch)
649 ## leftOver = leftOver - nSubBatch
650 ## nSubmitted = nSubmitted + nSubBatch
651 ##
652 ## #print 'SUBMIT.PY: ' + cmd
653 ## #status = os.system(cmd)
654 ## #if status == 0:
655 ## # print ' --> job submitted\n'
656 ##
657
658 # --------------------------------------------------------------------------------------------------
659 # Run a test job to test the configuration and measure the expected output size
660 # --------------------------------------------------------------------------------------------------
# Test job: find or download one input file, run cmsRun on it and print the time and output size
# per event.
# NOTE(review): the script calls sys.exit(0) right after the submission loop, so this section looks
# unreachable as listed - confirm before relying on it. The indentation of the listing was lost,
# the nesting of the statements below is therefore not visible here.
661 if noTestJob == 0:
662 #-----------------------------------------------------------------------------------------------
663 # use a specific file for test
664 #-----------------------------------------------------------------------------------------------
665 # first check whether we already have a local file
666 print '\n Try to find an existing local file using "find"'
# the dataset name is taken apart as /<name>/<vers>/<tier>
667 f = cmsDataset.split("/")
668 name = f[1]
669 vers = f[2]
670 tier = f[3]
671 file = ""
672 lfn = ""
673 cmd = 'find ./store/ -name ' + name
674 print 'Searching: ' + cmd
675 for line in os.popen(cmd).readlines(): # run command
676 file = line[:-1] # strip '\n'
677
678 # looks like there could be a file we found a directory, confirm
679 if file != "":
680 print '\n We have a directory ' + file + ' .. confirming'
681 file = file + '/' + tier
682 cmd = 'find ' + file + ' -name \*.root'
683 for line in os.popen(cmd).readlines(): # run command
# NOTE(review): str.find returns -1 (true) when 'vers' is not in the line and 0 (false) when the
# line starts with it, so this condition is probably not the intended 'contains' test
684 if line.find(vers):
685 file = line[:-1] # strip '\n'
686 lfn = file[1:]
687
688 if os.path.exists(file):
689 print ' moving on with locally found file: \n' + ' ' + file
690 else:
691 print ' no local file found'
692 file = ""
693 lfn = ""
694
695 # now try to see whether we can find a file to download
696 if file == "":
697 cmd = './bin/findLfn.py --input=' + cmsDataset + ' | grep /store/'
698 print '\n Find an LFN to download: ' + cmd
699 for line in os.popen(cmd).readlines(): # run command
700 if line.find("/store") != -1:
701 lfn = line[:-1] # strip '\n'
702 break
703 if lfn == "":
704 print "\n WARNING: No file found, continue assuming it is a simulation job.\n\n"
705 else:
706 print ' --> LFN: ' + lfn
707 file = '.' + lfn
708
709 if os.path.exists(file):
710 print ' --> File already exists: ' + file
711 else:
712 cmd = './bin/downloadLfn.py ' + lfn
713 print ' --> downloading: ' + cmd
714 status = os.system(cmd)
715 if status != 0:
716 print ' ERROR - failed to copy LFN. EXIT now!'
717 sys.exit(1)
718
719 # Parse template input and adjust the input file to the newly copied lfn
# every line containing 'file:' is replaced by a line pointing to the local copy of the lfn
720 fileInput = open(cmsswPy,'r')
721 fileOutput = open("test-"+cmsswPy,'w')
722 line = fileInput.readline()
723 while (line != ''):
724 if line.find("file:") != -1:
725 line = '"file:' + lfn[1:] + '"\n'
726 fileOutput.write(line)
727 line = fileInput.readline()
728 fileInput .close()
729 fileOutput.close()
730
731 # Setting the number of events (hard coded in the file so far)
732 nTryEvts = 1000.
733
734 print '\n --> Please wait, running a test job now! Should be short (trying %.0f'%nTryEvts + \
735 ' evts). Check log: cmssw.log'
736 cmd = 'rm -f cmssw.log; /usr/bin/time --format "%e %U %S" cmsRun test-' + cmsswPy + \
737 ' >& cmssw.log'
738 print ' CMD: ' + cmd
739 status = os.system(cmd)
740
# the last line of the log is expected to be the '%e %U %S' output of /usr/bin/time
741 cmd = 'tail -1 cmssw.log'
742 for file in os.popen(cmd).readlines(): # run command
743 line = file[:-1] # strip '\n'
744 f = line.split() # splitting every blank
745 rtime = float(f[0]) # wall clock time
746 utime = float(f[1]) # user time
747 stime = float(f[2]) # system time
748
749 nEvtsTest = 1000
750 cmd = 'grep \'Begin processing\' cmssw.log | tail -1'
751 for file in os.popen(cmd).readlines(): # run command
752 line = file[:-1] # strip '\n'
753 # take the record counter from the last 'Begin processing' line
754 f = line.split() # splitting every blank
755 nEvtsTest = f[3] # this is the number of records processed
756 nEvtsTest = int(nEvtsTest[:-2]) # strip 'th'
757 cmd = 'ls -s ' + mitDataset + '*.root'
758 size = 0
759 for file in os.popen(cmd).readlines(): # run command
760 line = file[:-1] # strip '\n'
761 f = line.split() # splitting every blank
762 size += int(f[0])/1000. # size in MB
763
# NOTE(review): 'names' from this second read of the log is not used in the statements shown here
764 cmd = 'tail -1 cmssw.log'
765 for file in os.popen(cmd).readlines(): # run command
766 line = file[:-1] # strip '\n'
767 # get total, user and system times
768 names = line.split() # splitting every blank
769
770 if nEvtsTest != nTryEvts:
771 print ' WARNING - Instead of %f did %d'%(nTryEvts,nEvtsTest)
772
# NOTE(review): the lines below divide by 'size', which stays 0 when 'ls' lists no root file
773 print ' '
774 print ' Number of test events produced: %d'%nEvtsTest
775 print ' File size for all events: %.2f MB'%size
776 print ' Processing time for all events: %.2f secs (u: %.2f s: %.2f)'%(rtime,utime,stime)
777 print ' '
778 print ' --> 1 event == %.2f secs'%(rtime/nEvtsTest)
779 print ' --> 1.00 GB == %d events'%(nEvtsTest/size*1024.)
780 print ' --> %.2f GB == %d events'%(nevents/(nEvtsTest/size*1024.),nevents)
781 print ' '
782
783 if testJob == 1:
784 print '\n Test job finished, stopping now.\n'
785 sys.exit(0)
786
787
788 ## # are we just completing an existing production? and is there something to complete?
789 ## if complete == 1:
790 ## f = storagePath.split('=')
791 ## rfDir = f[-1]
792 ## #cmd = 'castorInventory.py --nJobs=%s %s | grep Missing'%(nJobsTotal,rfDir)
793 ## cmd = 'castorInventory.py --nJobs=%s %s | grep Missing'%(nJobsTotal,storageUrl)
794 ## #print ' CMD: ' + cmd
795 ## for line in os.popen(cmd).readlines(): # run command
796 ## line = line[:-1] # strip '\n'
797 ## f = line.split(':')
798 ## nSubmit = f[1].strip()
799 ## f = nSubmit.split(',')
800 ## if len(f) == 0 or nSubmit == '':
801 ## print ' No more jobs left it seems, nSubmit=' + nSubmit
802 ## cmd = 'rm -rf ' + crabId
803 ## print ' Cleanup: ' + cmd + '\n\n'
804 ## status = os.system(cmd)
805 ## sys.exit(0)
806 ## elif len(f) == 1:
807 ## nInvalid = str(int(nJobsTotal) + 1000)
808 ## print ' One more jobs left, careful, adjusted, nSubmit=' + nSubmit
809 ## nSubmit = nSubmit + ',' + nInvalid
810 ##
811 ## ### nSubmit = ",".join(f[:-1])
812 ## print ' Missing jobs are: ' + nSubmit
813 ##
814 ## sys.exit(0)