ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitProd/Processing/bin/submit.py
Revision: 1.12
Committed: Mon Sep 19 21:45:41 2011 UTC (13 years, 7 months ago) by paus
Content type: text/x-python
Branch: MAIN
CVS Tags: Mit_025pre2
Changes since 1.11: +152 -95 lines
Log Message:
Reinstate the bin and python areas.

File Contents

# User Rev Content
1 paus 1.2 #!/usr/bin/env python
2     #---------------------------------------------------------------------------------------------------
3     # Script to test, create and submit one complete production task
4     #
5     # Complete refers here to the proper preparation of the ultimate storage location (a storage element
6     # with a given storage path etc.), the submission of a test job to evaluate timing and data output
7     # size and finally the creation of the job configurations and the submission of the jobs to the grid
8     # sites. The grid is accessed via crab tools.
9     #
10     # While the script is pretty complete it has become a bit long and messy. Some cleanup will
11     # hopefully soon be performed.
12     #
13     # Author: C.Paus (July 1, 2008)
14     #---------------------------------------------------------------------------------------------------
15     import os,sys,getopt,re,string
16 paus 1.3 import task,translator
17 paus 1.2
18     #===================================================================================================
def domain():
    """Return the domain part of the local host name.

    The node name reported by os.uname() is cut at its first dot and everything behind
    that dot is returned. A node name without any dot yields the empty string.
    """
    nodeName = os.uname()[1]
    # partition() keeps the text behind the first '.' in slot 2 ('' when there is no dot)
    return nodeName.partition('.')[2]
23    
def storage(seFile):
    """Look up the default storage description for the site we are running at.

    The site tag is derived from the local domain name as returned by domain():
    'T0_CH_CERN' when it contains 'cern.ch', 'T2_US_MIT' otherwise. Every line of seFile
    that starts with this tag is decoded, after removing all blanks, as
    '<tag>:<storage element>:<storage path>:<user remote dir>'. When several lines
    match, the last one wins.

    Returns the string '<storage element>:<storage path>:<user remote dir>'.

    Raises RuntimeError when seFile holds no entry for the tag or the entry has fewer
    than four ':' separated fields. An unreadable seFile raises the IOError/OSError
    coming from open().
    """
    # decide on the foreseen default storage place (where are we running?)
    storageTag = 'T2_US_MIT'
    dom = domain()
    # plain substring tests: the dots are meant literally, not as regex wildcards
    if 'mit.edu' in dom:
        storageTag = 'T2_US_MIT'
    elif 'cern.ch' in dom:
        storageTag = 'T0_CH_CERN'

    fields = None
    # read the table directly instead of spawning 'grep' through a shell
    with open(seFile) as table:
        for line in table:
            if not line.startswith(storageTag):
                continue
            line = line.rstrip('\n').replace(' ', '')
            f = line.split(':')
            if len(f) < 4:
                raise RuntimeError("Malformed storage entry in %s: %s" % (seFile, line))
            # storage element, storage path, user remote directory
            fields = f[1:4]

    if fields is None:
        raise RuntimeError("No storage entry for %s found in %s" % (storageTag, seFile))
    return ':'.join(fields)
42    
43     #===================================================================================================
def getFiles(mitCfg,version):
    """Copy the run script and the config writer of a production into the current directory.

    'run.sh' and 'writeCfg.py' are taken from $MIT_PROD_DIR/<mitCfg>/<version>/. The files
    are handled in that order, so 'run.sh' is already copied when 'writeCfg.py' turns out
    to be missing.

    Raises RuntimeError when one of the two files does not exist and KeyError when the
    MIT_PROD_DIR environment variable is not set. A failing copy raises the error coming
    from shutil.copy() instead of being silently ignored.
    """
    import shutil                        # local import: only this function needs it

    # Prepare file based processing input
    cfgDir = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/'
    for name, label in (('run.sh', 'Run'), ('writeCfg.py', 'Write Cfg')):
        source = cfgDir + name
        if not os.path.exists(source):
            raise RuntimeError("%s file not found: %s" % (label, source))
        # nothing to copy when we are already sitting in the configuration directory
        if os.path.realpath(source) != os.path.realpath(name):
            shutil.copy(source, './')
58    
59     #===================================================================================================
def makeLfnFile(mitCfg,version,mitDataset,dbs,useExistingLfns,cmsDataset=None):
    """Make sure the lfn list of a dataset exists and return its path.

    The list lives in '<mitCfg>/<version>/<mitDataset>.lfns', relative to the current
    directory. An existing list is kept only when useExistingLfns is true; otherwise it is
    removed and generated again by running the external 'input.py' command with its
    output redirected into the list file.

    cmsDataset is the dataset name passed to 'input.py'. It used to be read implicitly
    from a module-level variable of the same name; callers that do not pass it still get
    that module-level value as a fallback.

    Returns the path of the lfn file. Raises RuntimeError when the list has to be
    generated but no CMS dataset name is available.
    """
    lfnFile = mitCfg + '/' + version + '/' + mitDataset + '.lfns'
    if os.path.exists(lfnFile):
        print("\n INFO -- Lfn file found: %s. Someone already worked on this dataset.\n" % lfnFile)
        if not useExistingLfns:
            os.remove(lfnFile)

    # recreate if requested or not existing
    if not useExistingLfns or not os.path.exists(lfnFile):
        if cmsDataset is None:
            # backward compatibility: fall back to the script-level variable
            cmsDataset = globals().get('cmsDataset')
        if cmsDataset is None:
            raise RuntimeError("Cannot create %s: no CMS dataset name available." % lfnFile)
        cmd = 'input.py --dbs=' + dbs + ' --option=lfn --dataset=' + cmsDataset + ' > ' + lfnFile
        print(' Input: ' + cmd + '\n')
        os.system(cmd)

    return lfnFile
75    
76     #===================================================================================================
def searchReplace(line,mitCfg,version,mitDataset,storage,
                  cmsDataset='X',cmsswPy='X',dbs='X',sched='X',blacklist='X',skpEvts='X'):
    """Fill the XX-...-XX placeholders of one template line and return the result.

    storage is the '<storage element>:<storage path>:<user remote dir>' string; its three
    ':' separated parts replace XX-MITSE-XX, XX-MITSPATH-XX and XX-MITRDIR-XX. The other
    arguments replace the placeholder carrying their name (see the table below).

    The values are inserted literally, so backslashes in a value are not interpreted.
    XX-SKIPEVTS-XX is now replaced by skpEvts as well; before, that pattern was prepared
    but never applied. The replacements are applied one after the other in table order.

    Raises IndexError when storage has fewer than three ':' separated parts.
    """
    # decode storage variable
    st = storage.split(':')
    # placeholder -> value, in the order the substitutions are performed
    replacements = (
        ('XX-CMSDATASET-XX', cmsDataset),
        ('XX-MITDATASET-XX', mitDataset),
        ('XX-CMSSWPY-XX',    cmsswPy),
        ('XX-MITCFG-XX',     mitCfg),
        ('XX-MITVERSION-XX', version),
        ('XX-MITSE-XX',      st[0]),
        ('XX-MITSPATH-XX',   st[1]),
        ('XX-MITRDIR-XX',    st[2]),
        ('XX-DBS-XX',        dbs),
        ('XX-SCHED-XX',      sched),
        ('XX-BLACKLIST-XX',  blacklist),
        ('XX-SKIPEVTS-XX',   skpEvts),
    )
    # perform all search and replaces
    for placeholder, value in replacements:
        line = line.replace(placeholder, value)
    return line
107    
108     #===================================================================================================
def adjustCfg(line,nevents,crabId):
    """Return line with the per-subtask placeholders filled in.

    Every 'XX-NEVENTS-XX' becomes str(nevents) and every 'XX-CRABID-XX' becomes crabId.
    Both values are handed to re.sub() as replacement templates.
    """
    substitutions = (('XX-NEVENTS-XX', str(nevents)),
                     ('XX-CRABID-XX',  crabId))
    # perform all search and replaces
    for marker, replacement in substitutions:
        line = re.sub(marker, replacement, line)
    return line
117    
118     #===================================================================================================
def findStoragePath(mitCfg,version,mitDataset,seFile):
    """Work out the storage path configured in the crab.cfg template of a production.

    $MIT_PROD_DIR/<mitCfg>/<version>/crab.cfg is scanned for lines starting with
    'storage_path' and 'user_remote_dir'. Their placeholders are filled with
    searchReplace(), using the storage description returned by storage(seFile).

    The result is everything behind the first '=' of the storage_path line with all
    white space removed (the last such line wins), followed by the last blank separated
    word of every user_remote_dir line.

    Raises RuntimeError when crab.cfg has no storage_path line (this used to fail on an
    undefined variable) and KeyError when MIT_PROD_DIR is not set. An unreadable
    crab.cfg raises the IOError/OSError coming from open().
    """
    crabCfg = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/crab.cfg'
    with open(crabCfg) as cfg:
        cfgLines = [raw.rstrip('\n') for raw in cfg]

    # the storage description does not depend on the line: look it up only once
    site = storage(seFile)

    # find the foreseen storage place
    storagePath = None
    for line in cfgLines:
        if not line.startswith('storage_path'):
            continue
        line = searchReplace(line,mitCfg,version,mitDataset,site)
        # decode the storage directory name: text behind the first '=', white space dropped
        storagePath = re.sub(r"\s", "", "=".join(line.split("=")[1:]))
    if storagePath is None:
        raise RuntimeError("No storage_path entry found in %s" % crabCfg)

    for line in cfgLines:
        if not line.startswith('user_remote_dir'):
            continue
        line = searchReplace(line,mitCfg,version,mitDataset,site)
        # the remote directory is the last blank separated word of the line
        storagePath += line.split(" ")[-1]
    return storagePath
140    
141     #===================================================================================================
def createDirCern(storagePath):
    """Make sure the Castor directory storagePath exists and set its permissions to 777.

    The directory is probed with 'rfdir' and created with 'rfmkdir -p' when the probe
    fails. 'rfchmod 777' is run in either case; if it fails the script is terminated
    through sys.exit(1).
    """
    # check whether path exists
    found = os.system('rfdir ' + storagePath + ' >& /dev/null') == 0

    # create it if missing
    if found:
        print(' Castor directory exists: ' + storagePath + '\n --> Moving on.')
    else:
        print(' Castor directory needs to be created.')
        if os.system('rfmkdir -p ' + storagePath) == 0:
            print(' --> Created: ' + storagePath)

    # always set the permissions
    if os.system('rfchmod 777 ' + storagePath) != 0:
        print(' --> Setting permissions failed. EXIT now.\n')
        sys.exit(1)
    print(' --> Set permissions: 777\n')
165    
166     #===================================================================================================
def create(path,remote='paus@cgate'):
    """Create a directory with permissions 777 in the MIT pnfs area through ssh.

    Only a path containing '/pnfs/cmsaf.mit.edu/t2bat' is handled. For such a path the
    part behind its last '=' is created with 'mkdir -p' on the remote host and then
    opened up with 'chmod 777'.

    remote is the '<user>@<host>' target handed to ssh (formerly hard-wired).

    Returns -1 when path is not in the MIT pnfs area. Otherwise returns the os.system()
    status of the remote commands: the status of mkdir when it failed (chmod is then
    skipped, so a failed mkdir can no longer be masked), else the status of chmod.
    """
    # literal substring test: the dots are not meant as regex wildcards
    if '/pnfs/cmsaf.mit.edu/t2bat' not in path:
        return -1

    path = path.split('=')[-1]
    status = os.system('ssh ' + remote + ' mkdir -p ' + path)
    if status != 0:
        return status
    return os.system('ssh ' + remote + ' chmod 777 ' + path)
180    
181     #===================================================================================================
def createDirGeneral(storageEle,storagePath):
    """Create storagePath on the storage element storageEle and check its permissions.

    create() is tried first; when it returns 0 nothing else is done. Otherwise the
    directory is probed with 'srmls' on 'srm://<storageEle>:8443<storagePath>' and, if
    the probe fails, its three parent levels and the directory itself are created with
    'srmmkdir'. Finally 'srmls -l -count=1' is searched for a 'GroupPermission' line
    containing 'RWX'; if there is none the script is terminated through sys.exit(1).

    NOTE(review): the nesting of this function was reconstructed from a listing that
    had lost its indentation -- confirm against the repository version.
    """
    # create all relevant subdirectories
    pieces = storagePath.split('/')                   # splitting every '/'

    if create(storagePath) == 0:
        print(' ')
        print(' Directory was created at MIT.\n')
        return

    # set the storage URL
    srmBase = 'srm://' + storageEle + ':8443'
    storageUrl = srmBase + storagePath

    # check whether path exists, create it only if missing
    if os.system('srmls ' + storageUrl + ' >& /dev/null') == 0:
        print(' ')
        print(' Directory already found.... moving on.')
    else:
        # the three parent levels (outermost first), then the main storage directory
        targets = ["/".join(pieces[:-depth]) for depth in (3, 2, 1)] + [storagePath]
        for target in targets:
            cmd = 'srmmkdir ' + srmBase + target + ' >& /dev/null'
            print(' srmmkdir: ' + cmd)
            status = os.system(cmd)
            print(' srmmkdir: status %d'%(status))
        # only the status of the last command (the directory itself) is reported
        if status == 0:
            print(' ')
            print(' Directory was created.')
        else:
            print(' ')
            print(' ')
            print(' ')
            print(' ERROR - failed to create the Storage Area.')
            print(' ')
            print(' ')

    print(' Check permissions with: srmls -l -count=1 ' + storageUrl + '\n')
    cmd = 'srmls -l -count=1 ' + storageUrl + ' | grep GroupPermission | grep RWX'
    permissionLines = os.popen(cmd).readlines()       # run command
    for permissionLine in permissionLines:
        print(" Permissions? " + permissionLine[:-1])
    print(' ')

    # no matching line at all means the directory is missing or not group writable
    if not permissionLines:
        print(' --> Directory creation or permissions failed. EXIT now.\n')
        sys.exit(1)
250    
251     #===================================================================================================
252     # Main starts here
253     #===================================================================================================
254     # Define string to explain usage of the script
255     usage = "Usage: submit.py --cmsDataset=<name> | --mitDataset=<name>\n"
256     usage += " --cmssw=<name>\n"
257     usage += " --mitCfg=<name>\n"
258     usage += " --version=<version>\n"
259     usage += " --dbs=<name>\n"
260     usage += " --sched=<name>\n"
261     usage += " --blacklist=<name>\n"
262     usage += " --nSubmit=<submittedJobs>\n"
263     usage += " --skipEvents=<'nRunX:nEventY','nRunXX:nEventYY',...>\n"
264 paus 1.12 usage += " --fixSites=<siteList>\n"
265 paus 1.2 usage += " --complete\n"
266     usage += " --testJob\n"
267     usage += " --noTestJob\n"
268     usage += " --help\n"
269    
270     # Define the valid options which can be specified and check out the command line
271     valid = ['cmsDataset=','mitDataset=','cmssw=','mitCfg=','version=','dbs=','sched=','blacklist=',
272 paus 1.12 'nSubmit=','skipEvents=','fixSites=','complete','useExistingLfns','testJob','noTestJob',
273     'test','help']
274 paus 1.2 try:
275     opts, args = getopt.getopt(sys.argv[1:], "", valid)
276     except getopt.GetoptError, ex:
277     print usage
278     print str(ex)
279     sys.exit(1)
280    
281     # --------------------------------------------------------------------------------------------------
282     # Get all parameters for the production
283     # --------------------------------------------------------------------------------------------------
284 paus 1.8 # crab id
285 paus 1.2 cmd = "date +crab_0_%y%m%d_%H%M%S"
286     for line in os.popen(cmd).readlines(): # run command
287     line = line[:-1]
288     crabId = line
289     print "\n This job will be CrabId: " + crabId + "\n"
290 paus 1.8 # Set defaults for each option
291     cmsDataset = None
292     mitDataset = None
293     cmssw = "cmssw"
294     mitCfg = "filefi"
295     version = "014"
296     #dbs = "https://cmsdbsprod.cern.ch:8443/cms_dbs_prod_global/servlet/DBSServlet"
297     dbs = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
298     sched = "glite"
299     blacklist = ""
300     nSubmit = -1
301     skpEvts = ''
302 paus 1.12 fixSites = ''
303 paus 1.8 complete = 0
304     useExistingLfns = False
305     noTestJob = 0
306     testJob = 0
307     test = 0
308 paus 1.2
309     # Read new values from the command line
310     for opt, arg in opts:
311     if opt == "--help":
312     print usage
313     sys.exit(0)
314     if opt == "--cmsDataset":
315 paus 1.8 cmsDataset = arg
316 paus 1.2 if opt == "--mitDataset":
317 paus 1.8 mitDataset = arg
318 paus 1.2 if opt == "--cmssw":
319 paus 1.8 cmssw = arg
320 paus 1.2 if opt == "--mitCfg":
321 paus 1.8 mitCfg = arg
322 paus 1.2 if opt == "--version":
323 paus 1.8 version = arg
324 paus 1.2 if opt == "--dbs":
325 paus 1.8 dbs = arg
326 paus 1.2 if opt == "--sched":
327 paus 1.8 sched = arg
328 paus 1.2 if opt == "--blacklist":
329 paus 1.8 blacklist = arg
330 paus 1.2 if opt == "--nSubmit":
331 paus 1.8 nSubmit = arg
332 paus 1.2 if opt == "--skipEvents":
333 paus 1.8 skpEvts = arg
334 paus 1.12 if opt == "--fixSites":
335     fixSites = arg
336 paus 1.2 if opt == "--complete":
337 paus 1.8 complete = 1
338     if opt == "--useExistingLfns":
339     useExistingLfns = True
340 paus 1.2 if opt == "--noTestJob":
341 paus 1.8 noTestJob = 1
342     testJob = 0
343 paus 1.2 if opt == "--testJob":
344 paus 1.8 testJob = 1
345     noTestJob = 0
346 paus 1.2 if opt == "--test":
347 paus 1.8 test = 1
348 paus 1.2
349     # Make sure we have the right 'database' and the right config file
350     database = 'Productions'
351 paus 1.3 cmsswPy = cmssw + '_' + crabId + '.py'
352 paus 1.2 if cmssw != 'cmssw':
353     database += '.' + cmssw
354    
355     # Deal with obvious problems
356     if cmsDataset == None and mitDataset == None:
357     cmd = "--cmsDataset | --mitDataset options not provided. One of them is required."
358     raise RuntimeError, cmd
359 paus 1.12 seFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/'+ version + '/seTable'
360     if not os.path.exists(seFile):
361     cmd = "Storage element table not found: %s" % seFile
362     raise RuntimeError, cmd
363 paus 1.3 crabFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'crab.cfg'
364 paus 1.2 if not os.path.exists(crabFile):
365     cmd = "Crab file not found: %s" % crabFile
366     raise RuntimeError, cmd
367 paus 1.3 cmsswFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + cmssw + '.py'
368 paus 1.2 if not os.path.exists(cmsswFile):
369     cmd = "Cmssw file not found: %s" % cmsswFile
370     cmd = " XXXX ERROR no valid configuration found XXXX"
371     raise RuntimeError, cmd
372    
373 paus 1.3
374     # Prepare the ce/se translator
375 paus 1.12 trans = translator.Translator(os.environ['MIT_PROD_DIR']+'/'+mitCfg+'/'+version+'/ceTable',
376     os.environ['MIT_PROD_DIR']+'/'+mitCfg+'/'+version+'/seTable',
377     os.environ['MIT_PROD_DIR']+'/'+mitCfg+'/'+version+'/preferredSites')
378    
379     ## Resolve the other mitCfg parameters from the configuration file
380     #cmd = 'cat ' + os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + database
381     #join = 0
382     #fullLine = ""
383     #bSlash = "\\";
384     #for line in os.popen(cmd).readlines(): # run command
385     # line = line[:-1]
386     # # get ride of empty or commented lines
387     # if line == '' or line[0] == '#':
388     # continue
389     #
390     # # join lines
391     # if join == 1:
392     # fullLine += line
393     # else:
394     # fullLine = line
395     #
396     # # determine if finished or more is coming
397     # if fullLine[-1] == bSlash:
398     # join = 1
399     # fullLine = fullLine[:-1]
400     # else:
401     # join = 0
402     # # test whether there is a directory
403     # names = fullLine.split() # splitting every blank
404     # if names[0] == cmsDataset or names[1] == mitDataset:
405     # cmsDataset = names[0] # CMS name of the dataset
406     # mitDataset = names[1] # equivalent MIT name of the dataset
407     # nevents = int(names[2]) # number of events to be used in the production
408     # if names[4] != "-":
409     # localPath = names[4]
410     # print "\n Sample info from database %s for CMSSW config %s\n %s"\
411     # %(database,cmsswPy,fullLine)
412     # if len(names) == 6:
413     # dbs = names[5]
414     # dbs = 'http://cmsdbsprod.cern.ch/cms_dbs_' + dbs + '/servlet/DBSServlet'
415     # print ' dbs: ' + dbs + '\n'
416     # else:
417     # print ''
418 paus 1.3
419 paus 1.12 # Create the corresponding crab task
420     crabTask = task.Task(crabId,cmsDataset,mitDataset,mitCfg,version,cmssw)
421     if crabTask.mitDataset == 'undefined' or crabTask.cmsDataset == 'undefined':
422 paus 1.2 print "ERROR - dataset not defined."
423     sys.exit(1)
424 paus 1.12 else:
425     mitDataset = crabTask.mitDataset
426     cmsDataset = crabTask.cmsDataset
427     dbs = crabTask.dbs
428 paus 1.2
429 paus 1.12 getFiles(mitCfg,version)
430     lfnFile = makeLfnFile(mitCfg,version,mitDataset,dbs,useExistingLfns)
431 paus 1.8
432 paus 1.12 crabTask.storagePath = findStoragePath(mitCfg,version,mitDataset,seFile)
433 paus 1.2 crabTask.loadAllLfns(lfnFile)
434     crabTask.loadCompletedLfns()
435     crabTask.createMissingLfns(lfnFile,lfnFile + '_' + crabTask.tag)
436 paus 1.4 if crabTask.nLfnMissing == 0:
437     print ' All requested LFNs are available. EXIT now.'
438     sys.exit()
439    
440 paus 1.2 crabTask.createSubTasks(lfnFile + '_' + crabTask.tag)
441     cmd = 'cp ' + lfnFile + '_' + crabTask.tag + '_*' + ' ./'
442     os.system(cmd)
443    
444     nevents = len(crabTask.lfns)
445    
446     # Say what we do now
447     print ' Preparing dataset: ' + cmsDataset + ' [MIT: ' + mitDataset + ' with ' + str(nevents) + \
448     ' total jobs(=lfns), nEvtsTotal: %d]' % crabTask.nTotalEvts
449    
450     # --------------------------------------------------------------------------------------------------
451     # Prepare the config files
452     # --------------------------------------------------------------------------------------------------
453     # Cleaning up
454 paus 1.3 cmd = "rm -f crab_" + crabTask.tag + ".cfg crab_" + crabTask.tag + ".cfg-Template " + cmsswPy
455 paus 1.2 os.system(cmd)
456    
457 paus 1.12 # Storage
458    
459 paus 1.2 # Parse template input and write the crab configuration file
460     fileInput = open(crabFile,'r')
461 paus 1.3 fileOutput = open("crab_" + crabTask.tag + ".cfg-Template",'w')
462 paus 1.2 line = fileInput.readline()
463     while (line != ''):
464     if line[0] != '#':
465 paus 1.12 line = searchReplace(line,mitCfg,version,mitDataset,storage(seFile), \
466 paus 1.2 cmsDataset,cmsswPy,dbs,sched,blacklist,skpEvts)
467     fileOutput.write(line)
468     line = fileInput.readline()
469     fileInput .close()
470     fileOutput.close()
471    
472     # Parse template input and write the crab configuration file
473     fileInput = open(cmsswFile,'r')
474     fileOutput = open(cmsswPy,'w')
475     line = fileInput.readline()
476     while (line != ''):
477     if line[0] != '#':
478 paus 1.12 line = searchReplace(line,mitCfg,version,mitDataset,storage(seFile), \
479 paus 1.2 cmsDataset,cmsswPy,dbs,sched,blacklist,skpEvts)
480     fileOutput.write(line)
481     line = fileInput.readline()
482     fileInput .close()
483     fileOutput.close()
484    
485     # --------------------------------------------------------------------------------------------------
486     # Job creation and submission
487     # --------------------------------------------------------------------------------------------------
488     print '\n Using CMSSW version: ' + os.environ['CMSSW_VERSION']
489     print ' Using CRAB version: ' + os.environ['CRAB_VERS'] + '\n'
490    
491     pattern1 = 'working directory'
492 paus 1.5 pattern2 = 'Total of '
493 paus 1.2
494     # Go through the crabTask and submit each subtask to the grid
495     for subTask in crabTask.subTasks:
496    
497     tag = crabTask.tag + '_' + subTask.tag()
498     print ' Working on subtask: ' + tag
499    
500     nJobsTotal = 0
501     crabIdCheck = ''
502    
503     # adjust crab config
504 paus 1.3 fileInput = open("crab_" + crabTask.tag + ".cfg-Template",'r')
505     fileOutput = open("crab_" + crabTask.tag + ".cfg",'w')
506 paus 1.2 line = fileInput.readline()
507     while (line != ''):
508     if line[0] != '#':
509     line = adjustCfg(line,subTask.nSubTaskLfn,tag)
510     fileOutput.write(line)
511     line = fileInput.readline()
512     fileInput .close()
513     fileOutput.close()
514    
515     # ----------------------------------------------------------------------------------------------
516     # Deal with storage element area
517     # ----------------------------------------------------------------------------------------------
518     # find the forseen storage place
519 paus 1.3 cmd = 'grep ^storage_element crab_' + crabTask.tag + '.cfg-Template'
520 paus 1.2 for file in os.popen(cmd).readlines(): # run command
521     line = file[:-1] # strip '\n'
522     # decode the storage element name
523     names = line.split("=") # splitting every '='
524     storageEle = names.pop()
525     storageEle = re.sub("\s", "",storageEle)
526 paus 1.3 cmd = 'grep ^storage_path crab_' + crabTask.tag + '.cfg-Template'
527 paus 1.2 for file in os.popen(cmd).readlines(): # run command
528     line = file[:-1] # strip '\n'
529     # decode the storage directory name
530     names = line.split("=") # splitting every '='
531     names = names[1:]
532     storagePath = "=".join(names)
533     storagePath = re.sub("\s", "",storagePath)
534 paus 1.3 cmd = 'grep ^user_remote_dir crab_' + crabTask.tag + '.cfg-Template'
535 paus 1.2 for file in os.popen(cmd).readlines(): # run command
536     line = file[:-1] # strip '\n'
537     # decode the storage directory name
538     names = line.split(" ") # splitting every '='
539     storagePath += names[-1]
540     storageUrl = 'srm://' + storageEle + ':8443' + storagePath
541    
542     storagePath = adjustCfg(storagePath,subTask.nSubTaskLfn,tag)
543     storageUrl = adjustCfg(storageUrl, subTask.nSubTaskLfn,tag)
544    
545     print ' StorageUrl: ' + storageUrl
546    
547     # Create storage area
548     if test == 0: # for testing we do not create the directories
549     if (storageEle == 'srm.cern.ch'):
550     createDirCern(storagePath)
551     else:
552     createDirGeneral(storageEle,storagePath)
553    
554     # cmd = "crab -create -debug 3 -USER.ui_working_dir=" + tag + " | tee forDaniele "
555 paus 1.3 cmd = "crab -create -cfg crab_" + crabTask.tag + ".cfg -USER.ui_working_dir=" + tag
556 paus 1.2 print ' -> ' + cmd
557     if test != 0:
558     cmd = 'echo ' + cmd
559     print ' ' + cmd
560     for line in os.popen(cmd).readlines(): # run command
561     line = line[:-1] # strip '\n'
562     print ' CRAB: ' + line
563     if re.search(pattern1,line):
564     f = line.split("/")
565     crabIdCheck = f[-2]
566     if re.search(pattern2,line):
567     f = line.split(" ")
568 paus 1.5 nJobsTotal = f[4]
569 paus 1.2 if nJobsTotal == '':
570     nJobsTotal = f[2]
571 paus 1.3 # report
572 paus 1.2 print ' --> %s jobs created (%s).\n'%(nJobsTotal,tag)
573 paus 1.3 # and cleanup the temporary file for the subtask
574     cmd = "rm -f " + mitDataset + ".lfns_" + tag
575     os.system(cmd)
576    
577 paus 1.2 # adjust arguments
578     cmd = 'input.py --db=' + lfnFile + '_' + tag + ' --option=xml --dataset=' + cmsDataset + \
579     ' > ' + tag + '/share/arguments.xml'
580     print ' update arguments: ' + cmd
581     if test == 0:
582     os.system(cmd)
583    
584     # loop through the file and determine the submission parameters
585     block = ''
586     blocks = []
587     idx = 0
588     minIdxs = []
589     maxIdxs = []
590    
591     fileInput = open(lfnFile + '_' + tag,'r')
592     line = fileInput.readline()
593     while (line != ''):
594     line = line[:-1]
595     if line[0] != '#':
596     idx += 1
597     f = line.split()
598     block = f[0]
599     lfn = f[1]
600     nEvents = f[2]
601     if len(blocks) == 0 or block != blocks[-1]:
602     # new block found
603     blocks .append(block)
604     minIdxs.append(idx)
605     maxIdxs.append(idx)
606     else:
607     maxIdxs[-1] = idx
608     # keep the reading going
609     line = fileInput.readline()
610     fileInput .close()
611    
612     # merge blocks together if they use the same sites
613     lastSites = ''
614     idx = 0
615     mergedIdx = 0
616     mergedBlocks = []
617     mergedSites = []
618     mergedMinIdxs = []
619     mergedMaxIdxs = []
620     print '\n Show the unmerged blocks:'
621     for block in blocks:
622 paus 1.5
623 paus 1.12
624     if fixSites != '':
625     sites = fixSites
626     else:
627     cmd = "sites.py --block=" + block
628     cmd += " --dbs=" + dbs
629     for line in os.popen(cmd).readlines(): # run command
630     line = line[:-1]
631     sites = line
632    
633     sites = trans.translateSes(sites)
634     sites = trans.selectPreferred()
635 paus 1.5
636 paus 1.2 print ' Block ' + block + ' process: %d to %d'%(minIdxs[idx],maxIdxs[idx]) + \
637     ' at\n > ' + sites
638     # block with different sites found
639     if sites != lastSites:
640     mergedSites .append(sites)
641 paus 1.6 mergedBlocks .append(blocks [idx]) # only the first block with these sites is stored
642 paus 1.2 mergedMinIdxs.append(minIdxs[idx])
643     mergedMaxIdxs.append(maxIdxs[idx])
644 paus 1.6 lastSites = sites
645     mergedIdx += 1
646 paus 1.2 else:
647     mergedMaxIdxs[mergedIdx-1] = maxIdxs[idx]
648 paus 1.6
649 paus 1.2 # last action in the loop: increment the unmerged blocks
650     idx += 1
651    
652     # Show already what we will do
653     idx = 0
654     print '\n Show the merged blocks:'
655     for block in mergedBlocks:
656     print ' Merged Block ' + block + ' process: %d to %d'\
657     %(mergedMinIdxs[idx],mergedMaxIdxs[idx]) + \
658     ' at\n > ' + mergedSites[idx]
659     # last action in the loop: increment the merged blocks
660     idx += 1
661    
662     # perfrom the submission block by block (using the merged blocks of course)
663     nSubmission = len(mergedBlocks)
664     idx = 0
665     print '\n Submit the merged blocks:'
666     for block in mergedBlocks:
667     print ' Merged Block ' + block + ' process: %d to %d'\
668     %(mergedMinIdxs[idx],mergedMaxIdxs[idx]) + \
669     ' at\n > ' + mergedSites[idx]
670    
671     nSubmit = '%d-%d'%(mergedMinIdxs[idx],mergedMaxIdxs[idx])
672     if mergedMinIdxs[idx] == mergedMaxIdxs[idx]:
673     nSubmit = '%d,%d'%(mergedMinIdxs[idx],100000000)
674 paus 1.5 cmd = 'crab -submit %s -continue %s -GRID.ce_white_list=%s'%(nSubmit,tag,mergedSites[idx])
675     print ' ' + cmd + '\n'
676 paus 1.12 status = os.system(cmd)
677     while status != 0:
678     print ' Submission failed (%s) --> retry'%(cmd)
679 paus 1.3 status = os.system(cmd)
680    
681 paus 1.2 # last action in the loop: increment the merged blocks
682     idx += 1
683    
684     print ' Number of blocks submitted: %d' % nSubmission
685    
686 paus 1.3 # and cleanup the temporary file for the task
687     cmd = "rm -f crab_" + crabTask.tag + ".cfg crab_" + crabTask.tag + ".cfg-Template " \
688 paus 1.11 + cmsswPy + ' ' + cmsswPy + 'c'
689 paus 1.3 os.system(cmd)
690    
691 paus 1.2 print ' Done... keep watching it...'
692     sys.exit(0)
693    
694    
695     ## if nSubmit != -1:
696     ## cmd = 'crab -continue %s -submit %s'%(crabId,nSubmit)
697     ## print 'SUBMIT.PY: ' + cmd
698     ## status = os.system(cmd)
699     ## if status == 0:
700     ## print ' --> job submitted\n'
701     ## else:
702     ## leftOver = int(nJobsTotal)
703     ## nSubmitted = 0
704     ## nSubBatch = 80
705     ## while (nSubmitted < int(nJobsTotal)):
706     ## if leftOver < nSubBatch:
707     ## nSubBatch = leftOver
708     ##
709     ## cmd = 'crab -continue %s -submit %d'%(crabId,nSubBatch)
710     ## print 'SUBMIT.PY: ' + cmd
711     ## status = os.system(cmd)
712     ## if status == 0:
713     ## print ' --> %d job submitted\n'%(nSubBatch)
714     ## leftOver = leftOver - nSubBatch
715     ## nSubmitted = nSubmitted + nSubBatch
716     ##
717     ## #print 'SUBMIT.PY: ' + cmd
718     ## #status = os.system(cmd)
719     ## #if status == 0:
720     ## # print ' --> job submitted\n'
721     ##
722    
723     # --------------------------------------------------------------------------------------------------
724     # Run a test job to test the configuration and measure the expected output size
725     # --------------------------------------------------------------------------------------------------
726     if noTestJob == 0:
727     #-----------------------------------------------------------------------------------------------
728     # use a specific file for test
729     #-----------------------------------------------------------------------------------------------
730     # first check whether we already have a local file
731     print '\n Try to find an existing local file using "find"'
732     f = cmsDataset.split("/")
733     name = f[1]
734     vers = f[2]
735     tier = f[3]
736     file = ""
737     lfn = ""
738     cmd = 'find ./store/ -name ' + name
739     print 'Searching: ' + cmd
740     for line in os.popen(cmd).readlines(): # run command
741     file = line[:-1] # strip '\n'
742    
743     # looks like there could be a file we found a directory, confirm
744     if file != "":
745     print '\n We have a directory ' + file + ' .. confirming'
746     file = file + '/' + tier
747     cmd = 'find ' + file + ' -name \*.root'
748     for line in os.popen(cmd).readlines(): # run command
749     if line.find(vers):
750     file = line[:-1] # strip '\n'
751     lfn = file[1:]
752    
753     if os.path.exists(file):
754     print ' moving on with locally found file: \n' + ' ' + file
755     else:
756     print ' no local file found'
757     file = ""
758     lfn = ""
759    
# No local file was found: ask findLfn.py for an LFN of the dataset and fetch that file with
# downloadLfn.py unless a copy already exists below the current directory.
if file == "":
    cmd = './bin/findLfn.py --input=' + cmsDataset + ' | grep /store/'
    print('\n Find an LFN to download: ' + cmd)

    # the first output line mentioning /store is taken as the LFN
    for line in os.popen(cmd).readlines():
        if "/store" in line:
            lfn = line[:-1]                     # strip '\n'
            break

    if lfn != "":
        print(' --> LFN: ' + lfn)
        file = '.' + lfn                        # local copy lives under ./store/...

        if not os.path.exists(file):
            cmd = './bin/downloadLfn.py ' + lfn
            print(' --> downloading: ' + cmd)
            status = os.system(cmd)
            # a failed download is fatal for the whole script
            if status != 0:
                print(' ERROR - failed to copy LFN. EXIT now!')
                sys.exit(1)
        else:
            print(' --> File already exists: ' + file)
    else:
        # nothing to read: carry on without an input file
        print("\n WARNING: No file found, continue assuming it is a simulation job.\n\n")
783    
# Parse template input and adjust the input file to the newly copied lfn
#
# The template cmsswPy is copied line by line into "test-" + cmsswPy; every line containing
# "file:" is replaced as a whole by a quoted "file:" entry pointing at the lfn without its
# leading '/'. try/finally guarantees that both files are closed even if reading or writing
# fails half way through.
fileInput = open(cmsswPy,'r')
try:
    fileOutput = open("test-"+cmsswPy,'w')
    try:
        for line in fileInput:
            if "file:" in line:
                line = '"file:' + lfn[1:] + '"\n'
            fileOutput.write(line)
    finally:
        fileOutput.close()
finally:
    fileInput.close()
795    
# Number of events the test job is expected to process (hard coded in the file so far); only
# used for the messages here and for the comparison with the number actually processed.
nTryEvts = 1000.

print(('\n --> Please wait, running a test job now! Should be short (trying %.0f evts).'
       ' Check log: cmssw.log')%nTryEvts)

# Run cmsRun on the test configuration under /usr/bin/time; all output goes to a fresh
# cmssw.log. The --format string makes "time" write three numbers, which are read back later
# as wall clock, user and system time.
cmd = ''.join(['rm -f cmssw.log; ',
               '/usr/bin/time --format "%e %U %S" cmsRun test-',cmsswPy,
               ' >& cmssw.log'])
print(' CMD: ' + cmd)
status = os.system(cmd)
805    
# Extract timing, number of processed events and output size of the test job.
#
# All three values get a default before parsing, so an empty or unexpected log produces a
# warning instead of a NameError/ValueError further down.

# the last line of the log is expected to hold the three numbers written by /usr/bin/time
rtime = utime = stime = 0.0                     # wall clock, user and system time
names = []                                      # split timing line (name kept from the original)
cmd = 'tail -1 cmssw.log'
for line in os.popen(cmd).readlines():          # run command
    names = line.split()                        # splitting every blank
    try:
        rtime, utime, stime = map(float,names[:3])
    except ValueError:
        # fewer than three fields, or fields that are not numbers
        print(' WARNING - could not read the job timing from: ' + line.strip())

# the last 'Begin processing' line tells how many records were processed
nEvtsTest = 1000
cmd = 'grep \'Begin processing\' cmssw.log | tail -1'
for line in os.popen(cmd).readlines():          # run command
    f = line.split()                            # splitting every blank
    try:
        nEvtsTest = int(f[3][:-2])              # fourth word, e.g. '1000th': strip 'th'
    except (IndexError, ValueError):
        print(' WARNING - could not read the number of events from: ' + line.strip())

# add up the sizes "ls -s" reports for the output files
# NOTE(review): the count is divided by 1000 and treated as MB, which assumes "ls -s" reports
# blocks of about 1 kB -- confirm for the target platform
size = 0
cmd = 'ls -s ' + mitDataset + '*.root'
for line in os.popen(cmd).readlines():          # run command
    f = line.split()                            # splitting every blank
    # skip anything that does not start with a block count
    if f and f[0].isdigit():
        size += int(f[0])/1000.                 # size in MB
834    
# Summarize the test job: events processed, output size, timing and the resulting estimates
# per event and per GB of output.
if nEvtsTest != nTryEvts:
    print(' WARNING - Instead of %f did %d'%(nTryEvts,nEvtsTest))

print(' ')
print(' Number of test events produced: %d'%nEvtsTest)
print(' File size for all events: %.2f MB'%size)
print(' Processing time for all events: %.2f secs (u: %.2f s: %.2f)'%(rtime,utime,stime))
print(' ')

# The estimates divide by the number of events and by the output size; both are zero when the
# test job processed nothing or left no output file, so guard against a ZeroDivisionError.
if nEvtsTest > 0:
    print(' --> 1 event == %.2f secs'%(rtime/nEvtsTest))
if nEvtsTest > 0 and size > 0:
    evtsPerGB = nEvtsTest/size*1024.            # size is in MB
    print(' --> 1.00 GB == %d events'%evtsPerGB)
    print(' --> %.2f GB == %d events'%(nevents/evtsPerGB,nevents))
else:
    print(' --> WARNING - no events processed or no output found, no size estimate possible')
print(' ')

# a pure test run ends here
if testJob == 1:
    print('\n Test job finished, stopping now.\n')
    sys.exit(0)
851    
852    
853     ## # are we just completing an existing production? and is there something to complete?
854     ## if complete == 1:
855     ## f = storagePath.split('=')
856     ## rfDir = f[-1]
857     ## #cmd = 'castorInventory.py --nJobs=%s %s | grep Missing'%(nJobsTotal,rfDir)
858     ## cmd = 'castorInventory.py --nJobs=%s %s | grep Missing'%(nJobsTotal,storageUrl)
859     ## #print ' CMD: ' + cmd
860     ## for line in os.popen(cmd).readlines(): # run command
861     ## line = line[:-1] # strip '\n'
862     ## f = line.split(':')
863     ## nSubmit = f[1].strip()
864     ## f = nSubmit.split(',')
865     ## if len(f) == 0 or nSubmit == '':
866     ## print ' No more jobs left it seems, nSubmit=' + nSubmit
867     ## cmd = 'rm -rf ' + crabId
868     ## print ' Cleanup: ' + cmd + '\n\n'
869     ## status = os.system(cmd)
870     ## sys.exit(0)
871     ## elif len(f) == 1:
872     ## nInvalid = str(int(nJobsTotal) + 1000)
873     ## print ' One more jobs left, careful, adjusted, nSubmit=' + nSubmit
874     ## nSubmit = nSubmit + ',' + nInvalid
875     ##
876     ## ### nSubmit = ",".join(f[:-1])
877     ## print ' Missing jobs are: ' + nSubmit
878     ##
879     ## sys.exit(0)