ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/Jeng/scripts/submitToCondor.py
Revision: 1.1
Committed: Tue Jul 14 19:08:21 2009 UTC (15 years, 9 months ago) by jengbou
Content type: text/x-python
Branch: MAIN
CVS Tags: HEAD
Log Message:
add file

File Contents

# User Rev Content
1 jengbou 1.1 #!/usr/bin/env python
2     ############################################################################
3     #
4     # Author: Francisco Yumiceva
5     # yumiceva@fnal.gov
6     #
7     # Fermilab, 2007
8     #
9     # 2009/07/14: Modification to prevent empty string for input fileNames
10     # Geng-yuan Jeng
11     ############################################################################
12    
13     """
14     Submit jobs to condor. It will create directories in the output path
15     to store configuration files and log files.
16    
17     e.g. submitToCondor.py -c ZprimeEventsReco_full.py -l datasets/zp1Tev.txt -n 10 -o /uscms_data/d1/yumiceva/CMSSW_1_3_6/TQAFAnalyzer/
18    
19     where ZprimeEventsReco_full.py is the configuration file. The configuration file
20     needs to be modified a bit: (1) remove all input files in the PoolSource and add the
21     keyword {FILENAME} in their place (the brackets are required); (2) change the filename
22     of the output file to the keyword {OUTPUT_FILENAME}.
23    
24     To generate MC: in your configuration file you also need to add the keywords
25     {SEED1},...,{SEED6} for the random seeds, which the script replaces with Python
26     random numbers, and to change the output filename to {OUTPUT_FILENAME}. All keywords
27     need to have brackets.
28    
29     usage: %prog
30     -m, --mc : to generate MC. it will handle random numbers.
31     -e, --events = EVENTS: number of events to generate MC. If -m flag is turned on.
32     -c, --cfg = CFG: python configuration file
33     -l, --list = LIST: file with list of files (dataset)
34     -n, --njobs = NJOBS: number of jobs.
35     -o, --output = OUTPUT: output path
36     -i, --initial= INITIAL: number of initial job
37     -f, --final= FINAL: number of final job
38     -s, --short : for short jobs that need high priority
39     -t, --test : do not submit anything just show what I would do
40    
41     Author: Francisco Yumiceva (yumiceva@fnal.gov)
42     """
43    
44    
45     import os,sys
try:
    import random
except ImportError:
    # Work-around kept from the original script for the cmslpc Python
    # configuration: PYTHONPATH entries that mention python2.3 are dropped and
    # the remaining ones are offered to the user as a replacement value.
    # Nothing is modified here; the script only prints the suggestion and stops.
    search_path = os.getenv("PYTHONPATH") or ""  # unset variable -> no entries
    kept_entries = [entry for entry in search_path.split(":")
                    if entry and entry.find("python2.3") == -1]
    print("\n FIX PYTHONPATH environment variable, try:\n")
    # Entries must stay ":"-separated, otherwise the suggested value is unusable.
    print("setenv PYTHONPATH " + ":".join(kept_entries))
    sys.exit(1)
61    
62     import string, re
63     from time import gmtime, localtime, strftime
64    
#_______________OPTIONS__________________________________
import optparse

# Matches the "usage: ..." section of a docstring: group(1) is everything
# after "usage: " up to the first blank line (or the end of the text).
USAGE = re.compile(r'(?s)\s*usage: (.*?)(\n[ \t]*\n|$)')


def nonzero(self): # will become the truth-value method of optparse.Values
    """Return True if at least one attribute of *self* is not None.

    Installed below as the truth-value hook of ``optparse.Values`` so that
    ``not options`` means "no option was given on the command line".
    """
    # .values() (rather than the Python-2-only .itervalues()) works on both
    # major Python versions.
    for value in self.__dict__.values():
        if value is not None:
            return True
    return False


# Dynamically fix optparse.Values: Python 2 consults __nonzero__ for truth
# testing, Python 3 consults __bool__; install the hook under both names.
optparse.Values.__nonzero__ = nonzero
optparse.Values.__bool__ = nonzero
75    
class ParsingError(Exception):
    """Signals that a usage/option description could not be interpreted."""
77    
# Text printed by exit() when it is called without a message.
optionstring = ""


def exit(msg=""):
    """Stop the program by raising SystemExit.

    msg -- text to exit with; when empty, the module-level ``optionstring``
           is used instead, with "%prog" replaced by ``sys.argv[0]``.
    """
    if not msg:
        msg = optionstring.replace("%prog", sys.argv[0])
    raise SystemExit(msg)
82    
def parse(docstring, arglist=None):
    """Build an option parser from the "usage:" section of *docstring*.

    The first line of the usage section becomes the parser's usage string.
    Every following line must look like ``-x, --long [= NAME]: help text``;
    an "=" in the flag part makes the option store a value, otherwise it is
    a boolean switch.

    docstring -- text containing the usage section; it is also remembered in
                 the module-level ``optionstring`` (used by exit()).
    arglist   -- arguments to parse; None lets optparse read sys.argv.

    Returns the ``(options, args)`` pair produced by ``parse_args``.
    Raises ParsingError when the usage section is missing or malformed.
    """
    global optionstring
    optionstring = docstring

    found = USAGE.search(docstring)
    if found is None:
        raise ParsingError("Cannot find the option string")
    usage_lines = found.group(1).splitlines()

    try:
        parser = optparse.OptionParser(usage_lines[0])
        for spec in usage_lines[1:]:
            flags, description = spec.split(':')[:2]
            short_flag, long_flag = flags.split(',')[:2]
            action = 'store_true'
            if '=' in flags:
                # "--name = VALUE": keep only the flag itself.
                action = 'store'
                long_flag = long_flag.split('=')[0]
            parser.add_option(short_flag.strip(), long_flag.strip(),
                              action=action, help=description.strip())
    except (IndexError, ValueError):
        raise ParsingError("Cannot parse the option string correctly")

    return parser.parse_args(arglist)
104    
105     #______________________________________________________________________
106    
#
# Directory with the template files used by this script, located under the
# area named by the CMSSW_BASE environment variable (which must be set).
#
TemplatesPATH = "%s/src/submit2uaf/" % os.environ['CMSSW_BASE']
#
# Default locations for the files created by this script, all below the
# current working directory. The __main__ section reassigns cfg_path,
# csh_path and logs_path from the output option.
#
main_path = "%s/" % os.getcwd()

scripts_path = main_path + "condorjobs/"
out_path = scripts_path

cfg_path = scripts_path + "cfg/"
csh_path = scripts_path + "csh/"
logs_path = scripts_path + "logs/"

# Run-mode defaults; the __main__ section overwrites them from the command line.
istest = 0       # test mode: files are written but nothing is submitted
isMC = 0         # MC mode: random seeds are substituted into the configuration
isshort = 0      # short job: a SHORT length line is appended to the condor file
Nevents = "-1"   # number of events requested for MC, kept as a string
130    
#
# Keywords looked up in the template files. submit_jobs() addresses every
# list by position, so the order of the entries must not change.
#

# Configuration template when reading existing files:
# input-file placeholder, output file name.
cfg_tags = [
    "{FILENAME}",
    "{OUTPUT_FILENAME}",
]

# Configuration template in MC mode: six random seeds, output file name.
cfg_tags_mc = [
    "{SEED1}", "{SEED2}", "{SEED3}",
    "{SEED4}", "{SEED5}", "{SEED6}",
    "{OUTPUT_FILENAME}",
]

# Shell-script template: output path, configuration file, output file,
# CMSSW source area, output directory.
scripts_tags = [
    "{PATHTOOUT}",
    "{CONFIGFILE}",
    "{OUTFILE}",
    "{PATHTOBASE}",
    "{OUTDIR}",
]

# Condor description template: executable, stdout file, stderr file,
# log file, notification address.
condor_tags = [
    "{EXECUTABLE}",
    "{OUTPUT}",
    "{ERROR}",
    "{LOG}",
    "{NOTIFY}",
]
155    
156    
# Template file used for each kind of generated file. The "cfg" entry is
# replaced in the __main__ section by the configuration file given on the
# command line.
template_fnames = {
    "cfg": TemplatesPATH + "/reco_template.cfg",
    "condor": TemplatesPATH + "/template.condor",
    "csh": TemplatesPATH + "/runcms_new2.csh",
}
161    
162    
def _mkdir(newdir):
    """Create directory *newdir*, including any missing parents.

    - nothing happens when the directory already exists
    - OSError is raised when a regular file occupies the name
    - missing parent directories are created first (recursively)
    """
    if os.path.isdir(newdir):
        return

    if os.path.isfile(newdir):
        raise OSError("a file with the same name as the desired "
                      "dir, '%s', already exists." % newdir)

    parent, leaf = os.path.split(newdir)
    parent_missing = bool(parent) and not os.path.isdir(parent)
    if parent_missing:
        _mkdir(parent)
    # A path ending in "/" has an empty last component: creating the parent
    # above already produced the requested directory.
    if leaf:
        os.mkdir(newdir)
180    
def change(infile,outfile,changearray,filearray):
    """Copy template *infile* to *outfile*, substituting keywords.

    infile      -- path of the template file to read.
    outfile     -- path of the file to write (overwritten if present).
    changearray -- sequence of (keyword, replacement) pairs; every occurrence
                   of each keyword in every line is replaced, in order.
    filearray   -- input file names for the job. Only used when *outfile*
                   ends in ".py"; callers pass a dummy value otherwise.

    When *outfile* ends in ".py", a ``process.maxEvents`` setting and a
    ``process.source`` definition listing the names in *filearray* (one
    quoted name per line, trailing newlines removed) are appended after the
    copied template.

    Both files are closed before returning, so the result is complete on
    disk when the caller goes on to chmod or submit it.
    """
    fin = open(infile)
    try:
        template_lines = fin.readlines()
    finally:
        fin.close()

    fout = open(outfile,"w")
    try:
        for line in template_lines:
            for pair in changearray:
                line = line.replace(pair[0], pair[1])
            fout.write(line)

        # Test the extension only: a ".py" somewhere else in the path (e.g. in
        # a directory name) must not turn a .csh/.condor file into a config.
        if outfile.endswith(".py"):
            fout.write('''
process.maxEvents = cms.untracked.PSet(input = cms.untracked.int32(-1) )

process.source = cms.Source("PoolSource",
fileNames = cms.untracked.vstring(
''')
            quoted = ['"' + name.strip("\n") + '"' for name in filearray]
            if quoted:
                # Comma after every name except the last one.
                fout.write(",\n".join(quoted) + "\n")

            fout.write(")\n")
            fout.write(", duplicateCheckMode = cms.untracked.string('noDuplicateCheck')\n")
            fout.write(")\n")
    finally:
        fout.close()
227    
228    
229    
def submit_jobs(njob,array,ini_cfgfile,output_path):
    """Write the files for one job and hand the job to condor.

    Three files are generated from the templates in ``template_fnames``: the
    job configuration, the shell script that runs it, and the condor job
    description. Their names are derived from *ini_cfgfile* by removing
    "_template" and replacing ".py" with "_NNNN" plus the proper extension,
    where NNNN is the zero-padded job number.

    njob        -- job number; also used as the random seed in MC mode.
    array       -- input file names written into the job configuration.
    ini_cfgfile -- name of the configuration template.
    output_path -- prefix of the ROOT output file; also substituted into the
                   shell script.

    Reads the module-level settings cfg_path, csh_path, logs_path, isMC,
    isshort and istest. The script exits with status 1 when the ROOT output
    of this job already exists. In test mode the files are written but
    nothing is submitted.
    """
    import stat  # local import: only needed for the permission bits below

    cfgfile = ini_cfgfile.replace("_template","")

    # Zero-pad the job number to four characters, e.g. 7 -> "0007".
    njobstr = str(njob).rjust(4, "0")
    tail = "_" + njobstr

    outfilename_cfg = cfg_path + cfgfile.replace(".py", tail + ".py")
    outfilename_root = output_path + cfgfile.replace(".py", tail + ".root")
    outfilename_log = logs_path + cfgfile.replace(".py", tail + ".log")
    outfilename_csh = csh_path + cfgfile.replace(".py", tail + ".csh")
    outfilename_condor = csh_path + cfgfile.replace(".py", tail + ".condor")

    if os.path.isfile(outfilename_root):
        print("Not submitting into condor batch farm since rootoutput already exists")
        print("delete or rename: "+outfilename_root+"\n to resubmit")
        sys.exit(1)

    #
    # First create the configuration file:
    #
    if isMC:
        # Seeding with the job number makes the seeds of a job reproducible.
        random.seed(njob)
        changearray = [(tag, str(random.randint(1,987654321)))
                       for tag in cfg_tags_mc[:6]]
        changearray.append((cfg_tags_mc[6],outfilename_root))
    else:
        # The input-file keyword is blanked out; change() appends the real
        # file list to the end of the configuration instead.
        changearray = [(cfg_tags[0],""),
                       (cfg_tags[1],outfilename_root)]
    change(template_fnames["cfg"],outfilename_cfg,changearray,array)

    #
    # now create the script to process the file:
    #
    changearray = [(scripts_tags[0],output_path+"/"),
                   (scripts_tags[1],outfilename_cfg),
                   (scripts_tags[2],outfilename_root),
                   (scripts_tags[3],os.getenv("CMSSW_BASE")+"/src"),
                   (scripts_tags[4],output_path)]
    change(template_fnames["csh"],outfilename_csh,changearray,0)
    # rwxrwxr-x (octal 775), spelled with stat constants so the line is valid
    # on every Python version.
    os.chmod(outfilename_csh,
             stat.S_IRWXU | stat.S_IRWXG | stat.S_IROTH | stat.S_IXOTH)

    #
    # finally create the condor job description file:
    #
    stdout = outfilename_log.replace(".log",".stdout")
    stderr = outfilename_log.replace(".log",".stderr")
    logfiles = outfilename_log
    changearray = [(condor_tags[0],outfilename_csh),
                   (condor_tags[1],stdout),
                   (condor_tags[2],stderr),
                   (condor_tags[3],logfiles),
                   (condor_tags[4],os.getenv("USER")+"@fnal.gov")]
    change(template_fnames["condor"],outfilename_condor,changearray,0)
    if isshort:
        fout = open(outfilename_condor,"a")
        try:
            fout.write("+LENGTH=\"SHORT\"\n")
        finally:
            fout.close()

    submitcommand ="/opt/condor/bin/condor_submit "+outfilename_condor
    print(submitcommand+"\n")

    if istest:
        print("Not submitting into condor batch farm. This is only a test")
        return

    # Read the command's output and close the pipe explicitly: close() waits
    # for condor_submit to finish and returns its status (None on success).
    child = os.popen(submitcommand)
    try:
        response = child.read()
    finally:
        status = child.close()
    if response:
        print(response.rstrip("\n"))
    if status is not None:
        print("condor_submit returned status %s for %s" % (status, outfilename_condor))
314    
315    
316    
#________________________________________________________________
def _read_root_files(list_path):
    """Return the file names listed in *list_path*.

    Only lines containing ".root" are kept; surrounding single quotes,
    commas and newlines are removed from each of them.
    """
    listing = open(list_path)
    try:
        lines = listing.readlines()
    finally:
        listing.close()
    return [line.strip("',\n") for line in lines if line.find(".root") != -1]


def _split_into_jobs(files, number_of_jobs):
    """Split *files* into consecutive groups, one group per job.

    The group size is len(files) / number_of_jobs rounded down, but never
    less than one, so no job is ever created with an empty file list. Because
    of the rounding the number of groups can exceed *number_of_jobs*.
    """
    per_job = int(float(len(files)) / float(number_of_jobs))
    if per_job < 1:
        per_job = 1
    return [files[start:start + per_job]
            for start in range(0, len(files), per_job)]


def _job_selected(njob, ini_run, fin_run):
    """Tell whether job number *njob* lies in the requested range.

    A *fin_run* of 0 means "no upper limit"; otherwise the job number must
    lie between *ini_run* and *fin_run*, both included.
    """
    if fin_run == 0:
        return njob >= ini_run
    return fin_run > 0 and ini_run <= njob <= fin_run


if __name__ =='__main__':

    option,args = parse(__doc__)
    if not args and not option: exit()

    # A configuration file and a job count are needed in every mode; the list
    # of input files only when existing files are read (no -m).
    if not option.cfg or not option.njobs or (not option.mc and not option.list):
        if option.mc:
            print(" you need to provide configuration file, number of jobs")
        else:
            print(" you need to provide configuration file, list of files, number of jobs")
        exit()

    number_of_jobs = option.njobs
    try:
        jobs_requested = float(number_of_jobs)
    except ValueError:
        jobs_requested = 0
    if not jobs_requested > 0:
        exit(" number of jobs must be a positive number, got: " + number_of_jobs)

    ini_cfgfile = option.cfg

    template_fnames["cfg"] = ini_cfgfile

    if not option.mc:
        list_of_files = option.list
    else:
        Nevents = str(option.events)

    # Optional range of job numbers to (re)submit; 0 means "not given".
    ini_run = 0
    fin_run = 0

    if option.initial:
        ini_run = int(option.initial)
    if option.final:
        fin_run = int(option.final)

    istest = option.test
    isMC = option.mc
    isshort = option.short

    output_path = ""

    if option.output:
        output_path = option.output+"/"

    cfg_path = output_path+ "cfg/"
    csh_path = output_path+ "csh/"
    logs_path = output_path+ "logs/"

    #make directories
    for needed_dir in (cfg_path, csh_path, logs_path):
        if not os.path.exists(needed_dir):
            _mkdir(needed_dir)

    if not option.mc:
        # One job per group of input files; jobs are numbered from 1.
        njob = 0
        for subset in _split_into_jobs(_read_root_files(list_of_files), number_of_jobs):
            njob = njob + 1
            if _job_selected(njob, ini_run, fin_run):
                submit_jobs(njob,subset,ini_cfgfile,output_path)

    else:
        # MC generation: no input files, one job per requested job number.
        subset = []
        for ijob in range(1, int(number_of_jobs) + 1):
            submit_jobs(ijob,subset,ini_cfgfile,output_path)
436    
437    
438