ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/Jeng/scripts/submitToCondor.py
Revision: 1.1
Committed: Tue Jul 14 19:08:21 2009 UTC (15 years, 9 months ago) by jengbou
Content type: text/x-python
Branch: MAIN
CVS Tags: HEAD
Log Message:
add file

File Contents

# Content
1 #!/usr/bin/env python
2 ############################################################################
3 #
4 # Author: Francisco Yumiceva
5 # yumiceva@fnal.gov
6 #
7 # Fermilab, 2007
8 #
9 # 2009/07/14: Modification to prevent empty string for input fileNames
10 # Geng-yuan Jeng
11 ############################################################################
12
13 """
14 Submit jobs to condor. It will create directories in the output path
15 to store configuration files and log files.
16
17 e.g. submitToCondor.py -c ZprimeEventsReco_full.py -l datasets/zp1Tev.txt -n 10 -o /uscms_data/d1/yumiceva/CMSSW_1_3_6/TQAFAnalyzer/
18
19 where ZprimeEventsReco_full.cfg is the configuration file. The configuration file
20 needs to be modified a bit: (1) remove all input files in the PoolSource and add a
21 keyword {FILENAME} you need also the brackets. The filename of the output file should
22 be changed to another keyword called {OUTPUT_FILENAME}.
23
24 To Generate MC: In your cfg file you need to add also the keywords {SEED1},...,{SEED6} for
25 the random seeds which are going to be modified by the script using PYTHON random numbers,
26 you also need to change the output filename to {OUTPUT_FILENAME} All keywords need to have
27 brackets.
28
29 usage: %prog
30 -m, --mc : to generate MC. it will handle random numbers.
31 -e, --events = EVENTS: number of events to generate MC. If -m flag is turned on.
32 -c, --cfg = CFG: python configuration file
33 -l, --list = LIST: file with list of files (dataset)
34 -n, --njobs = NJOBS: number of jobs.
35 -o, --output = OUTPUT: output path
36 -i, --initial= INITIAL: number of initial job
37 -f, --final= FINAL: number of final job
38 -s, --short : for short jobs that need high priority
39 -t, --test : do not submit anything just show what I would do
40
41 Author: Francisco Yumiceva (yumiceva@fnal.gov)
42 """
43
44
45 import os,sys
try:
    import random
except ImportError:
    # Some cmslpc setups leave python2.3 directories on PYTHONPATH, which
    # breaks the import of standard modules.  Print a cleaned-up value the
    # user can paste into the shell, then stop: the environment has to be
    # fixed before this script can do anything useful.
    pythonpath = os.getenv("PYTHONPATH") or ""
    kept = [entry for entry in pythonpath.split(":")
            if entry and entry.find("python2.3") == -1]
    print("\n FIX PYTHONPATH environment variable, try:\n")
    # Entries must be re-joined with ':' or the suggested value is one
    # long, unusable path.
    print("setenv PYTHONPATH " + ":".join(kept))
    sys.exit(1)
59 #os.putenv("PYTHONPATH",natemp)
60 # PYTHONPATH fixed
61
62 import string, re
63 from time import gmtime, localtime, strftime
64
65 #_______________OPTIONS__________________________________
66 import optparse
# Captures everything that follows "usage: " up to the first blank line (or
# the end of the text).  parse() turns the captured lines into options.
USAGE = re.compile(r'(?s)\s*usage: (.*?)(\n[ \t]*\n|$)')


def nonzero(self): # will become the truth-value method of optparse.Values
    """Return True if at least one option holds a value other than None."""
    # .values() exists on both Python 2 and 3; .itervalues() is Python 2 only.
    for v in self.__dict__.values():
        if v is not None:
            return True
    return False


# Dynamically fix optparse.Values so that "not options" means "nothing was
# given".  Python 2 consults __nonzero__, Python 3 consults __bool__.
optparse.Values.__nonzero__ = nonzero
optparse.Values.__bool__ = nonzero
75
class ParsingError(Exception):
    """Raised when the usage section of a docstring cannot be parsed."""
77
# Usage text printed by exit() when it is called without a message.
optionstring = ""


def exit(msg=""):
    """Stop the script by raising SystemExit.

    msg -- text to exit with; when empty, the stored usage text is used with
           "%prog" replaced by the name the script was invoked as.
    """
    if not msg:
        msg = optionstring.replace("%prog", sys.argv[0])
    raise SystemExit(msg)
82
def parse(docstring, arglist=None):
    """Build an option parser from a docstring's usage section and run it.

    The first line after "usage: " becomes the parser's usage string.  Every
    following line must look like "-s, --long : help" (a flag) or
    "-s, --long = VALUE: help" (an option that stores a value).

    docstring -- text containing the usage section; it is also remembered in
                 the module-level optionstring for later use by exit()
    arglist   -- arguments handed to OptionParser.parse_args()

    Returns whatever OptionParser.parse_args() returns.
    Raises ParsingError if the usage section is missing or malformed.
    """
    global optionstring
    optionstring = docstring

    found = USAGE.search(docstring)
    if found is None:
        raise ParsingError("Cannot find the option string")
    usage_lines = found.group(1).splitlines()

    try:
        parser = optparse.OptionParser(usage_lines[0])
        for spec in usage_lines[1:]:
            flags, description = spec.split(':')[:2]
            short_flag, long_flag = flags.split(',')[:2]
            if '=' in flags:
                # "--name = VALUE": the option takes an argument.
                kind = 'store'
                long_flag = long_flag.split('=')[0]
            else:
                kind = 'store_true'
            parser.add_option(short_flag.strip(), long_flag.strip(),
                              action=kind, help=description.strip())
    except (IndexError, ValueError):
        raise ParsingError("Cannot parse the option string correctly")

    return parser.parse_args(arglist)
104
105 #______________________________________________________________________
106
#
# Directory holding the template files used by this script.
#
# CMSSW_BASE is looked up leniently: a plain os.environ[...] lookup would
# raise a bare KeyError while the module is still loading, i.e. before the
# usage text could be shown.  Warn instead; with CMSSW_BASE set every value
# below is exactly what it always was.
_cmssw_base = os.environ.get('CMSSW_BASE')
if _cmssw_base is None:
    sys.stderr.write("WARNING: CMSSW_BASE is not set; the job templates "
                     "will not be found. Set up the CMSSW environment first.\n")
    _cmssw_base = ""
TemplatesPATH = _cmssw_base + "/src/submit2uaf/"

#
# Default locations of the files created by this script (relative to the
# current working directory).
#
main_path = os.getcwd() + "/"

out_path = main_path + "condorjobs/"

scripts_path = main_path + "condorjobs/"
cfg_path = scripts_path + "cfg/"
csh_path = scripts_path + "csh/"
logs_path = scripts_path + "logs/"

#
# Run-time switches; the command-line handling at the bottom of the file
# overwrites them.
#
istest = 0      # --test : only show what would be done
isMC = 0        # --mc   : MC generation, random seeds are filled in
Nevents = "-1"  # --events; NOTE(review): not read by the code shown here
isshort = 0     # --short: mark the condor job as a short one

#
# Keywords that are replaced in the template files.
#
cfg_tags = [
    "{FILENAME}",
    "{OUTPUT_FILENAME}",
]

cfg_tags_mc = [
    "{SEED1}",
    "{SEED2}",
    "{SEED3}",
    "{SEED4}",
    "{SEED5}",
    "{SEED6}",
    "{OUTPUT_FILENAME}",
]

scripts_tags = [
    "{PATHTOOUT}",
    "{CONFIGFILE}",
    "{OUTFILE}",
    "{PATHTOBASE}",
    "{OUTDIR}",
]

condor_tags = [
    "{EXECUTABLE}",
    "{OUTPUT}",
    "{ERROR}",
    "{LOG}",
    "{NOTIFY}",
]

#
# Template file used for each kind of generated file.  The "cfg" entry is
# replaced by the --cfg option when the script runs.
#
template_fnames = {}
template_fnames["cfg"] = TemplatesPATH + "/reco_template.cfg"
template_fnames["condor"] = TemplatesPATH + "/template.condor"
template_fnames["csh"] = TemplatesPATH + "/runcms_new2.csh"
161
162
def _mkdir(newdir):
    """Create directory *newdir* together with any missing parents.

    - if the directory already exists, do nothing
    - if a regular file is in the way, raise OSError
    - if parent directories are missing, create them first
    """
    if os.path.isdir(newdir):
        return
    if os.path.isfile(newdir):
        raise OSError("a file with the same name as the desired "
                      "dir, '%s', already exists." % newdir)

    parent, leaf = os.path.split(newdir)
    if parent and not os.path.isdir(parent):
        _mkdir(parent)
    # A path ending in "/" has an empty last component; the recursive call
    # above has then already created the directory itself.
    if leaf:
        os.mkdir(newdir)
180
def change(infile,outfile,changearray,filearray):
    """Copy *infile* to *outfile*, substituting template keywords.

    Every (keyword, replacement) pair in *changearray* is applied to every
    line of the template.  When *outfile* is a python configuration (its
    name ends in ".py"), a process.maxEvents / process.source section that
    lists the entries of *filearray* is appended.

    infile      -- path of the template to read
    outfile     -- path of the file to create or overwrite
    changearray -- sequence of (keyword, replacement) string pairs
    filearray   -- input file names for the PoolSource; trailing newlines
                   are stripped.  Callers pass 0 when there are none, so any
                   false value is treated as "no files".
    """
    fin = open(infile)
    try:
        fout = open(outfile, "w")
        try:
            for line in fin:
                for pair in changearray:
                    line = line.replace(pair[0], pair[1])
                fout.write(line)

            # Look at the extension only: a ".py" somewhere in a directory
            # name must not turn a .csh/.condor file into a configuration.
            if outfile.endswith(".py"):
                fout.write(
                    "\n"
                    "process.maxEvents = cms.untracked.PSet(input = cms.untracked.int32(-1) )\n"
                    "\n"
                    "process.source = cms.Source(\"PoolSource\",\n"
                    "fileNames = cms.untracked.vstring(\n")
                names = [name.strip("\n") for name in (filearray or [])]
                if names:
                    # One quoted name per line, comma after all but the last.
                    fout.write("\"" + "\",\n\"".join(names) + "\"\n")
                fout.write(")\n")
                fout.write(", duplicateCheckMode = cms.untracked.string('noDuplicateCheck')\n")
                fout.write(")\n")
        finally:
            # Close explicitly so the content is on disk before the caller
            # chmods, appends to, or submits the file.
            fout.close()
    finally:
        fin.close()
227
228
229
def submit_jobs(njob,array,ini_cfgfile,output_path):
    """Write the cfg, csh and condor files for one job and submit it.

    njob        -- job number; zero-padded to four digits in every file name
                   and used as the random seed in MC mode
    array       -- input file names handed to change() for the job's cfg
    ini_cfgfile -- name of the user's configuration template
    output_path -- directory that receives the job's .root output

    Reads the module-level settings cfg_path, csh_path, logs_path, isMC,
    isshort and istest.  Exits the script if the job's .root output already
    exists or if CMSSW_BASE is not set.
    """
    cfgfile = ini_cfgfile.replace("_template","")
    njobstr = str(njob).rjust(4, "0")

    def numbered(extension):
        # Name of this job's file of the given kind, e.g. foo_0007.root
        return cfgfile.replace(".py", "_" + njobstr + extension)

    outfilename_cfg = cfg_path + numbered(".py")
    outfilename_root = output_path + numbered(".root")
    outfilename_log = logs_path + numbered(".log")
    outfilename_csh = csh_path + numbered(".csh")
    outfilename_condor = csh_path + numbered(".condor")

    if os.path.isfile(outfilename_root):
        print("Not submitting into condor batch farm since rootoutput already exists")
        print("delete or rename: "+outfilename_root+"\n to resubmit")
        # Stop the whole submission rather than overwrite existing results.
        sys.exit(1)

    cmssw_base = os.getenv("CMSSW_BASE")
    if cmssw_base is None:
        sys.exit("CMSSW_BASE is not set; set up the CMSSW environment first")

    #
    # First create the configuration file:
    #
    if isMC:
        # Seeding with the job number makes every job's seeds reproducible.
        random.seed(njob)
        changearray = [(tag, str(random.randint(1,987654321)))
                       for tag in cfg_tags_mc[:6]]
        changearray.append((cfg_tags_mc[6], outfilename_root))
    else:
        # {FILENAME} is blanked out; change() appends the PoolSource with
        # the real file list at the end of the configuration instead.
        changearray = [(cfg_tags[0], ""),
                       (cfg_tags[1], outfilename_root)]
    change(template_fnames["cfg"], outfilename_cfg, changearray, array)

    #
    # now create the script to process the file:
    #
    changearray = [(scripts_tags[0], output_path+"/"),
                   (scripts_tags[1], outfilename_cfg),
                   (scripts_tags[2], outfilename_root),
                   (scripts_tags[3], cmssw_base+"/src"),
                   (scripts_tags[4], output_path)]
    change(template_fnames["csh"], outfilename_csh, changearray, 0)
    # rwxrwxr-x, spelled so that it parses on both Python 2 and Python 3.
    os.chmod(outfilename_csh, int("775", 8))

    #
    # finally create the condor job description file:
    #
    changearray = [(condor_tags[0], outfilename_csh),
                   (condor_tags[1], outfilename_log.replace(".log",".stdout")),
                   (condor_tags[2], outfilename_log.replace(".log",".stderr")),
                   (condor_tags[3], outfilename_log),
                   (condor_tags[4], os.getenv("USER")+"@fnal.gov")]
    change(template_fnames["condor"], outfilename_condor, changearray, 0)
    if isshort:
        fout = open(outfilename_condor, "a")
        try:
            fout.write("+LENGTH=\"SHORT\"\n")
        finally:
            fout.close()

    submitcommand = "/opt/condor/bin/condor_submit "+outfilename_condor
    print(submitcommand+"\n")

    if istest:
        print("Not submitting into condor batch farm. This is only a test")
        return

    # Read the output and close the pipe so the submission has really
    # finished, and so that a failed condor_submit does not pass unnoticed.
    child = os.popen(submitcommand)
    try:
        output = child.read()
    finally:
        status = child.close()
    if output:
        print(output.rstrip("\n"))
    if status:
        print("condor_submit failed (status "+str(status)+") for "+outfilename_condor)
314
315
316
317 #________________________________________________________________
if __name__ =='__main__':

    option,args = parse(__doc__)
    if not args and not option: exit()

    # A configuration file and a job count are always needed; a list of
    # input files only when real data (not MC) is processed.
    if option.mc:
        if not option.cfg or not option.njobs:
            print(" you need to provide configuration file, number of jobs")
            exit()
    elif not option.cfg or not option.njobs or not option.list:
        print(" you need to provide configuration file, list of files, number of jobs")
        exit()

    ini_cfgfile = option.cfg
    template_fnames["cfg"] = ini_cfgfile

    if option.mc:
        Nevents = str(option.events)
    else:
        list_of_files = option.list

    number_of_jobs = option.njobs

    # Range of job numbers to submit; a final value of 0 means "no upper limit".
    ini_run = 0
    fin_run = 0
    if option.initial:
        ini_run = int(option.initial)
    if option.final:
        fin_run = int(option.final)

    # Module-level switches read by submit_jobs().
    istest = option.test
    isMC = option.mc
    isshort = option.short

    output_path = ""
    if option.output:
        output_path = option.output+"/"

    cfg_path = output_path+ "cfg/"
    csh_path = output_path+ "csh/"
    logs_path = output_path+ "logs/"

    #make directories
    for needed in (cfg_path, csh_path, logs_path):
        if not os.path.exists(needed):
            _mkdir(needed)

    if not option.mc:
        # Collect the .root entries of the list file, without the quotes,
        # commas and newlines that surround them.
        root_files = []
        inputfile = open(list_of_files)
        try:
            for iline in inputfile:
                if iline.find(".root") == -1:
                    continue
                iline = iline.strip("'")
                root_files.append(iline.strip('\',\n'))
        finally:
            inputfile.close()

        requested = float(number_of_jobs)
        if requested <= 0:
            exit(" number of jobs must be positive")

        # At least one file per job: with more jobs than files the quotient
        # is 0, which would otherwise submit a first job without any input.
        filesperjob = int(len(root_files)/requested)
        if filesperjob < 1:
            filesperjob = 1

        # Consecutive groups of filesperjob files; files left over after the
        # last full group go into one additional job.
        njob = 0
        for start in range(0, len(root_files), filesperjob):
            subset = root_files[start:start+filesperjob]
            njob = njob + 1
            if njob >= ini_run and (fin_run == 0 or njob <= fin_run):
                submit_jobs(njob,subset,ini_cfgfile,output_path)

    else:
        # MC generation has no input files: one job per job number.
        for ijob in range(1, int(number_of_jobs)+1):
            submit_jobs(ijob,[],ini_cfgfile,output_path)
436
437
438