1 |
#!/usr/bin/env python
|
2 |
############################################################################
|
3 |
#
|
4 |
# Author: Francisco Yumiceva
|
5 |
# yumiceva@fnal.gov
|
6 |
#
|
7 |
# Fermilab, 2007
|
8 |
#
|
9 |
# 2009/07/14: Modification to prevent empty string for input fileNames
|
10 |
# Geng-yuan Jeng
|
11 |
############################################################################
|
12 |
|
13 |
"""
|
14 |
Submit jobs to condor. It will create directories in the output path
|
15 |
to store configuration files and log files.
|
16 |
|
17 |
e.g. submitToCondor.py -c ZprimeEventsReco_full.py -l datasets/zp1Tev.txt -n 10 -o /uscms_data/d1/yumiceva/CMSSW_1_3_6/TQAFAnalyzer/
|
18 |
|
19 |
where ZprimeEventsReco_full.py is the configuration file. The configuration file
|
20 |
needs to be modified a bit: (1) remove all input files in the PoolSource and add a
|
21 |
keyword {FILENAME} (the brackets are required). The filename of the output file should
|
22 |
be changed to another keyword called {OUTPUT_FILENAME}.
|
23 |
|
24 |
To Generate MC: In your cfg file you need to add also the keywords {SEED1},...,{SEED6} for
|
25 |
the random seeds which are going to be modified by the script using PYTHON random numbers,
|
26 |
you also need to change the output filename to {OUTPUT_FILENAME}. All keywords need to have
|
27 |
brackets.
|
28 |
|
29 |
usage: %prog
|
30 |
-m, --mc : to generate MC. it will handle random numbers.
|
31 |
-e, --events = EVENTS: number of events to generate MC. If -m flag is turned on.
|
32 |
-c, --cfg = CFG: python configuration file
|
33 |
-l, --list = LIST: file with list of files (dataset)
|
34 |
-n, --njobs = NJOBS: number of jobs.
|
35 |
-o, --output = OUTPUT: output path
|
36 |
-i, --initial= INITIAL: number of initial job
|
37 |
-f, --final= FINAL: number of final job
|
38 |
-s, --short : for short jobs that need high priority
|
39 |
-t, --test : do not submit anything just show what I would do
|
40 |
|
41 |
Author: Francisco Yumiceva (yumiceva@fnal.gov)
|
42 |
"""
|
43 |
|
44 |
|
45 |
import os,sys
|
46 |
try:
    import random
except Exception:
    # "import random" failed.  The original author attributes this to the
    # cmslpc python configuration, so suggest a PYTHONPATH with every entry
    # mentioning python2.3 removed and stop.
    # (Exception rather than a bare except, so Ctrl-C is not swallowed on
    # interpreters where KeyboardInterrupt is not an Exception subclass.)
    atemp = os.getenv("PYTHONPATH") or ""   # may be unset
    kept = [iatemp for iatemp in atemp.split(":")
            if iatemp.find("python2.3") == -1]
    # Re-join with ":" -- plain concatenation would glue the remaining
    # directories into a single unusable path.
    natemp = ":".join(kept)
    print("\n FIX PYTHONPATH environment variable, try:\n")
    print("setenv PYTHONPATH " + natemp)
    sys.exit()
# PYTHONPATH fixed
|
61 |
|
62 |
import string, re
|
63 |
from time import gmtime, localtime, strftime
|
64 |
|
65 |
#_______________OPTIONS__________________________________
|
66 |
import optparse
|
67 |
USAGE = re.compile(r'(?s)\s*usage: (.*?)(\n[ \t]*\n|$)')
|
68 |
def nonzero(self): # will become the nonzero method of optparse.Values
    """Return True if at least one option was given.

    An option counts as "given" when its attribute on the instance is not
    None; an instance whose attributes are all None (or that has none)
    is considered empty and yields False.
    """
    # values() instead of itervalues(): same result, but also available on
    # Python 3 where dict.itervalues() no longer exists.
    for v in self.__dict__.values():
        if v is not None:
            return True
    return False
|
73 |
|
74 |
optparse.Values.__nonzero__ = nonzero # dynamically fix optparse.Values
|
75 |
|
76 |
class ParsingError(Exception): pass
|
77 |
|
78 |
optionstring=""
|
79 |
|
80 |
def exit(msg=""):
    """Terminate the script by raising SystemExit.

    With a non-empty msg that text is the exit message; otherwise the stored
    option string is used, with "%prog" replaced by sys.argv[0].
    """
    if msg:
        text = msg
    else:
        text = optionstring.replace("%prog", sys.argv[0])
    raise SystemExit(text)
|
82 |
|
83 |
def parse(docstring, arglist=None):
    """Build an optparse parser from a usage docstring and parse arglist.

    The text following "usage: " (up to the first blank line) is taken as
    the option description: its first line becomes the parser's usage
    string and every further line must look like
    "-s, --long[=VALUE]: help text".  Options written with "=" store a
    value, the others are boolean flags.

    The docstring is also remembered in the module-level optionstring so
    that exit() can print it.

    Returns whatever OptionParser.parse_args(arglist) returns.
    Raises ParsingError when no usage section is found or a line of it
    cannot be split into its parts.
    """
    global optionstring
    optionstring = docstring

    found = USAGE.search(optionstring)
    if found is None:
        raise ParsingError("Cannot find the option string")
    usage_lines = found.group(1).splitlines()

    try:
        parser = optparse.OptionParser(usage_lines[0])
        for spec in usage_lines[1:]:
            flags, description = spec.split(':')[:2]
            short_flag, long_flag = flags.split(',')[:2]
            if '=' in flags:
                # "--name = VALUE": keep only the option name itself
                long_flag = long_flag.split('=')[0]
                action = 'store'
            else:
                action = 'store_true'
            parser.add_option(short_flag.strip(), long_flag.strip(),
                              action=action, help=description.strip())
    except (IndexError, ValueError):
        raise ParsingError("Cannot parse the option string correctly")

    return parser.parse_args(arglist)
|
104 |
|
105 |
#______________________________________________________________________
|
106 |
|
107 |
#
|
108 |
# path to this scripts and the used template file:
|
109 |
#
|
110 |
# Directory with this script's template files, below $CMSSW_BASE/src.
# os.environ[...] raises KeyError here if CMSSW_BASE is not set.
TemplatesPATH = os.environ['CMSSW_BASE']+"/src/submit2uaf/"
#
# Default locations of the files created by this script (below the current
# working directory).  cfg_path, csh_path and logs_path are reassigned in the
# __main__ section from the -o/--output option.
#
#main_path = "/uscmst1b_scratch/lpc1/cmsroc/yumiceva/CMSSW_1_2_0_pre3/src/RecoVertex/BeamSpotProducer/test/"
main_path = os.getcwd() + "/"

out_path = main_path+"condorjobs/"

scripts_path = main_path+"condorjobs/"
cfg_path = scripts_path+"cfg/"
#condor_path = scripts_path+"condor/"
csh_path = scripts_path+"csh/"
logs_path = scripts_path+"logs/"


# Run-mode settings; the __main__ section overwrites them from the command
# line options (-t, -m, -e, -s respectively).
istest = 0
isMC = 0
Nevents = "-1"
isshort = 0

#
# Placeholder keywords that are substituted in the template files.
#

# keywords replaced in the user's configuration file (data mode)
cfg_tags = ["{FILENAME}",
            "{OUTPUT_FILENAME}"]

# keywords replaced in the user's configuration file (MC mode):
# six random seeds followed by the output file name
cfg_tags_mc = ["{SEED1}",
               "{SEED2}",
               "{SEED3}",
               "{SEED4}",
               "{SEED5}",
               "{SEED6}",
               "{OUTPUT_FILENAME}"]

# keywords replaced in the csh job script template
scripts_tags = ["{PATHTOOUT}",
                "{CONFIGFILE}",
                "{OUTFILE}","{PATHTOBASE}","{OUTDIR}"]

# keywords replaced in the condor job description template
condor_tags = ["{EXECUTABLE}",
               "{OUTPUT}",
               "{ERROR}",
               "{LOG}",
               "{NOTIFY}"]


# Template file for each kind of generated file.  The "cfg" entry is
# replaced in the __main__ section by the file given with -c/--cfg.
# TemplatesPATH already ends in "/", so these paths contain "//".
template_fnames = {}
template_fnames["cfg"] = TemplatesPATH+"/reco_template.cfg"
template_fnames["condor"] = TemplatesPATH+"/template.condor"
template_fnames["csh"] = TemplatesPATH+"/runcms_new2.csh"
|
161 |
|
162 |
|
163 |
def _mkdir(newdir):
|
164 |
"""works the way a good mkdir should :)
|
165 |
- already exists, silently complete
|
166 |
- regular file in the way, raise an exception
|
167 |
- parent directory(ies) does not exist, make them as well
|
168 |
"""
|
169 |
if os.path.isdir(newdir):
|
170 |
pass
|
171 |
elif os.path.isfile(newdir):
|
172 |
raise OSError("a file with the same name as the desired " \
|
173 |
"dir, '%s', already exists." % newdir)
|
174 |
else:
|
175 |
head, tail = os.path.split(newdir)
|
176 |
if head and not os.path.isdir(head):
|
177 |
_mkdir(head)
|
178 |
if tail:
|
179 |
os.mkdir(newdir)
|
180 |
|
181 |
def change(infile,outfile,changearray,filearray):
    """Copy infile to outfile, substituting placeholder keywords.

    infile      -- path of the template file to read
    outfile     -- path of the file to write (overwritten if present)
    changearray -- sequence of (keyword, replacement) pairs; every
                   occurrence of each keyword is replaced, line by line,
                   in the order given
    filearray   -- input file names for the job; only used when outfile is
                   a python configuration (name ends in ".py").  A false
                   value (e.g. 0 or None) is treated as "no files".

    For a ".py" outfile a process.maxEvents setting (-1) and a PoolSource
    listing the names of filearray are appended after the copied template.
    NOTE(review): the module-level Nevents value is not consulted here;
    the appended maxEvents is always -1.
    """
    fin = open(infile)
    try:
        fout = open(outfile,"w")
        try:
            for line in fin:
                for pair in changearray:
                    line = line.replace(pair[0], pair[1])
                fout.write(line)

            # Only the file name extension decides whether this is a python
            # cfg; a ".py" somewhere else in the path must not trigger the
            # PoolSource block for csh/condor files.
            if outfile.endswith(".py"):
                fout.write('''
process.maxEvents = cms.untracked.PSet(input = cms.untracked.int32(-1) )

process.source = cms.Source("PoolSource",
fileNames = cms.untracked.vstring(
''')
                # one quoted name per line, comma after all but the last
                quoted = ['"' + name.strip("\n") + '"'
                          for name in (filearray or [])]
                if quoted:
                    fout.write(",\n".join(quoted) + "\n")

                fout.write(")\n")
                fout.write(", duplicateCheckMode = cms.untracked.string('noDuplicateCheck')\n")
                fout.write(")\n")
        finally:
            fout.close()
    finally:
        fin.close()
|
227 |
|
228 |
|
229 |
|
230 |
def submit_jobs(njob,array,ini_cfgfile,output_path):
    """Write the cfg, csh and condor files of one job and submit it.

    njob        -- job number; zero-padded to four digits in the file names
                   and used as the random seed in MC mode
    array       -- input file names of this job, handed on to change()
    ini_cfgfile -- name of the user's configuration file; a "_template"
                   substring is dropped when building the per-job names
    output_path -- prefix for the ROOT output file of the job

    Reads the module-level settings cfg_path, csh_path, logs_path,
    template_fnames, the *_tags lists and the flags isMC, isshort, istest.
    Terminates the script (exit status 1) if the ROOT output file of this
    job already exists.  With istest set, all files are written but
    condor_submit is not run.
    """
    cfgfile = ini_cfgfile.replace("_template","")

    # zero-pad the job number to (at least) four characters
    njobstr = str(njob).rjust(4, "0")
    tail = "_" + njobstr

    outfilename_cfg = cfg_path + cfgfile.replace(".py", tail + ".py")
    outfilename_root = output_path + cfgfile.replace(".py", tail + ".root")
    outfilename_log = logs_path + cfgfile.replace(".py", tail + ".log")
    outfilename_csh = csh_path + cfgfile.replace(".py", tail + ".csh")
    outfilename_condor = csh_path + cfgfile.replace(".py", tail + ".condor")

    if os.path.isfile(outfilename_root):
        print("Not submitting into condor batch farm since rootoutput already exists")
        print("delete or rename: "+outfilename_root+"\n to resubmit")
        # This used to be sys.exit(main()); no main() is defined, so the
        # script died with a NameError instead of exiting cleanly.
        sys.exit(1)

    #
    # First create config (.cfg) file:
    #
    if isMC:
        # seeding with the job number makes the seeds of a job reproducible
        random.seed(njob)
        changearray = [(seed_tag, str(random.randint(1,987654321)))
                       for seed_tag in cfg_tags_mc[:6]]
        changearray.append((cfg_tags_mc[6], outfilename_root))
    else:
        # {FILENAME} is blanked; change() appends the file list itself
        changearray = [(cfg_tags[0], ""),
                       (cfg_tags[1], outfilename_root)]
    change(template_fnames["cfg"], outfilename_cfg, changearray, array)

    #
    # now create the script to process the file:
    #
    changearray = [(scripts_tags[0], output_path+"/"),
                   (scripts_tags[1], outfilename_cfg),
                   (scripts_tags[2], outfilename_root),
                   (scripts_tags[3], os.getenv("CMSSW_BASE")+"/src"),
                   (scripts_tags[4], output_path)]
    change(template_fnames["csh"], outfilename_csh, changearray, [])
    # rwxrwxr-x; int(..., 8) because the octal literal syntax differs
    # between Python 2 (0775) and Python 3 (0o775)
    os.chmod(outfilename_csh, int("775", 8))

    #
    # finally create the condor job description file:
    #
    stdout = outfilename_log.replace(".log",".stdout")
    stderr = outfilename_log.replace(".log",".stderr")
    changearray = [(condor_tags[0], outfilename_csh),
                   (condor_tags[1], stdout),
                   (condor_tags[2], stderr),
                   (condor_tags[3], outfilename_log),
                   (condor_tags[4], os.getenv("USER")+"@fnal.gov")]
    change(template_fnames["condor"], outfilename_condor, changearray, [])
    if isshort:
        fout = open(outfilename_condor,"a")
        try:
            fout.write("+LENGTH=\"SHORT\"\n")
        finally:
            fout.close()

    submitcommand ="/opt/condor/bin/condor_submit "+outfilename_condor
    print(submitcommand+"\n")

    if istest:
        print("Not submitting into condor batch farm. This is only a test")
        return

    # Read the pipe and close it so that condor_submit has finished before
    # the next job is prepared and its output/exit status are not lost.
    child = os.popen(submitcommand)
    try:
        submit_output = child.read()
    finally:
        status = child.close()
    if submit_output:
        sys.stdout.write(submit_output)
    if status:
        print("condor_submit failed with status "+str(status)+" for "+outfilename_condor)
|
314 |
|
315 |
|
316 |
|
317 |
#________________________________________________________________
|
318 |
if __name__ =='__main__':

    # Parse the command line according to the usage text in the module
    # docstring; print that text and stop when nothing was given.
    option,args = parse(__doc__)
    if not args and not option: exit()

    # The configuration file and the number of jobs are always needed;
    # the list of input files only when not generating MC.
    # (This used to call optionparse.exit(), but no such module is
    # imported, which produced a NameError instead of the usage text.)
    if option.mc:
        if not option.cfg or not option.njobs:
            print(" you need to provide configuration file, number of jobs")
            exit()
    elif not option.cfg or not option.njobs or not option.list:
        print(" you need to provide configuration file, list of files, number of jobs")
        exit()

    ini_cfgfile = option.cfg

    # the user's configuration file is the cfg template
    template_fnames["cfg"] = ini_cfgfile

    if not option.mc:
        list_of_files = option.list
    else:
        Nevents = str(option.events)

    try:
        number_of_jobs = int(option.njobs)
    except ValueError:
        exit("number of jobs must be an integer, got: "+str(option.njobs))
    if number_of_jobs < 1:
        exit("number of jobs must be at least 1")

    # optional range of job numbers to submit; 0 means "no limit"
    ini_run = 0
    fin_run = 0

    if option.initial:
        ini_run = int(option.initial)
    if option.final:
        fin_run = int(option.final)

    istest = option.test
    isMC = option.mc
    isshort = option.short

    output_path = ""

    if option.output:
        output_path = option.output+"/"

    cfg_path = output_path+ "cfg/"
    csh_path = output_path+ "csh/"
    logs_path = output_path+ "logs/"

    subset = []
    njob = 0

    #make directories
    for needed_dir in (cfg_path, csh_path, logs_path):
        if not os.path.exists(needed_dir):
            _mkdir(needed_dir)

    def _job_selected(job_number):
        "True if job_number lies in the range requested with -i/-f"
        if job_number < ini_run:
            return False
        return fin_run == 0 or (fin_run > 0 and job_number <= fin_run)

    if not option.mc:
        # Keep only the lines naming a .root file and strip the quotes,
        # commas and whitespace that surround the name.
        inputfile = open(list_of_files)
        try:
            rootfiles = [iline.strip(" \t\r\n',\"") for iline in inputfile
                         if iline.find(".root") != -1]
        finally:
            inputfile.close()

        # At least one file per job: with fewer files than jobs the integer
        # division gives 0, which used to submit a job without input files
        # and then put all files into a single job.
        filesperjob = max(1, len(rootfiles) // number_of_jobs)

        # Consecutive groups of filesperjob files, the last one possibly
        # smaller.  Job numbers are counted even for jobs outside the
        # requested range so that numbering stays stable across resubmits.
        for first in range(0, len(rootfiles), filesperjob):
            subset = rootfiles[first:first+filesperjob]
            njob = njob + 1
            if _job_selected(njob):
                submit_jobs(njob,subset,ini_cfgfile,output_path)

    else:
        # MC generation: no input files, one job per requested job number
        for ijob in range(1, number_of_jobs+1):
            submit_jobs(ijob,subset,ini_cfgfile,output_path)
|
436 |
|
437 |
|
438 |
|