1 |
jengbou |
1.1 |
#!/usr/bin/env python
|
2 |
|
|
############################################################################
|
3 |
|
|
#
|
4 |
|
|
# Author: Francisco Yumiceva
|
5 |
|
|
# yumiceva@fnal.gov
|
6 |
|
|
#
|
7 |
|
|
# Fermilab, 2007
|
8 |
|
|
#
|
9 |
|
|
# 2009/07/14: Modification to prevent empty string for input fileNames
|
10 |
|
|
# Geng-yuan Jeng
|
11 |
|
|
############################################################################
|
12 |
|
|
|
13 |
|
|
"""
|
14 |
|
|
Submit jobs to condor. It will create directories in the output path
|
15 |
|
|
to store configuration files and log files.
|
16 |
|
|
|
17 |
|
|
e.g. submitToCondor.py -c ZprimeEventsReco_full.py -l datasets/zp1Tev.txt -n 10 -o /uscms_data/d1/yumiceva/CMSSW_1_3_6/TQAFAnalyzer/
|
18 |
|
|
|
19 |
|
|
where ZprimeEventsReco_full.py is the configuration file. The configuration file
|
20 |
|
|
needs to be modified a bit: (1) remove all input files in the PoolSource and add a
|
21 |
|
|
keyword {FILENAME} you need also the brackets. The filename of the output file should
|
22 |
|
|
be changed to another keyword called {OUTPUT_FILENAME}.
|
23 |
|
|
|
24 |
|
|
To Generate MC: In your cfg file you need to add also the keywords {SEED1},...,{SEED6} for
|
25 |
|
|
the random seeds which are going to be modified by the script using PYTHON random numbers,
|
26 |
|
|
you also need to change the output filename to {OUTPUT_FILENAME} All keywords need to have
|
27 |
|
|
brackets.
|
28 |
|
|
|
29 |
|
|
usage: %prog
|
30 |
|
|
-m, --mc : to generate MC. it will handle random numbers.
|
31 |
|
|
-e, --events = EVENTS: number of events to generate MC. If -m flag is turned on.
|
32 |
|
|
-c, --cfg = CFG: python configuration file
|
33 |
|
|
-l, --list = LIST: file with list of files (dataset)
|
34 |
|
|
-n, --njobs = NJOBS: number of jobs.
|
35 |
|
|
-o, --output = OUTPUT: output path
|
36 |
|
|
-i, --initial= INITIAL: number of initial job
|
37 |
|
|
-f, --final= FINAL: number of final job
|
38 |
|
|
-s, --short : for short jobs that need high priority
|
39 |
|
|
-t, --test : do not submit anything just show what I would do
|
40 |
|
|
|
41 |
|
|
Author: Francisco Yumiceva (yumiceva@fnal.gov)
|
42 |
|
|
"""
|
43 |
|
|
|
44 |
|
|
|
45 |
|
|
import os,sys
|
46 |
|
|
try:
    import random
except Exception:
    # Deliberately broad: this guard exists for the cmslpc python
    # configuration problem, and the exact exception raised by a broken
    # PYTHONPATH is not known here. Suggest a PYTHONPATH without the
    # "python2.3" entries and stop.
    current_path = os.getenv("PYTHONPATH") or ""  # may be unset
    kept_entries = [entry for entry in current_path.split(":")
                    if entry.find("python2.3") == -1]
    print("\n FIX PYTHONPATH environment variable, try:\n")
    # Entries must stay ":"-separated for the suggestion to be usable.
    print("setenv PYTHONPATH " + ":".join(kept_entries))
    sys.exit()
|
59 |
|
|
#os.putenv("PYTHONPATH",natemp)
|
60 |
|
|
# PYTHONPATH fixed
|
61 |
|
|
|
62 |
|
|
import string, re
|
63 |
|
|
from time import gmtime, localtime, strftime
|
64 |
|
|
|
65 |
|
|
#_______________OPTIONS__________________________________
|
66 |
|
|
import optparse
|
67 |
|
|
USAGE = re.compile(r'(?s)\s*usage: (.*?)(\n[ \t]*\n|$)')


def nonzero(self): # will become the truth-value method of optparse.Values
    """Return True if at least one option was given.

    An option counts as given when the corresponding attribute stored on
    the instance is not None.
    """
    # dict.values() exists on Python 2 and 3; itervalues() is Python 2 only.
    for value in self.__dict__.values():
        if value is not None:
            return True
    return False


# Dynamically fix optparse.Values. Python 2 looks up __nonzero__ for truth
# testing and Python 3 looks up __bool__, so install the hook under both names.
optparse.Values.__nonzero__ = nonzero
optparse.Values.__bool__ = nonzero
|
75 |
|
|
|
76 |
|
|
class ParsingError(Exception):
    """Raised when the usage text cannot be found or cannot be parsed."""
|
77 |
|
|
|
78 |
|
|
# Usage text shown by exit(); parse() stores the docstring it was given here.
optionstring=""


def exit(msg=""):
    """Terminate by raising SystemExit.

    The exit message is *msg* when it is non-empty; otherwise it is the
    stored usage text with "%prog" replaced by the script name (sys.argv[0]).
    """
    if not msg:
        msg = optionstring.replace("%prog",sys.argv[0])
    raise SystemExit(msg)
|
82 |
|
|
|
83 |
|
|
def parse(docstring, arglist=None):
    """Build an optparse parser from the usage section of *docstring* and run it.

    The usage section is located with the USAGE pattern. Its first line is
    handed to OptionParser as the usage string. Every following line is split
    at the first ':' into flags and help text, and the flags are split at the
    first ',' into a short and a long flag. Flags containing '=' store a
    value; all others are boolean switches.

    *docstring* is also remembered in the module-level ``optionstring``.

    Returns the (options, args) pair from ``parse_args(arglist)``.
    Raises ParsingError when no usage section is found or a line of it does
    not have the expected form.
    """
    global optionstring
    optionstring = docstring
    usage_match = USAGE.search(optionstring)
    if usage_match is None:
        raise ParsingError("Cannot find the option string")
    usage_lines = usage_match.group(1).splitlines()
    try:
        parser = optparse.OptionParser(usage_lines[0])
        for spec in usage_lines[1:]:
            flags, description = spec.split(':')[:2]
            short_flag, long_flag = flags.split(',')[:2]
            if '=' in flags:
                kind = 'store'
                # drop the "= NAME" placeholder that follows the long flag
                long_flag = long_flag.split('=')[0]
            else:
                kind = 'store_true'
            parser.add_option(short_flag.strip(), long_flag.strip(),
                              action=kind, help=description.strip())
    except (IndexError,ValueError):
        raise ParsingError("Cannot parse the option string correctly")
    return parser.parse_args(arglist)
|
104 |
|
|
|
105 |
|
|
#______________________________________________________________________
|
106 |
|
|
|
107 |
|
|
#
|
108 |
|
|
# path to the directory that holds this script's template files:
|
109 |
|
|
#
|
110 |
|
|
TemplatesPATH = "%s/src/submit2uaf/" % os.environ['CMSSW_BASE']
|
111 |
|
|
#
|
112 |
|
|
# path to the output that is created by this script
|
113 |
|
|
#
|
114 |
|
|
#main_path = "/uscmst1b_scratch/lpc1/cmsroc/yumiceva/CMSSW_1_2_0_pre3/src/RecoVertex/BeamSpotProducer/test/"
|
115 |
|
|
# Default output area: a "condorjobs" directory below the current working
# directory, with one sub-directory each for cfg, csh and log files.
main_path = os.getcwd() + "/"
out_path = main_path+"condorjobs/"
scripts_path = out_path
cfg_path = scripts_path+"cfg/"
csh_path = scripts_path+"csh/"
logs_path = scripts_path+"logs/"

# Run-mode defaults.
istest = 0
isMC = 0
Nevents = "-1"
isshort = 0

# Placeholder tags looked up in the configuration template.
cfg_tags = ["{FILENAME}", "{OUTPUT_FILENAME}"]

# Placeholder tags for MC configurations: six seeds plus the output file.
cfg_tags_mc = [
    "{SEED1}", "{SEED2}", "{SEED3}",
    "{SEED4}", "{SEED5}", "{SEED6}",
    "{OUTPUT_FILENAME}",
]

# Placeholder tags looked up in the csh script template.
scripts_tags = [
    "{PATHTOOUT}",
    "{CONFIGFILE}",
    "{OUTFILE}",
    "{PATHTOBASE}",
    "{OUTDIR}",
]

# Placeholder tags looked up in the condor job description template.
condor_tags = [
    "{EXECUTABLE}",
    "{OUTPUT}",
    "{ERROR}",
    "{LOG}",
    "{NOTIFY}",
]
|
155 |
|
|
|
156 |
|
|
|
157 |
|
|
# Template file for each kind of generated file, keyed by kind.
template_fnames = {
    "cfg": TemplatesPATH+"/reco_template.cfg",
    "condor": TemplatesPATH+"/template.condor",
    "csh": TemplatesPATH+"/runcms_new2.csh",
}
|
161 |
|
|
|
162 |
|
|
|
163 |
|
|
def _mkdir(newdir):
    """Create directory *newdir* together with any missing parents.

    - nothing happens when the directory is already there
    - OSError is raised when a regular file occupies the path
    - missing parent directories are created first
    """
    if os.path.isdir(newdir):
        return
    if os.path.isfile(newdir):
        raise OSError("a file with the same name as the desired " \
                      "dir, '%s', already exists." % newdir)
    parent, leaf = os.path.split(newdir)
    if parent and not os.path.isdir(parent):
        _mkdir(parent)
    # a path ending in a separator has an empty last component: nothing
    # is left to create once the parent exists
    if leaf:
        os.mkdir(newdir)
|
180 |
|
|
|
181 |
|
|
def change(infile,outfile,changearray,filearray):
    """Copy *infile* to *outfile*, substituting template tags.

    infile      -- path of the template to read
    outfile     -- path of the file to write (overwritten)
    changearray -- sequence of (tag, replacement) pairs; every occurrence of
                   each tag in each line is replaced, in the given order
    filearray   -- input file names for the appended source block; callers
                   in this file pass 0 when no source block is wanted

    When *outfile* ends in ".py", a ``process.maxEvents`` setting and a
    ``process.source`` PoolSource block listing *filearray* are appended
    after the copied template text.
    """
    fin = open(infile)
    try:
        fout = open(outfile,"w")
        try:
            for line in fin:
                for pair in changearray:
                    line = line.replace(pair[0], pair[1])
                fout.write(line)

            # Test the extension only: a ".py" elsewhere in the path (for
            # example in a directory name) must not append a source block
            # to csh or condor files.
            if outfile.endswith(".py"):
                fout.write('''
process.maxEvents = cms.untracked.PSet(input = cms.untracked.int32(-1) )

process.source = cms.Source("PoolSource",
fileNames = cms.untracked.vstring(
''')
                # Treat the 0/None placeholder as "no files".
                quoted = ['"' + name.strip("\n") + '"'
                          for name in list(filearray or [])]
                if quoted:
                    # one quoted name per line, comma after all but the last
                    fout.write(",\n".join(quoted) + "\n")

                fout.write(")\n")
                fout.write(", duplicateCheckMode = cms.untracked.string('noDuplicateCheck')\n")
                fout.write(")\n")
        finally:
            fout.close()
    finally:
        fin.close()
|
227 |
|
|
|
228 |
|
|
|
229 |
|
|
|
230 |
|
|
def submit_jobs(njob,array,ini_cfgfile,output_path):
    """Write the cfg, csh and condor files for one job and submit it.

    njob        -- job number; zero-padded to four characters in file names
    array       -- input file names handed to change() for the cfg file
    ini_cfgfile -- configuration file name; "_template" is removed from it
                   to form the base name of all generated files
    output_path -- directory prefix for the job's .root output file

    Reads the module-level settings cfg_path, csh_path, logs_path, isMC,
    isshort, istest, template_fnames and the *_tags lists. Exits the script
    when the job's .root output file already exists. Unless istest is set,
    the condor submit command is executed.
    """
    import stat  # only needed for the permission bits of the csh script

    cfgfile = ini_cfgfile.replace("_template","")
    njobstr = str(njob).rjust(4, "0")

    outfilename_cfg = cfg_path + cfgfile.replace(".py","_"+njobstr+".py")
    outfilename_root = output_path+ cfgfile.replace(".py","_"+njobstr+".root")
    outfilename_log = logs_path + cfgfile.replace(".py","_"+njobstr+".log")
    outfilename_csh = csh_path + cfgfile.replace(".py","_"+njobstr+".csh")
    outfilename_condor= csh_path + cfgfile.replace(".py","_"+njobstr+".condor")

    if os.path.isfile(outfilename_root):
        print("Not submitting into condor batch farm since rootoutput already exists")
        print("delete or rename: "+outfilename_root+"\n to resubmit")
        # No main() exists in this file, so exit directly with a failure
        # status instead of failing on the undefined name.
        sys.exit(1)

    #
    # First create config (.cfg) file:
    #
    changearray = []
    if isMC:
        # seeding with the job number makes the seeds reproducible per job
        random.seed(njob)
        for seed_tag in cfg_tags_mc[:6]:
            changearray.append((seed_tag, str(random.randint(1,987654321))))
        changearray.append((cfg_tags_mc[6], outfilename_root))
    else:
        changearray.append((cfg_tags[0], ""))
        changearray.append((cfg_tags[1], outfilename_root))
    change(template_fnames["cfg"],outfilename_cfg,changearray,array)

    #
    # now create the script to process the file:
    #
    changearray = [
        (scripts_tags[0], output_path+"/"),
        (scripts_tags[1], outfilename_cfg),
        (scripts_tags[2], outfilename_root),
        (scripts_tags[3], os.getenv("CMSSW_BASE")+"/src"),
        (scripts_tags[4], output_path),
    ]
    change(template_fnames["csh"],outfilename_csh,changearray,0)
    # rwxrwxr-x (octal 775), spelled with stat constants so the value does
    # not depend on an octal literal syntax
    os.chmod(outfilename_csh,
             stat.S_IRWXU | stat.S_IRWXG | stat.S_IROTH | stat.S_IXOTH)

    #
    # finally create the condor job description file:
    #
    stdout_file = outfilename_log.replace(".log",".stdout")
    stderr_file = outfilename_log.replace(".log",".stderr")
    changearray = [
        (condor_tags[0], outfilename_csh),
        (condor_tags[1], stdout_file),
        (condor_tags[2], stderr_file),
        (condor_tags[3], outfilename_log),
        (condor_tags[4], os.getenv("USER")+"@fnal.gov"),
    ]
    change(template_fnames["condor"],outfilename_condor,changearray,0)
    if isshort:
        fout = open(outfilename_condor,"a")
        try:
            fout.write("+LENGTH=\"SHORT\"\n")
        finally:
            fout.close()

    submitcommand ="/opt/condor/bin/condor_submit "+outfilename_condor
    print(submitcommand+"\n")

    if not istest:
        child = os.popen(submitcommand)
        try:
            # read the command's output to the end before closing the pipe
            child.read()
        finally:
            child.close()
    else:
        print("Not submitting into condor batch farm. This is only a test")
|
314 |
|
|
|
315 |
|
|
|
316 |
|
|
|
317 |
|
|
#________________________________________________________________
|
318 |
|
|
if __name__ =='__main__':

    # The option table is taken from the usage section of the module docstring.
    option,args = parse(__doc__)
    if not args and not option: exit()

    # A configuration file and a job count are needed in every mode; the
    # list of input files only when not generating MC. exit() is this
    # module's own helper (there is no "optionparse" module in scope).
    if not option.mc and (not option.cfg or not option.njobs or not option.list):
        print(" you need to provide configuration file, list of files, number of jobs")
        exit()
    if option.mc and (not option.cfg or not option.njobs):
        print(" you need to provide configuration file, number of jobs")
        exit()
    if float(option.njobs) <= 0:
        exit(" number of jobs must be positive")

    ini_cfgfile = option.cfg

    # the user's configuration file is used as the cfg template
    template_fnames["cfg"] = ini_cfgfile

    if not option.mc:
        list_of_files = option.list
    else:
        Nevents = str(option.events)

    number_of_jobs = option.njobs

    # first and last job number to submit; a final value of 0 means no upper limit
    ini_run = 0
    fin_run = 0
    if option.initial:
        ini_run = int(option.initial)
    if option.final:
        fin_run = int(option.final)

    # module-level settings read by submit_jobs()
    istest = option.test
    isMC = option.mc
    isshort = option.short

    output_path = ""
    if option.output:
        output_path = option.output+"/"

    cfg_path = output_path+ "cfg/"
    csh_path = output_path+ "csh/"
    logs_path = output_path+ "logs/"

    subset = []
    njob = 0

    #make directories
    for needed_dir in (cfg_path, csh_path, logs_path):
        if not os.path.exists(needed_dir):
            _mkdir(needed_dir)

    if not option.mc:
        # Keep only the lines naming .root files, without surrounding
        # quotes, commas and newlines.
        inputfile = open(list_of_files)
        try:
            rootfiles = [entry.strip("',\n") for entry in inputfile
                         if entry.find(".root") != -1]
        finally:
            inputfile.close()

        totalfiles = len(rootfiles)
        # At least one file per job: with more jobs requested than files the
        # integer division gives 0, which would submit a first job with no
        # input files.
        filesperjob = max(1, int(float(totalfiles)/float(number_of_jobs)))

        for ifile in rootfiles:
            if len(subset) == filesperjob:
                # the current group is full: hand it to a job, start a new one
                njob = njob + 1
                if njob >= ini_run and (fin_run == 0 or (fin_run > 0 and njob <= fin_run)):
                    submit_jobs(njob,subset,ini_cfgfile,output_path)
                subset = []
            subset.append(ifile)

        # files left over after the last full group form one more job
        if len(subset)>0:
            njob = njob + 1
            if njob >= ini_run and (fin_run == 0 or (fin_run > 0 and njob <= fin_run)):
                submit_jobs(njob,subset,ini_cfgfile,output_path)

    else:
        # MC generation: one job per job number, with no input files
        for ijob in range(1, int(number_of_jobs) + 1):
            submit_jobs(ijob,subset,ini_cfgfile,output_path)
|
436 |
|
|
|
437 |
|
|
|
438 |
|
|
|