1 |
nmohr |
1.1 |
#! /usr/bin/env python
|
2 |
nmohr |
1.6 |
from optparse import OptionParser
|
3 |
bortigno |
1.26 |
import sys
|
4 |
peller |
1.28 |
import time
|
5 |
|
|
import os
|
6 |
|
|
import shutil
|
7 |
nmohr |
1.7 |
|
8 |
nmohr |
1.6 |
# Command-line interface of the submission script.
# None of the options declares an action or a type, so optparse stores the
# argument following each flag as a string.
parser = OptionParser()
parser.add_option("-T", "--tag", dest="tag", default="",
                  help="Tag to run the analysis with, example '8TeV' uses config8TeV and pathConfig8TeV to run the analysis")
parser.add_option("-J", "--task", dest="task", default="",
                  help="Task to be done, i.e. 'dc' for Datacards, 'prep' for preparation of Trees, 'plot' to produce plots or 'eval' to write the MVA output or 'sys' to write regression and systematics (or 'syseval' for both). ")
parser.add_option("-M", "--mass", dest="mass", default="125",
                  help="Mass for DC or Plots, 110...135")
parser.add_option("-S", "--samples", dest="samples", default="",
                  help="samples you want to run on")
parser.add_option("-F", "--folderTag", dest="ftag", default="",
                  help="Creats a new folder structure for outputs or uses an existing one with the given name")
# The default is a string on purpose: a value given on the command line is
# stored as a string too, and the value ends up concatenated into the qsub
# command line. An int default made the two cases behave differently.
parser.add_option("-N", "--number-of-events", dest="nevents_split", default="100000",
                  help="Number of events per file when splitting.")
# NOTE(review): this flag takes an argument (no store_true action); the value
# is only tested for truthiness later, so any non-empty argument enables it.
parser.add_option("-P", "--philipp-love-progress-bars", dest="philipp_love_progress_bars", default=False,
                  help="If you share the love of Philipp...")
|
23 |
|
|
|
24 |
bortigno |
1.26 |
# Read the command line. sys.argv is passed as is, so the script name ends up
# in the (unused) positional arguments.
opts, args = parser.parse_args(sys.argv)

# NOTE(review): these imports sit after the option parsing in the original,
# presumably so that importing ROOT cannot interfere with it - confirm before
# moving them to the top of the file.
import os,shutil,pickle,subprocess,ROOT,re
ROOT.gROOT.SetBatch(True)
from myutils import BetterConfigParser, Sample, ParseInfo, sample_parser
import getpass

# The tag and the task are mandatory: print the matching usage hint and leave
# with exit code 123 as soon as one of them is missing.
for given, complaint in (
        (opts.tag, "Please provide tag to run the analysis with, example '-T 8TeV' uses config8TeV and pathConfig8TeV to run the analysis."),
        (opts.task, "Please provide a task.\n-J prep:\tpreparation of Trees\n-J sys:\t\twrite regression and systematics\n-J eval:\tcreate MVA output\n-J plot:\tproduce Plots\n-J dc:\t\twrite workspaces and datacards"),
):
    if given == "":
        print(complaint)
        sys.exit(123)
|
38 |
|
|
|
39 |
peller |
1.28 |
|
40 |
|
|
en = opts.tag

# Create the list with the samples to run over.
samplesList = opts.samples.split(",")
# Timestamp used in the names of the batch log files.
timestamp = time.asctime().replace(' ', '_').replace(':', '-')

# The list of the config files is taken from the path config.
pathsFile = '%sconfig/paths' % en
pathconfig = BetterConfigParser()
pathconfig.read(pathsFile)
# split() without an argument ignores repeated blanks, which would otherwise
# produce empty config names.
_configs = pathconfig.get('Configuration', 'List').split()
configs = ['%sconfig/' % en + c for c in _configs]

if opts.ftag != '':
    # A folder tag was given: make sure the output folder structure exists,
    # point the path config to it and archive the config files in it.
    tagDir = pathconfig.get('Directories', 'tagDir')
    DirStruct = {
        'tagDir': tagDir,
        'ftagdir': '%s/%s/' % (tagDir, opts.ftag),
        'logpath': '%s/%s/%s/' % (tagDir, opts.ftag, 'Logs'),
        'plotpath': '%s/%s/%s/' % (tagDir, opts.ftag, 'Plots'),
        'limitpath': '%s/%s/%s/' % (tagDir, opts.ftag, 'Limits'),
        'confpath': '%s/%s/%s/' % (tagDir, opts.ftag, 'config'),
    }

    # Parents first: os.mkdir does not create intermediate directories.
    for key in ['tagDir', 'ftagdir', 'logpath', 'plotpath', 'limitpath', 'confpath']:
        if not os.path.exists(DirStruct[key]):
            os.mkdir(DirStruct[key])

    # Keep the previous path config as paths.bkp and write a new one in which
    # the plot, log and limit locations point into the tagged folder.
    with open(pathsFile) as pathfile:
        pathLines = pathfile.readlines()
    os.rename(pathsFile, '%sconfig/paths.bkp' % en)
    redirected = (('plotpath', 'plotpath'), ('logpath', 'logpath'), ('limits', 'limitpath'))
    with open(pathsFile, 'w') as pathfile:
        for line in pathLines:
            for option, key in redirected:
                if line.startswith(option):
                    line = '%s: %s\n' % (option, DirStruct[key])
                    break
            pathfile.write(line)

    # Copy the config files. Every entry of configs starts with the tag, so
    # slicing removes exactly that prefix; str.strip(en) would instead remove
    # any of the tag's characters from both ends of the path.
    for item in configs:
        shutil.copyfile(item, '%s/%s/%s' % (tagDir, opts.ftag, item[len(en):]))

print(configs)
# Read the full configuration only now, after the path config may have been
# rewritten above.
config = BetterConfigParser()
config.read(configs)
|
85 |
peller |
1.28 |
|
86 |
bortigno |
1.35 |
|
87 |
|
|
def compile_macro(config,macro):
    """
    Build the shared library of a macro unless the library already exists.

    The header <macro>.h is copied from the library directory to
    /scratch/<user>/, ROOT is asked to build it there ('.L <file>+') and the
    resulting library is copied back to its configured location. Compiling
    in scratch avoids problems with the linking in the working nodes.

    The working directory of the caller is restored on return, also when
    one of the steps raises.

    Args:
        config: configuration object; config.get(macro, 'library') must
            return the path of the library
        macro: macro name to be compiled (section name in the config and
            header file name without the '.h' extension)
    Returns:
        nothing
    """
    submitDir = os.getcwd()
    header = macro + '.h'
    library = config.get(macro, 'library')
    libDir = os.path.dirname(library)
    try:
        os.chdir(libDir)
        if not os.path.exists(library):
            print('@INFO: Compiling ' + header)
            scratchDir = '/scratch/%s/' % (getpass.getuser())
            shutil.copyfile(libDir + '/' + header, scratchDir + header)
            os.chdir(scratchDir)
            ROOT.gROOT.ProcessLine('.L %s+' % (scratchDir + header))
            shutil.copyfile(scratchDir + os.path.basename(library), library)
    finally:
        # Always return to the submission directory, even after a failure.
        os.chdir(submitDir)
|
110 |
|
|
|
111 |
|
|
# Make sure the libraries of the two macros exist before any job is submitted.
compile_macro(config,'BTagReshaping')
compile_macro(config,'VHbbNameSpace')

logPath = config.get("Directories","logpath")
# Lines of data/submit.txt; submit() uses them as job names when the -P option
# is set. The file is closed as soon as it has been read.
with open('%s/data/submit.txt' %config.get('Directories','vhbbpath')) as logoFile:
    logo = logoFile.readlines()
# Number of jobs submitted so far, updated by submit().
counter = 0

# Check that the log directory exists; if not, exit.
if not os.path.isdir(logPath):
    print('@ERROR : ' + logPath + ': dir not found.')
    print('@ERROR : Create it before submitting ')
    print('Exit')
    sys.exit(-1)

# Values substituted into the qsub command line by submit().
repDict = {
    'en': en,
    'logpath': logPath,
    'job': '',
    'task': opts.task,
    'queue': 'all.q',
    'timestamp': timestamp,
    'additional': '',
    'job_id': '',
}
|
127 |
nmohr |
1.6 |
def submit(job,repDict):
    """
    Submit one job to the batch system with qsub.

    Stores the job and its batch name in repDict, builds the qsub command
    line from repDict and opts.task, prints it and runs it through the
    shell. The module-level counter is increased by one per call.

    Args:
        job: name of the job; used in the batch job name, in the log file
            names and as first argument of runAll.sh
        repDict: dictionary with the keys 'queue', 'logpath', 'timestamp',
            'en', 'task', 'job_id' and 'additional'; 'job' and 'name' are
            set here
    Returns:
        nothing
    """
    global counter
    repDict['job'] = job
    # Only touch the logo when it is requested and not empty: an empty
    # submit.txt must not break the submission with a division by zero.
    if opts.philipp_love_progress_bars and logo:
        repDict['name'] = '"%s"' % logo[counter % len(logo)].strip()
    else:
        repDict['name'] = '%(job)s_%(en)s%(task)s' % repDict
    counter += 1
    # The job id may be a number (e.g. the events per file of the split
    # task), so it is converted before being appended to the command.
    # NOTE(review): the command is a shell string assembled from config and
    # command-line values without quoting.
    command = 'qsub -V -cwd -q %(queue)s -l h_vmem=6G -N %(name)s -o %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.out -e %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.err runAll.sh %(job)s %(en)s ' % repDict + opts.task + ' ' + str(repDict['job_id']) + ' ' + repDict['additional']
    print(command)
    subprocess.call(command, shell=True)
|
139 |
nmohr |
1.1 |
|
140 |
peller |
1.29 |
# Task specific lists read from the configuration. Training jobs are submitted
# right here; the datacard and plot lists are consumed by the task dispatch
# further down.
if opts.task == 'train':
    train_list = config.get('MVALists','List_for_submitscript').split(',')
    print(train_list)
    for item in train_list:
        submit(item,repDict)
elif opts.task == 'dc':
    DC_vars = config.get('LimitGeneral','List').split(',')
    print(DC_vars)
elif opts.task == 'plot':
    Plot_vars = config.get('Plot_general','List').split(',')

# Every task except 'prep' loads the sample information from the sample path.
if opts.task != 'prep':
    path = config.get("Directories","samplepath")
    samplesinfo = config.get("Directories","samplesinfo")
    info = ParseInfo(samplesinfo,path)
|
159 |
peller |
1.8 |
|
160 |
peller |
1.9 |
# Submit the jobs of the requested task.
if opts.task == 'plot':
    # One job per entry of the plot list.
    repDict['queue'] = 'all.q'
    for item in Plot_vars:
        submit(item,repDict)

elif opts.task == 'dc':
    # One job per datacard of the requested mass point, or per ZH datacard
    # when the mass is 'all'.
    repDict['queue'] = 'short.q'
    for item in DC_vars:
        if 'ZH%s' % opts.mass in item or ('ZH' in item and opts.mass == 'all'):
            submit(item,repDict)

elif opts.task == 'prep':
    if opts.samples == "":
        path = config.get("Directories","PREPin")
        samplesinfo = config.get("Directories","samplesinfo")
        info = ParseInfo(samplesinfo,path)
        for job in info:
            submit(job.name,repDict)
    else:
        for sample in samplesList:
            submit(sample,repDict)

elif opts.task in ('sys', 'syseval'):
    path = config.get("Directories","SYSin")
    samplesinfo = config.get("Directories","samplesinfo")
    info = ParseInfo(samplesinfo,path)
    if opts.samples == "":
        for job in info:
            if job.subsample:
                continue  # avoid multiple submissions from subsamples
            # TO FIX FOR SPLITTED SAMPLE
            submit(job.name,repDict)
    else:
        for sample in samplesList:
            submit(sample,repDict)

elif opts.task == 'eval':
    path = config.get("Directories","MVAin")
    samplesinfo = config.get("Directories","samplesinfo")
    info = ParseInfo(samplesinfo,path)
    if opts.samples == "":
        for job in info:
            if job.subsample:
                continue  # avoid multiple submissions from subsamples
            if info.checkSplittedSampleName(job.identifier):
                # Multiple entries for one name (splitted samples): use the
                # identifier to submit.
                print('@INFO: Splitted samples: submit through identifier')
                submit(job.identifier,repDict)
            else:
                submit(job.name,repDict)
    else:
        for sample in samplesList:
            submit(sample,repDict)

elif opts.task == 'split':
    path = config.get("Directories","SPLITin")
    samplesinfo = config.get("Directories","samplesinfo")
    # The number of events per file is passed to the job through the job id;
    # submit() concatenates it into a command line, so hand it over as text.
    repDict['job_id'] = str(opts.nevents_split)
    info = ParseInfo(samplesinfo,path)
    if opts.samples == "":
        for job in info:
            if job.subsample:
                continue  # avoid multiple submissions from subsamples
            submit(job.name,repDict)
    else:
        for sample in samplesList:
            submit(sample,repDict)

# BDT optimisation
elif opts.task == 'mva_opt':
    # Each parameter listed in [Optimisation] has an entry that evaluates to
    # a sequence; its first item is the default value and, when three items
    # are given, they are used as (first, last, number of steps).
    # NOTE(review): eval() executes whatever expression the config contains;
    # only use configuration files you trust.
    scans = [(par, eval(config.get('Optimisation',par)))
             for par in config.get('Optimisation','parameters').split(',')]

    # Reference job with every parameter at its default value.
    main_setting = ':'.join('%s=%s' % (par, scan_par[0]) for par, scan_par in scans)
    setting = main_setting
    print(setting)
    repDict['additional'] = setting
    repDict['job_id'] = config.get('Optimisation','training')
    submit('OPT_main_set',repDict)

    # Scan every parameter found in the Optimisation section, one job per step.
    for par, scan_par in scans:
        print(par)
        # The step count is the third item, so three items are required.
        if len(scan_par) > 2 and scan_par[2] != 0:
            for step in range(scan_par[2]):
                # NOTE(review): with integer scan values this is an integer
                # division under Python 2 - confirm that this is intended.
                value = (scan_par[0])+((1+step)*(scan_par[1]-scan_par[0])/scan_par[2])
                print(value)
                # Rebuild the setting string with only this parameter changed.
                # The former regex replacement needed a ':' after the value
                # and therefore never changed the last parameter of the list.
                setting = ':'.join(
                    '%s=%s' % (name, value if name == par else defaults[0])
                    for name, defaults in scans)
                repDict['additional'] = setting
                submit('OPT_'+par+str(value),repDict)
                print(setting)
|
259 |
|
|
|
260 |
|
|
|
261 |
|
|
# Show the state of the batch queue after the submission; the progress bar
# script is run in addition when the -P option was given.
monitorCommands = ['qstat']
if opts.philipp_love_progress_bars:
    monitorCommands.append('./qstat.py')
for monitorCommand in monitorCommands:
    os.system(monitorCommand)
|