3 |
|
import sys |
4 |
|
import pickle |
5 |
|
import ROOT |
6 |
+ |
ROOT.gROOT.SetBatch(True) |
7 |
|
from array import array |
7 |
– |
from myutils import BetterConfigParser, sample, printc, mvainfo, parse_info |
8 |
– |
#ToDo: |
9 |
– |
from gethistofromtree import getScale |
10 |
– |
|
8 |
|
#warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' ) |
9 |
|
#usage: ./train run gui |
10 |
|
|
14 |
– |
|
11 |
|
#CONFIGURE |
12 |
|
argv = sys.argv |
13 |
|
parser = OptionParser() |
21 |
|
if opts.config =="": |
22 |
|
opts.config = "config" |
23 |
|
|
24 |
+ |
#Import after configure to get help message |
25 |
+ |
from myutils import BetterConfigParser, mvainfo, ParseInfo, TreeCache |
26 |
+ |
|
27 |
|
#load config |
28 |
|
config = BetterConfigParser() |
29 |
|
config.read(opts.config) |
35 |
|
global_rescale=2. |
36 |
|
|
37 |
|
#get locations: |
38 |
< |
MVAdir=config.get('Directories','vhbbpath')+'/data_test/' |
38 |
> |
MVAdir=config.get('Directories','vhbbpath')+'/data/' |
39 |
|
samplesinfo=config.get('Directories','samplesinfo') |
40 |
|
|
41 |
|
#systematics |
47 |
|
VHbbNameSpace=config.get('VHbbNameSpace','library') |
48 |
|
ROOT.gSystem.Load(VHbbNameSpace) |
49 |
|
|
51 |
– |
def getTree(job,cut,path,subsample=-1): |
52 |
– |
#print path+'/'+job.getpath() |
53 |
– |
newinput = ROOT.TFile.Open(path+'/'+job.getpath(),'read') |
54 |
– |
output.cd() |
55 |
– |
Tree = newinput.Get(job.tree) |
56 |
– |
#Tree.SetDirectory(0) |
57 |
– |
|
58 |
– |
|
59 |
– |
if subsample>-1: |
60 |
– |
#print 'cut: (%s) & (%s)'%(cut,job.subcuts[subsample]) |
61 |
– |
CuttedTree=Tree.CopyTree('(%s) & (%s)'%(cut,job.subcuts[subsample])) |
62 |
– |
CuttedTree.SetNameTitle(job.subnames[subsample],job.subnames[subsample]) |
63 |
– |
print '\t--> read in %s'%job.subnames[subsample] |
64 |
– |
else: |
65 |
– |
CuttedTree=Tree.CopyTree(cut) |
66 |
– |
CuttedTree.SetNameTitle(job.name,job.name) |
67 |
– |
print '\t--> read in %s'%job.name |
68 |
– |
newinput.Close() |
69 |
– |
|
70 |
– |
#CuttedTree.SetDirectory(0) |
71 |
– |
return CuttedTree |
72 |
– |
|
73 |
– |
#def getScale(job,subsample=-1): |
74 |
– |
# input = TFile.Open(job.getpath()) |
75 |
– |
# CountWithPU = input.Get("CountWithPU") |
76 |
– |
# CountWithPU2011B = input.Get("CountWithPU2011B") |
77 |
– |
# #print lumi*xsecs[i]/hist.GetBinContent(1) |
78 |
– |
# return float(job.lumi)*float(job.xsec)*float(job.sf)/(0.46502*CountWithPU.GetBinContent(1)+0.53498*CountWithPU2011B.GetBinContent(1))*2/float(job.split) |
79 |
– |
|
50 |
|
#CONFIG |
81 |
– |
#suffix for output name |
82 |
– |
suffix='_newVars_v2' |
51 |
|
#factory |
52 |
|
factoryname=config.get('factory','factoryname') |
53 |
|
factorysettings=config.get('factory','factorysettings') |
55 |
|
MVAtype=config.get(run,'MVAtype') |
56 |
|
MVAname=run |
57 |
|
MVAsettings=config.get(run,'MVAsettings') |
58 |
< |
fnameOutput = MVAdir+factoryname+'_'+MVAname+suffix+'.root' |
58 |
> |
fnameOutput = MVAdir+factoryname+'_'+MVAname+'.root' |
59 |
|
#locations |
60 |
|
path=config.get('Directories','SYSout') |
61 |
|
|
77 |
|
MVA_Vars={} |
78 |
|
MVA_Vars['Nominal']=config.get(treeVarSet,'Nominal') |
79 |
|
MVA_Vars['Nominal']=MVA_Vars['Nominal'].split(' ') |
112 |
– |
#Spectators: |
113 |
– |
#spectators=config.get(treeVarSet,'spectators') |
114 |
– |
#spectators=spectators.split(' ') |
115 |
– |
|
116 |
– |
#TRAINING samples |
117 |
– |
#infofile = open(samplesinfo,'r') |
118 |
– |
#info = pickle.load(infofile) |
119 |
– |
#infofile.close() |
80 |
|
|
81 |
< |
info = parse_info(samplesinfo,path) |
81 |
> |
#Infofile |
82 |
> |
info = ParseInfo(samplesinfo,path) |
83 |
|
|
84 |
|
#Workdir |
85 |
|
workdir=ROOT.gDirectory.GetPath() |
87 |
|
|
88 |
|
TrainCut='%s & EventForTraining==1'%TCut |
89 |
|
EvalCut='%s & EventForTraining==0'%TCut |
90 |
+ |
cuts = [TrainCut,EvalCut] |
91 |
|
|
130 |
– |
#load TRAIN trees |
131 |
– |
Tbackgrounds = [] |
132 |
– |
TbScales = [] |
133 |
– |
Tsignals = [] |
134 |
– |
TsScales = [] |
92 |
|
|
93 |
+ |
samples = [] |
94 |
+ |
samples = info.get_samples(signals+backgrounds) |
95 |
|
|
96 |
+ |
tc = TreeCache(cuts,samples,path) |
97 |
|
|
98 |
|
output = ROOT.TFile.Open(fnameOutput, "RECREATE") |
99 |
|
|
100 |
< |
print '\n\t>>> TRAINING EVENTS <<<\n' |
100 |
> |
print '\n\t>>> READING EVENTS <<<\n' |
101 |
|
|
102 |
< |
for job in info: |
103 |
< |
if eval(job.active): |
104 |
< |
|
105 |
< |
if job.subsamples: |
106 |
< |
print '\tREADING IN SUBSAMPLES of %s'%job.name |
107 |
< |
for subsample in range(0,len(job.group)): |
108 |
< |
if job.subnames[subsample] in signals: |
109 |
< |
print '\t- %s as SIG'%job.group[subsample] |
110 |
< |
Tsignal = getTree(job,TrainCut,path,subsample) |
151 |
< |
ROOT.gDirectory.Cd(workdir) |
152 |
< |
TsScale = getScale(job,path,config,global_rescale,subsample) |
153 |
< |
Tsignals.append(Tsignal) |
154 |
< |
TsScales.append(TsScale) |
155 |
< |
print '\t\t\t%s events'%Tsignal.GetEntries() |
156 |
< |
elif job.subnames[subsample] in backgrounds: |
157 |
< |
print '\t- %s as BKG'%job.group[subsample] |
158 |
< |
Tbackground = getTree(job,TrainCut,path,subsample) |
159 |
< |
ROOT.gDirectory.Cd(workdir) |
160 |
< |
TbScale = getScale(job,path,config,global_rescale,subsample) |
161 |
< |
Tbackgrounds.append(Tbackground) |
162 |
< |
TbScales.append(TbScale) |
163 |
< |
print '\t\t\t%s events'%Tbackground.GetEntries() |
164 |
< |
|
165 |
< |
else: |
166 |
< |
if job.name in signals: |
167 |
< |
print '\tREADING IN %s AS SIG'%job.name |
168 |
< |
Tsignal = getTree(job,TrainCut,path) |
169 |
< |
ROOT.gDirectory.Cd(workdir) |
170 |
< |
TsScale = getScale(job,path,config,global_rescale) |
171 |
< |
Tsignals.append(Tsignal) |
172 |
< |
TsScales.append(TsScale) |
173 |
< |
print '\t\t\t%s events'%Tsignal.GetEntries() |
174 |
< |
elif job.name in backgrounds: |
175 |
< |
print '\tREADING IN %s AS BKG'%job.name |
176 |
< |
Tbackground = getTree(job,TrainCut,path) |
177 |
< |
ROOT.gDirectory.Cd(workdir) |
178 |
< |
TbScale = getScale(job,path,config,global_rescale) |
179 |
< |
Tbackgrounds.append(Tbackground) |
180 |
< |
TbScales.append(TbScale) |
181 |
< |
print '\t\t\t%s events'%Tbackground.GetEntries() |
182 |
< |
|
183 |
< |
|
184 |
< |
#load EVALUATE trees |
102 |
> |
signal_samples = info.get_samples(signals) |
103 |
> |
background_samples = info.get_samples(backgrounds) |
104 |
> |
|
105 |
> |
#TRAIN trees |
106 |
> |
Tbackgrounds = [] |
107 |
> |
TbScales = [] |
108 |
> |
Tsignals = [] |
109 |
> |
TsScales = [] |
110 |
> |
#EVAL trees |
111 |
|
Ebackgrounds = [] |
112 |
|
EbScales = [] |
113 |
|
Esignals = [] |
114 |
|
EsScales = [] |
115 |
|
|
116 |
< |
print '\n\t>>> TESTING EVENTS <<<\n' |
117 |
< |
|
118 |
< |
|
119 |
< |
for job in info: |
120 |
< |
if eval(job.active): |
121 |
< |
|
122 |
< |
if job.subsamples: |
123 |
< |
print '\tREADING IN SUBSAMPLES of %s'%job.name |
124 |
< |
for subsample in range(0,len(job.group)): |
125 |
< |
if job.subnames[subsample] in signals: |
126 |
< |
print '\t- %s as SIG'%job.group[subsample] |
127 |
< |
Esignal = getTree(job,EvalCut,path,subsample) |
128 |
< |
ROOT.gDirectory.Cd(workdir) |
129 |
< |
EsScale = getScale(job,path,config,global_rescale,subsample) |
130 |
< |
Esignals.append(Esignal) |
131 |
< |
EsScales.append(EsScale) |
132 |
< |
print '\t\t\t%s events'%Esignal.GetEntries() |
133 |
< |
elif job.subnames[subsample] in backgrounds: |
134 |
< |
print '\t- %s as BKG'%job.group[subsample] |
135 |
< |
Ebackground = getTree(job,EvalCut,path,subsample) |
136 |
< |
ROOT.gDirectory.Cd(workdir) |
137 |
< |
EbScale = getScale(job,path,config,global_rescale,subsample) |
138 |
< |
Ebackgrounds.append(Ebackground) |
139 |
< |
EbScales.append(EbScale) |
140 |
< |
print '\t\t\t%s events'%Ebackground.GetEntries() |
141 |
< |
|
142 |
< |
else: |
217 |
< |
if job.name in signals: |
218 |
< |
print '\tREADING IN %s AS SIG'%job.name |
219 |
< |
Esignal = getTree(job,EvalCut,path) |
220 |
< |
ROOT.gDirectory.Cd(workdir) |
221 |
< |
EsScale = getScale(job,path,config,global_rescale) |
222 |
< |
Esignals.append(Esignal) |
223 |
< |
EsScales.append(EsScale) |
224 |
< |
print '\t\t\t%s events'%Esignal.GetEntries() |
225 |
< |
elif job.name in backgrounds: |
226 |
< |
print '\tREADING IN %s AS BKG'%job.name |
227 |
< |
Ebackground = getTree(job,EvalCut,path) |
228 |
< |
ROOT.gDirectory.Cd(workdir) |
229 |
< |
EbScale = getScale(job,path,config,global_rescale) |
230 |
< |
Ebackgrounds.append(Ebackground) |
231 |
< |
EbScales.append(EbScale) |
232 |
< |
print '\t\t\t%s events'%Ebackground.GetEntries() |
233 |
< |
|
116 |
> |
#load trees |
117 |
> |
for job in signal_samples: |
118 |
> |
print '\tREADING IN %s AS SIG'%job.name |
119 |
> |
Tsignal = tc.get_tree(job,TrainCut) |
120 |
> |
ROOT.gDirectory.Cd(workdir) |
121 |
> |
TsScale = tc.get_scale(job,config)*global_rescale |
122 |
> |
Tsignals.append(Tsignal) |
123 |
> |
TsScales.append(TsScale) |
124 |
> |
Esignal = tc.get_tree(job,EvalCut) |
125 |
> |
Esignals.append(Esignal) |
126 |
> |
EsScales.append(TsScale) |
127 |
> |
print '\t\t\tTraining %s events'%Tsignal.GetEntries() |
128 |
> |
print '\t\t\tEval %s events'%Esignal.GetEntries() |
129 |
> |
for job in background_samples: |
130 |
> |
print '\tREADING IN %s AS BKG'%job.name |
131 |
> |
Tbackground = tc.get_tree(job,TrainCut) |
132 |
> |
ROOT.gDirectory.Cd(workdir) |
133 |
> |
TbScale = tc.get_scale(job,config)*global_rescale |
134 |
> |
Tbackgrounds.append(Tbackground) |
135 |
> |
TbScales.append(TbScale) |
136 |
> |
Ebackground = tc.get_tree(job,EvalCut) |
137 |
> |
ROOT.gDirectory.Cd(workdir) |
138 |
> |
Ebackgrounds.append(Ebackground) |
139 |
> |
EbScales.append(TbScale) |
140 |
> |
print '\t\t\tTraining %s events'%Tbackground.GetEntries() |
141 |
> |
print '\t\t\tEval %s events'%Ebackground.GetEntries() |
142 |
> |
|
143 |
|
|
235 |
– |
#output = ROOT.TFile.Open(fnameOutput, "RECREATE") |
144 |
|
factory = ROOT.TMVA.Factory(factoryname, output, factorysettings) |
145 |
|
|
146 |
|
#set input trees |
147 |
|
for i in range(len(Tsignals)): |
240 |
– |
|
241 |
– |
#print 'Number of SIG entries: %s'%Tsignals[i].GetEntries() |
148 |
|
factory.AddSignalTree(Tsignals[i], TsScales[i], ROOT.TMVA.Types.kTraining) |
243 |
– |
#print 'Number of SIG entries: %s'%Esignals[i].GetEntries() |
149 |
|
factory.AddSignalTree(Esignals[i], EsScales[i], ROOT.TMVA.Types.kTesting) |
150 |
|
|
151 |
|
for i in range(len(Tbackgrounds)): |
152 |
|
if (Tbackgrounds[i].GetEntries()>0): |
248 |
– |
#print 'Number of BKG entries: %s'%Tbackgrounds[i].GetEntries() |
153 |
|
factory.AddBackgroundTree(Tbackgrounds[i], TbScales[i], ROOT.TMVA.Types.kTraining) |
154 |
|
|
155 |
|
if (Ebackgrounds[i].GetEntries()>0): |
252 |
– |
#print 'Number of BKG entries: %s'%Ebackgrounds[i].GetEntries() |
156 |
|
factory.AddBackgroundTree(Ebackgrounds[i], EbScales[i], ROOT.TMVA.Types.kTesting) |
157 |
|
|
255 |
– |
|
158 |
|
for var in MVA_Vars['Nominal']: |
159 |
|
factory.AddVariable(var,'D') # add the variables |
258 |
– |
#for var in spectators: |
259 |
– |
# factory.AddSpectator(var,'D') #add specators |
160 |
|
|
161 |
|
#Execute TMVA |
162 |
|
factory.SetSignalWeightExpression(weightF) |
169 |
|
output.Write() |
170 |
|
|
171 |
|
#WRITE INFOFILE |
172 |
< |
infofile = open(MVAdir+factoryname+'_'+MVAname+suffix+'.info','w') |
172 |
> |
infofile = open(MVAdir+factoryname+'_'+MVAname+'.info','w') |
173 |
|
info=mvainfo(MVAname) |
174 |
|
info.factoryname=factoryname |
175 |
|
info.factorysettings=factorysettings |
179 |
|
info.path=path |
180 |
|
info.varset=treeVarSet |
181 |
|
info.vars=MVA_Vars['Nominal'] |
282 |
– |
#info.spectators=spectators |
182 |
|
pickle.dump(info,infofile) |
183 |
|
infofile.close() |
184 |
|
|