ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/train.py
Revision: 1.14
Committed: Thu Jan 10 20:03:54 2013 UTC (12 years, 4 months ago) by bortigno
Content type: text/x-python
Branch: MAIN
Changes since 1.13: +15 -14 lines
Log Message:
Add loading of the VHbb namespace library. This allows namespace variables to be used in the BDT training.

File Contents

# Content
1 #!/usr/bin/env python
2 from optparse import OptionParser
3 from BetterConfigParser import BetterConfigParser
4 import sys
5
6 #warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
7 #usage: ./train run gui
8
9
10 #CONFIGURE
# Command-line interface:
#   -v / --verbose   : boolean flag, stored in opts.verbose
#   -T / --training  : name of the training to run, stored in opts.training
#   -C / --config    : configuration file; may be repeated, values are
#                      collected into the list opts.config
argv = sys.argv
parser = OptionParser()
parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False,
                  help="Verbose mode.")
parser.add_option("-T", "--training", dest="training", default="",
                  help="Training")
parser.add_option("-C", "--config", dest="config", default=[], action="append",
                  help="configuration file")
(opts, args) = parser.parse_args(argv)
# "-C" uses action="append" with a list default, so when the option is not
# given opts.config is an empty list, never the empty string.  Test for
# emptiness so that the default file name is actually applied.
if not opts.config:
    opts.config = "config"
22
23
24 from samplesclass import sample
25 from printcolor import printc
26 import pickle
27 import ROOT
28 from ROOT import TFile, TTree
29 import ROOT
30 from array import array
31 from mvainfos import mvainfo
32 from gethistofromtree import getScale
33
34
35
# Read the configuration file(s) selected on the command line.
config = BetterConfigParser()
config.read(opts.config)
anaTag = config.get("Analysis", "tag")

# Command-line choices: the training to run and the verbose flag.
run = opts.training
gui = opts.verbose

# Global rescale from the train/test splitting.
global_rescale = 2.

# Locations: directory for the training output and the samples info file.
MVAdir = config.get('Directories', 'vhbbpath') + '/data/'
samplesinfo = config.get('Directories', 'samplesinfo')

# Systematics, given in the config as a space-separated list.
systematics = config.get('systematics', 'systematics').split(' ')

# Event weight expression.
weightF = config.get('Weights', 'weightF')

# Load the VHbb namespace library into ROOT.
VHbbNameSpace = config.get('VHbbNameSpace', 'library')
ROOT.gSystem.Load(VHbbNameSpace)
58
def getTree(job,cut,path,subsample=-1):
    """Return a copy of the job's tree restricted to the given selection.

    job       -- sample description; this function uses job.getpath(),
                 job.tree and, for subsamples, job.subcuts
    cut       -- selection expression handed to CopyTree
    path      -- directory containing the sample file
    subsample -- index into job.subcuts; when > -1 the subsample cut is
                 and-ed with `cut`, otherwise `cut` is used alone

    The module-level `output` file is made the current directory before the
    copy and the input file is closed afterwards.
    NOTE(review): presumably output.cd() makes the copied tree belong to
    `output` so that it outlives the input file -- confirm against the ROOT
    documentation.
    """
    source_file = TFile.Open(path+'/'+job.getpath(),'read')
    output.cd()
    full_tree = source_file.Get(job.tree)

    if subsample>-1:
        selection = '(%s) & (%s)'%(cut,job.subcuts[subsample])
    else:
        selection = cut
    selected_tree = full_tree.CopyTree(selection)

    source_file.Close()
    return selected_tree
79
80 #def getScale(job,subsample=-1):
81 # input = TFile.Open(job.getpath())
82 # CountWithPU = input.Get("CountWithPU")
83 # CountWithPU2011B = input.Get("CountWithPU2011B")
84 # #print lumi*xsecs[i]/hist.GetBinContent(1)
85 # return float(job.lumi)*float(job.xsec)*float(job.sf)/(0.46502*CountWithPU.GetBinContent(1)+0.53498*CountWithPU2011B.GetBinContent(1))*2/float(job.split)
86
87
88
# ---- Training configuration ----

# TMVA factory name and option string.
factoryname = config.get('factory', 'factoryname')
factorysettings = config.get('factory', 'factorysettings')

# The MVA is named after the selected training; its type and options are
# read from the config section of the same name.
MVAtype = config.get(run, 'MVAtype')
MVAname = run
MVAsettings = config.get(run, 'MVAsettings')
fnameOutput = MVAdir + factoryname + '_' + MVAname + '.root'

# Location of the input sample files.
path = config.get(run, 'path')

# The training section names a cut; its expression is stored in [Cuts].
TCutname = config.get(run, 'treeCut')
TCut = config.get('Cuts', TCutname)

# Space-separated sample names to treat as signal / background.
signals = config.get(run, 'signals').split(' ')
backgrounds = config.get(run, 'backgrounds').split(' ')

# Input variables: the training section names a variable set whose
# 'Nominal' entry is a space-separated list of variables.
treeVarSet = config.get(run, 'treeVarSet')
MVA_Vars = {'Nominal': config.get(treeVarSet, 'Nominal').split(' ')}

# Spectators (disabled):
#spectators=config.get(treeVarSet,'spectators')
#spectators=spectators.split(' ')
122
#TRAINING samples
# The sample descriptions are stored as a pickle.  The context manager
# guarantees the file is closed even if unpickling fails.
# NOTE: unpickling can execute arbitrary code; samplesinfo must point to a
# trusted file.
with open(samplesinfo, 'rb') as infofile:
    info = pickle.load(infofile)

#Workdir
# Remember the current ROOT directory so it can be restored after each
# sample file has been read.
workdir = ROOT.gDirectory.GetPath()

# Split the selection into training and testing events via the
# EventForTraining flag.  TCut is parenthesised (as getTree does for the
# subsample cuts) so that a cut containing a lower-precedence operator such
# as '||' is and-ed as a whole with the flag.
TrainCut = '(%s) & EventForTraining==1' % TCut
EvalCut = '(%s) & EventForTraining==0' % TCut
134
def _load_trees(cut):
    """Read all active signal and background samples with `cut` applied.

    Loops over the sample descriptions in `info`.  For a sample with
    subsamples, each subsample whose name is listed in `signals` or
    `backgrounds` is read separately; otherwise the sample itself is read if
    its name is listed.  Samples in neither list are skipped.

    Returns a tuple (signal_trees, signal_scales, background_trees,
    background_scales) of four lists; trees and scales are index-aligned.
    """
    sig_trees, sig_scales = [], []
    bkg_trees, bkg_scales = [], []

    def collect(job, trees, scales, *subsample):
        # `subsample` is empty for a plain sample or holds the subsample
        # index; it is forwarded unchanged to getTree and getScale.
        tree = getTree(job, cut, path, *subsample)
        # Return to the working directory after the sample file was read.
        ROOT.gDirectory.Cd(workdir)
        scale = getScale(job, path, config, global_rescale, *subsample)
        trees.append(tree)
        scales.append(scale)
        print('\t\t\t%s events'%tree.GetEntries())

    for job in info:
        # NOTE(review): job.active comes from the pickled samples file and is
        # passed to eval(); only use sample files from a trusted source.
        if not eval(job.active):
            continue

        if job.subsamples:
            print('\tREADING IN SUBSAMPLES of %s'%job.name)
            for subsample in range(len(job.group)):
                if job.subnames[subsample] in signals:
                    print('\t- %s as SIG'%job.group[subsample])
                    collect(job, sig_trees, sig_scales, subsample)
                elif job.subnames[subsample] in backgrounds:
                    print('\t- %s as BKG'%job.group[subsample])
                    collect(job, bkg_trees, bkg_scales, subsample)
        elif job.name in signals:
            print('\tREADING IN %s AS SIG'%job.name)
            collect(job, sig_trees, sig_scales)
        elif job.name in backgrounds:
            print('\tREADING IN %s AS BKG'%job.name)
            collect(job, bkg_trees, bkg_scales)

    return sig_trees, sig_scales, bkg_trees, bkg_scales


# The output file must exist before any tree is read: getTree makes it the
# current directory before copying the selected events.
output = ROOT.TFile.Open(fnameOutput, "RECREATE")

#load TRAIN trees
print('\n\t>>> TRAINING EVENTS <<<\n')
Tsignals, TsScales, Tbackgrounds, TbScales = _load_trees(TrainCut)

#load EVALUATE trees
print('\n\t>>> TESTING EVENTS <<<\n')
Esignals, EsScales, Ebackgrounds, EbScales = _load_trees(EvalCut)
238
239
# Create the TMVA factory on the already opened output file.
factory = ROOT.TMVA.Factory(factoryname, output, factorysettings)

train_kind = ROOT.TMVA.Types.kTraining
test_kind = ROOT.TMVA.Types.kTesting

# Register the input trees.  Training and testing lists are index-aligned.
for idx, train_sig in enumerate(Tsignals):
    factory.AddSignalTree(train_sig, TsScales[idx], train_kind)
    factory.AddSignalTree(Esignals[idx], EsScales[idx], test_kind)

# Background trees are only registered when they contain entries.
for idx, train_bkg in enumerate(Tbackgrounds):
    if train_bkg.GetEntries() > 0:
        factory.AddBackgroundTree(train_bkg, TbScales[idx], train_kind)

    test_bkg = Ebackgrounds[idx]
    if test_bkg.GetEntries() > 0:
        factory.AddBackgroundTree(test_bkg, EbScales[idx], test_kind)


# Declare the input variables.
for var in MVA_Vars['Nominal']:
    factory.AddVariable(var, 'D')
#for var in spectators:
#    factory.AddSpectator(var,'D') #add spectators

# Execute TMVA: same weight expression for signal and background, then
# book, train, test and evaluate the method and write the output file.
factory.SetSignalWeightExpression(weightF)
factory.SetBackgroundWeightExpression(weightF)
factory.Verbose()
factory.BookMethod(MVAtype, MVAname, MVAsettings)
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
output.Write()
275
#WRITE INFOFILE
# Record how this MVA was trained.  Note that `info` is rebound here from
# the sample list to the mvainfo object.
info=mvainfo(MVAname)
info.factoryname=factoryname
info.factorysettings=factorysettings
info.MVAtype=MVAtype
info.MVAsettings=MVAsettings
info.weightfilepath=MVAdir
info.path=path
info.varset=treeVarSet
info.vars=MVA_Vars['Nominal']
#info.spectators=spectators
# Open the file only once the object is complete, so that an error above
# does not leave a truncated .info file behind; the context manager closes
# the file even if pickling fails.
with open(MVAdir+factoryname+'_'+MVAname+'.info','wb') as infofile:
    pickle.dump(info,infofile)
290
# Open the TMVA GUI on the training output when the verbose flag was given.
if gui:
    gui_call = "TMVAGui(\"%s\")" % fnameOutput
    ROOT.gROOT.ProcessLine(".L TMVAGui.C")
    ROOT.gROOT.ProcessLine(gui_call)
    # Start the ROOT application event loop.
    ROOT.gApplication.Run()
296
297