ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/train.py
Revision: 1.12
Committed: Fri Oct 5 16:24:52 2012 UTC (12 years, 7 months ago) by peller
Content type: text/x-python
Branch: MAIN
CVS Tags: hcpPreAppFreeze
Changes since 1.11: +2 -1 lines
Log Message:
weightF

File Contents

# Content
1 #!/usr/bin/env python
2 from samplesclass import sample
3 from printcolor import printc
4 import pickle
5 import ROOT
6 from ROOT import TFile, TTree
7 import ROOT
8 from array import array
9 from optparse import OptionParser
10 from BetterConfigParser import BetterConfigParser
11 import sys
12 from mvainfos import mvainfo
13 from gethistofromtree import getScale
14
15
16 #warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
17
18
19 #usage: ./train.py -T <training> -C <config file> [-C <config file> ...] [-v]
20
21
# CONFIGURE: command-line options.
#   -v / --verbose   stored as opts.verbose (used below as the GUI switch)
#   -T / --training  stored as opts.training (used below as the training name)
#   -C / --config    configuration file; may be given more than once
argv = sys.argv
parser = OptionParser()
parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False,
                  help="Verbose mode.")
parser.add_option("-T", "--training", dest="training", default="",
                  help="Training")
parser.add_option("-C", "--config", dest="config", default=[], action="append",
                  help="configuration file")
(opts, args) = parser.parse_args(argv)
# "-C" is an appending option whose default is an empty list, so a missing
# option never yields the empty string. Test for emptiness instead, otherwise
# the fallback to the default file name "config" can never take effect.
if not opts.config:
    opts.config = "config"
34
# Read the configuration file(s) selected on the command line.
config = BetterConfigParser()
config.read(opts.config)
anaTag = config.get("Analysis", "tag")

# Training name and GUI switch come straight from the command-line options.
run = opts.training
gui = opts.verbose

# Global rescale from the train/test splitting; handed to getScale() below.
global_rescale = 2.

# Directory that receives the TMVA output file and the .info file.
MVAdir = '%s/data/' % config.get('Directories', 'vhbbpath')

# Space-separated list of systematics.
systematics = config.get('systematics', 'systematics').split(' ')

# Expression used as signal and background weight expression in TMVA.
weightF = config.get('Weights', 'weightF')
53
54
55
56
def getTree(job,cut,path,subsample=-1):
    """Return a copy of the job's tree containing only the selected events.

    job       -- sample description; provides getpath(), tree and subcuts
    cut       -- selection string applied when copying the tree
    path      -- directory prefix of the input file
    subsample -- index into job.subcuts; when greater than -1 the selection
                 becomes '(cut) & (job.subcuts[subsample])'

    The input file is opened read-only and closed again before returning.
    The module-level `output` file is made the current directory before the
    copy is taken -- presumably so that the copied tree does not belong to the
    input file that is closed afterwards (confirm against ROOT's CopyTree).
    """
    source_file = TFile.Open('%s/%s' % (path, job.getpath()), 'read')
    output.cd()
    full_tree = source_file.Get(job.tree)

    # Plain samples use the cut as is; sub-samples add their own sub-cut.
    selection = cut
    if subsample > -1:
        selection = '(%s) & (%s)' % (cut, job.subcuts[subsample])

    selected_tree = full_tree.CopyTree(selection)
    source_file.Close()
    return selected_tree
77
78 #def getScale(job,subsample=-1):
79 # input = TFile.Open(job.getpath())
80 # CountWithPU = input.Get("CountWithPU")
81 # CountWithPU2011B = input.Get("CountWithPU2011B")
82 # #print lumi*xsecs[i]/hist.GetBinContent(1)
83 # return float(job.lumi)*float(job.xsec)*float(job.sf)/(0.46502*CountWithPU.GetBinContent(1)+0.53498*CountWithPU2011B.GetBinContent(1))*2/float(job.split)
84
85
86
# CONFIG
# TMVA factory name and option string.
factoryname = config.get('factory', 'factoryname')
factorysettings = config.get('factory', 'factorysettings')

# MVA method: everything specific to this training lives in the config
# section named after the training (`run`).
MVAtype = config.get(run, 'MVAtype')
MVAname = run
MVAsettings = config.get(run, 'MVAsettings')
fnameOutput = '%s%s_%s.root' % (MVAdir, factoryname, MVAname)

# Location of the input samples.
path = config.get(run, 'path')

# The training section names a cut; the cut string itself is in [Cuts].
TCutname = config.get(run, 'treeCut')
TCut = config.get('Cuts', TCutname)
#print TCut

# Space-separated names of the signal and background samples.
signals = config.get(run, 'signals').split(' ')
backgrounds = config.get(run, 'backgrounds').split(' ')

treeVarSet = config.get(run, 'treeVarSet')

# Input variables of the MVA, space-separated in the variable-set section.
MVA_Vars = {'Nominal': config.get(treeVarSet, 'Nominal').split(' ')}

# Spectators (currently disabled):
#spectators=config.get(treeVarSet,'spectators')
#spectators=spectators.split(' ')
120
# TRAINING samples: un-pickle the list of sample descriptions.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only read samples.info files that come from a trusted source.
# The with-block closes the file even when un-pickling fails.
with open(path+'/samples.info','r') as infofile:
    info = pickle.load(infofile)

# Workdir: path of the current ROOT directory at this point; the read-in code
# changes back to it after every tree that is read.
workdir = ROOT.gDirectory.GetPath()

# Tree cut combined with the EventForTraining flag:
# flag 1 selects the training events, flag 0 the testing events.
TrainCut = '%s & EventForTraining==1' % TCut
EvalCut = '%s & EventForTraining==0' % TCut
132
def _collect_trees(cut):
    """Read in the trees of all active samples, restricted to *cut*.

    Walks over the sample list `info`. Every sample (or, for samples with
    sub-samples, every sub-sample) whose name is listed in `signals` or
    `backgrounds` is read with getTree() and its scale is obtained from
    getScale(). Names found in neither list are skipped.

    Returns the tuple (signal trees, signal scales, background trees,
    background scales); entry i of a scale list belongs to entry i of the
    corresponding tree list.
    """
    sig_trees, sig_scales = [], []
    bkg_trees, bkg_scales = [], []

    def _destination(name):
        # Pick label and target lists for a sample name; the signal list is
        # checked first, so it wins if a name appears in both lists.
        if name in signals:
            return 'SIG', sig_trees, sig_scales
        if name in backgrounds:
            return 'BKG', bkg_trees, bkg_scales
        return None

    def _store(job, trees, scales, *subsample):
        # *subsample is empty for a plain sample and holds the sub-sample
        # index otherwise, so getTree()/getScale() fall back to their own
        # defaults exactly when no index is given.
        tree = getTree(job, cut, path, *subsample)
        # getTree() changes the current ROOT directory; go back to the
        # working directory before the scale is computed.
        ROOT.gDirectory.Cd(workdir)
        scale = getScale(job, path, config, global_rescale, *subsample)
        trees.append(tree)
        scales.append(scale)
        print('\t\t\t%s events' % tree.GetEntries())

    for job in info:
        # NOTE(review): eval() runs whatever expression is stored in
        # job.active; the sample list must come from a trusted source.
        if not eval(job.active):
            continue

        if job.subsamples:
            print('\tREADING IN SUBSAMPLES of %s' % job.name)
            for subsample in range(len(job.group)):
                target = _destination(job.subnames[subsample])
                if target is None:
                    continue
                label, trees, scales = target
                print('\t- %s as %s' % (job.group[subsample], label))
                _store(job, trees, scales, subsample)
        else:
            target = _destination(job.name)
            if target is None:
                continue
            label, trees, scales = target
            print('\tREADING IN %s AS %s' % (job.name, label))
            _store(job, trees, scales)

    return sig_trees, sig_scales, bkg_trees, bkg_scales


# The output file has to exist before any tree is read: getTree() makes it
# the current directory before copying the selected events.
output = ROOT.TFile.Open(fnameOutput, "RECREATE")

# load TRAIN trees
print('\n\t>>> TRAINING EVENTS <<<\n')
Tsignals, TsScales, Tbackgrounds, TbScales = _collect_trees(TrainCut)

# load EVALUATE trees
print('\n\t>>> TESTING EVENTS <<<\n')
Esignals, EsScales, Ebackgrounds, EbScales = _collect_trees(EvalCut)
236
237
# The TMVA factory writes into the output file that was opened before the
# trees were read in.
factory = ROOT.TMVA.Factory(factoryname, output, factorysettings)

kTraining = ROOT.TMVA.Types.kTraining
kTesting = ROOT.TMVA.Types.kTesting

# Register the input trees. Signal trees are always added, training tree
# first, then the testing tree at the same list position.
for idx, train_signal in enumerate(Tsignals):
    factory.AddSignalTree(train_signal, TsScales[idx], kTraining)
    factory.AddSignalTree(Esignals[idx], EsScales[idx], kTesting)

# Background trees are only added when they contain at least one entry.
for idx, train_background in enumerate(Tbackgrounds):
    if train_background.GetEntries() > 0:
        factory.AddBackgroundTree(train_background, TbScales[idx], kTraining)

    test_background = Ebackgrounds[idx]
    if test_background.GetEntries() > 0:
        factory.AddBackgroundTree(test_background, EbScales[idx], kTesting)

# Declare the input variables; each one is added with type code 'D'.
for var in MVA_Vars['Nominal']:
    factory.AddVariable(var, 'D')
# Spectators (currently disabled):
#for var in spectators:
#    factory.AddSpectator(var,'D')

# Execute TMVA: the same weight expression is used for signal and background.
factory.SetSignalWeightExpression(weightF)
factory.SetBackgroundWeightExpression(weightF)
factory.Verbose()
factory.BookMethod(MVAtype, MVAname, MVAsettings)
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
output.Write()
273
# WRITE INFOFILE: pickle a description of this training next to the TMVA
# output file. Note that `info` is rebound here; from now on it no longer
# refers to the sample list.
info = mvainfo(MVAname)
info.factoryname = factoryname
info.factorysettings = factorysettings
info.MVAtype = MVAtype
info.MVAsettings = MVAsettings
info.weightfilepath = MVAdir
info.path = path
info.varset = treeVarSet
info.vars = MVA_Vars['Nominal']
#info.spectators=spectators

# The file is opened only once the object is complete, and the with-block
# closes it even if pickling fails.
with open(MVAdir+factoryname+'_'+MVAname+'.info','w') as infofile:
    pickle.dump(info, infofile)
288
# Open the TMVA GUI on the freshly written output file when the verbose
# option was given; gApplication.Run() is the last statement of the script.
if gui:
    for macro_line in (".L TMVAGui.C", "TMVAGui(\"%s\")" % fnameOutput):
        ROOT.gROOT.ProcessLine(macro_line)
    ROOT.gApplication.Run()
294
295