ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/train.py
Revision: 1.8
Committed: Fri Aug 3 15:43:03 2012 UTC (12 years, 9 months ago) by peller
Content type: text/x-python
Branch: MAIN
Changes since 1.7: +66 -39 lines
Log Message:
added samples configs w/o splitting and improved training script

File Contents

# Content
1 #!/usr/bin/env python
2 from samplesclass import sample
3 from printcolor import printc
4 import pickle
5 import ROOT
6 from ROOT import TFile, TTree
7 import ROOT
8 from array import array
9 from BetterConfigParser import BetterConfigParser
10 import sys
11 from mvainfos import mvainfo
12 from gethistofromtree import getScale
13
14
15 #warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
16
17
18 #usage: ./train run gui
19
20
# CONFIGURE

# Load the analysis configuration.  Other configuration files that have been
# used with this script are './config' and './config7TeV'.
config = BetterConfigParser()
config.read('./config7TeV_ZZ')

# Global rescale from the train/test splitting; it is handed to getScale()
# for every sample that is read in.
global_rescale = 2.

# Locations: the training output goes to <Wdir>/weights.
Wdir = config.get('Directories', 'Wdir')

# Systematics, stored in the config as one space-separated string.
systematics = config.get('systematics', 'systematics').split(' ')

# Weight expression that is later passed to the TMVA factory.
weightF = config.get('Weights', 'weightF')
41
42
43
44
def getTree(job,cut,subsample=-1):
    """Return a copy of a sample's tree reduced to the events passing a cut.

    Arguments:
        job       -- sample description; this function uses job.getpath(),
                     job.tree and, for sub-samples, job.subcuts.
        cut       -- selection string applied to the tree.
        subsample -- index into job.subcuts.  When it is > -1 the sub-sample
                     cut is combined with `cut`; the default (-1) applies
                     `cut` alone.

    Returns the tree produced by CopyTree().  The module-level `output` file
    is made the current ROOT directory before copying, so that the copy is
    not attached to the input file, which is closed before returning.

    Raises IOError if the input file cannot be opened or does not contain
    job.tree.
    """
    # Build the selection first so that a bad sub-sample index fails before
    # any file is opened.
    if subsample > -1:
        selection = '(%s) & (%s)' % (cut, job.subcuts[subsample])
    else:
        selection = cut

    newinput = TFile.Open(job.getpath(), 'read')
    # PyROOT hands back a null (falsy) object when the open fails.
    if not newinput or newinput.IsZombie():
        raise IOError('cannot open input file %s' % job.getpath())

    try:
        output.cd()
        Tree = newinput.Get(job.tree)
        if not Tree:
            raise IOError('tree %s not found in %s' % (job.tree, job.getpath()))
        CuttedTree = Tree.CopyTree(selection)
    finally:
        # Always release the input file, also when the copy fails.
        newinput.Close()

    return CuttedTree
65
66 #def getScale(job,subsample=-1):
67 # input = TFile.Open(job.getpath())
68 # CountWithPU = input.Get("CountWithPU")
69 # CountWithPU2011B = input.Get("CountWithPU2011B")
70 # #print lumi*xsecs[i]/hist.GetBinContent(1)
71 # return float(job.lumi)*float(job.xsec)*float(job.sf)/(0.46502*CountWithPU.GetBinContent(1)+0.53498*CountWithPU2011B.GetBinContent(1))*2/float(job.split)
72
# Command line: ./train <run> [gui]
#   run -- name of the config section that describes the MVA to train
#   gui -- optional; the literal word 'gui' opens the TMVA GUI after training
if len(sys.argv) < 2:
    sys.exit('usage: ./train <run> [gui]')
run = sys.argv[1]
# The second argument may be omitted; an empty string never matches 'gui'.
gui = sys.argv[2] if len(sys.argv) > 2 else ''
75
76
# CONFIG

# Factory name and option string.
factoryname = config.get('factory', 'factoryname')
factorysettings = config.get('factory', 'factorysettings')

# MVA: the run name is both the config section and the name of the method.
MVAname = run
MVAtype = config.get(run, 'MVAtype')
MVAsettings = config.get(run, 'MVAsettings')
fnameOutput = Wdir + '/weights/' + '_'.join([factoryname, MVAname]) + '.root'

# Location of the samples (samples.info is read from this directory).
path = config.get(run, 'path')

# The run section names a cut; the cut string itself lives in [Cuts].
TCutname = config.get(run, 'treeCut')
TCut = config.get('Cuts', TCutname)

# Space-separated names of the signal and background samples.
signals = config.get(run, 'signals').split(' ')
backgrounds = config.get(run, 'backgrounds').split(' ')

# Name of the config section that lists the input variables.
treeVarSet = config.get(run, 'treeVarSet')

# Input variables, keyed by variation; only 'Nominal' is filled here.
MVA_Vars = {'Nominal': config.get(treeVarSet, 'Nominal').split(' ')}

# Spectators are currently disabled:
#spectators=config.get(treeVarSet,'spectators')
#spectators=spectators.split(' ')
110
# TRAINING samples
# The list of sample objects is stored as a pickle next to the ntuples.
# NOTE(review): unpickling can execute arbitrary code -- samples.info must
# come from a trusted location.
# Binary mode is what pickle expects; 'with' closes the file even when
# loading fails.
with open(path + '/samples.info', 'rb') as infofile:
    info = pickle.load(infofile)

# Remember the current ROOT directory so that it can be restored after
# input files have been opened.
workdir = ROOT.gDirectory.GetPath()

# The EventForTraining flag separates training events from evaluation events.
TrainCut = '%s && EventForTraining==1' % TCut
EvalCut = '%s && EventForTraining==0' % TCut
122
def _loadTrees(samples, cut):
    """Read the signal and background trees of all active samples.

    Arguments:
        samples -- iterable of sample objects (uses .active, .subsamples,
                   .group, .subnames and .name).
        cut     -- selection string passed on to getTree().

    Returns a tuple (sigTrees, sigScales, bkgTrees, bkgScales) of four lists;
    trees and scales of the same kind are index-aligned.  A sample, or a
    sub-sample, is read only if its name appears in the module-level
    `signals` or `backgrounds` list; everything else is skipped.
    """
    sigTrees = []
    sigScales = []
    bkgTrees = []
    bkgScales = []

    def _store(job, isSignal, subsample=None):
        # Read one (sub)sample and file tree and scale under SIG or BKG.
        # The sub-sample index is only passed on when there is one, so that
        # getTree()/getScale() keep using their own defaults otherwise.
        if subsample is None:
            tree = getTree(job, cut)
        else:
            tree = getTree(job, cut, subsample)
        # getTree() changes the current ROOT directory; go back first.
        ROOT.gDirectory.Cd(workdir)
        if subsample is None:
            scale = getScale(job, global_rescale)
        else:
            scale = getScale(job, global_rescale, subsample)
        if isSignal:
            sigTrees.append(tree)
            sigScales.append(scale)
        else:
            bkgTrees.append(tree)
            bkgScales.append(scale)
        print('\t\t\t%s events' % tree.GetEntries())

    for job in samples:
        # NOTE(review): job.active is a string that is eval()'d; this is only
        # acceptable because the sample list comes from a trusted file.
        if not eval(job.active):
            continue

        if job.subsamples:
            print('\tREADING IN SUBSAMPLES of %s' % job.name)
            for subsample in range(len(job.group)):
                subname = job.subnames[subsample]
                if subname in signals:
                    print('\t- %s as SIG' % job.group[subsample])
                    _store(job, True, subsample)
                elif subname in backgrounds:
                    print('\t- %s as BKG' % job.group[subsample])
                    _store(job, False, subsample)
        elif job.name in signals:
            print('\tREADING IN %s AS SIG' % job.name)
            _store(job, True)
        elif job.name in backgrounds:
            print('\tREADING IN %s AS BKG' % job.name)
            _store(job, False)

    return sigTrees, sigScales, bkgTrees, bkgScales


# The output file must be open before the first getTree() call, because
# getTree() cd()s into it before copying a tree.
output = ROOT.TFile.Open(fnameOutput, "RECREATE")

# load TRAIN trees
print('\n\t>>> TRAINING EVENTS <<<\n')
Tsignals, TsScales, Tbackgrounds, TbScales = _loadTrees(info, TrainCut)

# load EVALUATE trees
print('\n\t>>> TESTING EVENTS <<<\n')
Esignals, EsScales, Ebackgrounds, EbScales = _loadTrees(info, EvalCut)
226
227
228 #output = ROOT.TFile.Open(fnameOutput, "RECREATE")
factory = ROOT.TMVA.Factory(factoryname, output, factorysettings)

# Tree-type flags used when registering the trees below.
kTraining = ROOT.TMVA.Types.kTraining
kTesting = ROOT.TMVA.Types.kTesting

# Register the input trees.  The training and testing lists are filled by
# identical loops over the samples, so the same index addresses the same
# sample in both.
for i, trainTree in enumerate(Tsignals):
    factory.AddSignalTree(trainTree, TsScales[i], kTraining)
    factory.AddSignalTree(Esignals[i], EsScales[i], kTesting)

# Background trees are only registered when they contain events.
# NOTE(review): the nesting was reconstructed from a dump without
# indentation; the two checks are taken to be independent of each other.
for i, trainTree in enumerate(Tbackgrounds):
    if trainTree.GetEntries() > 0:
        factory.AddBackgroundTree(trainTree, TbScales[i], kTraining)

    testTree = Ebackgrounds[i]
    if testTree.GetEntries() > 0:
        factory.AddBackgroundTree(testTree, EbScales[i], kTesting)

# Declare the input variables.
for var in MVA_Vars['Nominal']:
    factory.AddVariable(var, 'D')
#for var in spectators:
#    factory.AddSpectator(var,'D') #add spectators

# Execute TMVA
# NOTE(review): only the signal weight expression is set; confirm that the
# background is meant to be trained without weightF.
factory.SetSignalWeightExpression(weightF)
factory.Verbose()
factory.BookMethod(MVAtype, MVAname, MVAsettings)
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
output.Write()
262
# WRITE INFOFILE
# Record how this MVA was trained.  Note that `info` is rebound here from the
# sample list to the mvainfo object.
info = mvainfo(MVAname)
info.factoryname = factoryname
info.factorysettings = factorysettings
info.MVAtype = MVAtype
info.MVAsettings = MVAsettings
info.weightfilepath = Wdir + '/weights'
info.path = path
info.varset = treeVarSet
info.vars = MVA_Vars['Nominal']
#info.spectators=spectators

# The file is opened only once the object is complete, so a failure above
# does not leave a truncated .info file behind; 'with' closes it in all cases.
with open(Wdir + '/weights/' + factoryname + '_' + MVAname + '.info', 'wb') as infofile:
    pickle.dump(info, infofile)
277
# Open the TMVA GUI on the freshly written output file, but only when the
# second command-line argument was the word 'gui'.
if gui == 'gui':
    # Load the GUI macro, then start it on the output file.
    for macroLine in (".L TMVAGui.C", 'TMVAGui("%s")' % fnameOutput):
        ROOT.gROOT.ProcessLine(macroLine)
    # Hand control to the ROOT application loop.
    ROOT.gApplication.Run()
283
284