ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/train.py
Revision: 1.13
Committed: Thu Oct 11 16:53:25 2012 UTC (12 years, 7 months ago) by nmohr
Content type: text/x-python
Branch: MAIN
CVS Tags: hcpApproval, HCP_unblinding, hcpPreApp
Changes since 1.12: +2 -1 lines
Log Message:
Only one place for samples info

File Contents

# Content
1 #!/usr/bin/env python
2 from samplesclass import sample
3 from printcolor import printc
4 import pickle
5 import ROOT
6 from ROOT import TFile, TTree
7 import ROOT
8 from array import array
9 from optparse import OptionParser
10 from BetterConfigParser import BetterConfigParser
11 import sys
12 from mvainfos import mvainfo
13 from gethistofromtree import getScale
14
15
16 #warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
17
18
19 #usage: ./train.py -T <training section> -C <config file> [-C <config file> ...] [-v]
20
21
#CONFIGURE
# Command line: -T names the config section describing the training,
# -C may be repeated to read several config files, -v opens the TMVA GUI
# once the training has finished.
argv = sys.argv
parser = OptionParser()
parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False,
                  help="Verbose mode.")
parser.add_option("-T", "--training", dest="training", default="",
                  help="Training")
parser.add_option("-C", "--config", dest="config", default=[], action="append",
                  help="configuration file")
(opts, args) = parser.parse_args(argv)
# -C is an "append" option, so when it is omitted opts.config is an empty
# list (never ""); fall back to the default file name in that case.
if not opts.config:
    opts.config = ["config"]
# The training name is used as a config section name further down; without
# it every config.get(run, ...) would fail with a far less helpful error.
if not opts.training:
    parser.error("no training given, use -T <training section>")

#load config
config = BetterConfigParser()
config.read(opts.config)
anaTag = config.get("Analysis","tag")
run=opts.training
gui=opts.verbose

#GLOBAL rescale from Train/Test splitting (handed to getScale for every sample):
global_rescale=2.

#get locations:
MVAdir=config.get('Directories','vhbbpath')+'/data/'
samplesinfo=config.get('Directories','samplesinfo')

#systematics
systematics=config.get('systematics','systematics').split(' ')

#weight expression applied to both signal and background events
weightF=config.get('Weights','weightF')
55
56
57
def getTree(job,cut,path,subsample=-1):
    """Return the entries of a sample's tree that pass a selection.

    job       -- sample description; job.getpath(), job.tree and (for
                 sub-samples) job.subcuts are used
    cut       -- selection string handed to TTree.CopyTree
    path      -- directory that job.getpath() is relative to
    subsample -- index into job.subcuts; when > -1 that sub-sample cut is
                 combined with `cut`, otherwise `cut` is applied alone

    Raises IOError when the input file cannot be opened or does not contain
    a tree called job.tree.
    """
    filename = path+'/'+job.getpath()
    newinput = TFile.Open(filename,'read')
    if not newinput or newinput.IsZombie():
        raise IOError('cannot open input file %s' % filename)
    # Switch back to the module-level output file before copying; the input
    # file is closed below, so the copy must not be attached to it.
    output.cd()
    Tree = newinput.Get(job.tree)
    if not Tree:
        newinput.Close()
        raise IOError('no tree %s in input file %s' % (job.tree, filename))

    if subsample>-1:
        selection = '(%s) & (%s)'%(cut,job.subcuts[subsample])
    else:
        selection = cut
    CuttedTree = Tree.CopyTree(selection)
    newinput.Close()
    return CuttedTree
78
79 #def getScale(job,subsample=-1):
80 # input = TFile.Open(job.getpath())
81 # CountWithPU = input.Get("CountWithPU")
82 # CountWithPU2011B = input.Get("CountWithPU2011B")
83 # #print lumi*xsecs[i]/hist.GetBinContent(1)
84 # return float(job.lumi)*float(job.xsec)*float(job.sf)/(0.46502*CountWithPU.GetBinContent(1)+0.53498*CountWithPU2011B.GetBinContent(1))*2/float(job.split)
85
86
87
#CONFIG
# TMVA factory name and option string
factoryname = config.get('factory', 'factoryname')
factorysettings = config.get('factory', 'factorysettings')

# MVA method: the training name doubles as the config section and method name
MVAtype = config.get(run, 'MVAtype')
MVAname = run
MVAsettings = config.get(run, 'MVAsettings')
fnameOutput = '%s%s_%s.root' % (MVAdir, factoryname, MVAname)

# location of the input samples for this training
path = config.get(run, 'path')

# the training section names a cut, the 'Cuts' section holds its expression
TCutname = config.get(run, 'treeCut')
TCut = config.get('Cuts', TCutname)
#print TCut

# space separated lists of the sample names used as signal / background
signals = config.get(run, 'signals').split(' ')
backgrounds = config.get(run, 'backgrounds').split(' ')

# name of the config section listing the input variables
treeVarSet = config.get(run, 'treeVarSet')

# input variables (space separated in the config), keyed by variation name
MVA_Vars = {'Nominal': config.get(treeVarSet, 'Nominal').split(' ')}
#Spectators (currently disabled):
#spectators=config.get(treeVarSet,'spectators')
#spectators=spectators.split(' ')
121
#TRAINING samples
# The samples description is a pickled list of sample objects.
# NOTE(review): unpickling can execute arbitrary code - the samplesinfo file
# must come from a trusted location.
# `with` closes the file even if unpickling fails.
with open(samplesinfo,'r') as infofile:
    info = pickle.load(infofile)
126
def _sampleKind(name):
    """Return 'SIG' / 'BKG' if `name` is configured as signal / background, else None."""
    if name in signals:
        return 'SIG'
    if name in backgrounds:
        return 'BKG'
    return None


def _readSample(job, cut, *subsample):
    """Read one (sub-)sample: return (selected tree, scale).

    `subsample` is either empty (whole sample) or a single sub-sample index;
    it is forwarded unchanged so getTree/getScale apply their own defaults.
    """
    tree = getTree(job, cut, path, *subsample)
    # getTree leaves the output file as current directory; go back to the
    # directory that was current at start-up before computing the scale.
    ROOT.gDirectory.Cd(workdir)
    scale = getScale(job, path, config, global_rescale, *subsample)
    return tree, scale


def _loadTrees(cut):
    """Read every active signal/background sample with `cut` applied.

    Returns (signal trees, signal scales, background trees, background
    scales); the tree and scale lists are filled in parallel.
    """
    trees = {'SIG': [], 'BKG': []}
    scales = {'SIG': [], 'BKG': []}

    def _store(kind, job, *subsample):
        tree, scale = _readSample(job, cut, *subsample)
        trees[kind].append(tree)
        scales[kind].append(scale)
        print('\t\t\t%s events'%tree.GetEntries())

    for job in info:
        # NOTE(review): job.active is evaluated as a Python expression, so the
        # samples info must come from a trusted source.
        if not eval(job.active):
            continue

        if job.subsamples:
            print('\tREADING IN SUBSAMPLES of %s'%job.name)
            for subsample in range(len(job.group)):
                kind = _sampleKind(job.subnames[subsample])
                if kind is None:
                    continue
                print('\t- %s as %s'%(job.group[subsample], kind))
                _store(kind, job, subsample)
        else:
            kind = _sampleKind(job.name)
            if kind is None:
                continue
            print('\tREADING IN %s AS %s'%(job.name, kind))
            _store(kind, job)

    return trees['SIG'], scales['SIG'], trees['BKG'], scales['BKG']


#Workdir
# remembered before the output file is opened, because opening a file
# changes ROOT's current directory
workdir=ROOT.gDirectory.GetPath()

# the EventForTraining flag splits the events into a training and a testing half
TrainCut='%s & EventForTraining==1'%TCut
EvalCut='%s & EventForTraining==0'%TCut

# must be open before any tree is read: getTree copies the selected events into it
output = ROOT.TFile.Open(fnameOutput, "RECREATE")

#load TRAIN trees
print('\n\t>>> TRAINING EVENTS <<<\n')
Tsignals, TsScales, Tbackgrounds, TbScales = _loadTrees(TrainCut)

#load EVALUATE trees
print('\n\t>>> TESTING EVENTS <<<\n')
Esignals, EsScales, Ebackgrounds, EbScales = _loadTrees(EvalCut)
237
238
# `output` has already been opened before the trees were read, so it is not
# re-created here.
factory = ROOT.TMVA.Factory(factoryname, output, factorysettings)

#set input trees
# The training and testing lists are filled by the same sample selection, so
# they have matching lengths and can be walked in lockstep.
# Trees without entries are skipped for signal as well as for background.
# NOTE(review): TMVA is expected to refuse empty input trees - confirm
# against the TMVA version in use.
for trainTree, trainScale, testTree, testScale in zip(Tsignals, TsScales, Esignals, EsScales):
    if trainTree.GetEntries()>0:
        factory.AddSignalTree(trainTree, trainScale, ROOT.TMVA.Types.kTraining)
    if testTree.GetEntries()>0:
        factory.AddSignalTree(testTree, testScale, ROOT.TMVA.Types.kTesting)

for trainTree, trainScale, testTree, testScale in zip(Tbackgrounds, TbScales, Ebackgrounds, EbScales):
    if trainTree.GetEntries()>0:
        factory.AddBackgroundTree(trainTree, trainScale, ROOT.TMVA.Types.kTraining)
    if testTree.GetEntries()>0:
        factory.AddBackgroundTree(testTree, testScale, ROOT.TMVA.Types.kTesting)

# declare the input variables; 'D' is the variable type code handed to TMVA
for var in MVA_Vars['Nominal']:
    factory.AddVariable(var,'D')
#for var in spectators:
#    factory.AddSpectator(var,'D') #add spectators

#Execute TMVA
# the same weight expression is used for signal and background events
factory.SetSignalWeightExpression(weightF)
factory.SetBackgroundWeightExpression(weightF)
factory.Verbose()
factory.BookMethod(MVAtype,MVAname,MVAsettings)
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
output.Write()
274
#WRITE INFOFILE
# Record how this MVA was trained in a pickled mvainfo object stored next to
# the TMVA output. The object is built first, so a failure while filling it
# does not leave an empty .info file behind.
info=mvainfo(MVAname)
info.factoryname=factoryname
info.factorysettings=factorysettings
info.MVAtype=MVAtype
info.MVAsettings=MVAsettings
info.weightfilepath=MVAdir
info.path=path
info.varset=treeVarSet
info.vars=MVA_Vars['Nominal']
#info.spectators=spectators
# `with` closes the file even if pickling fails
with open(MVAdir+factoryname+'_'+MVAname+'.info','w') as infofile:
    pickle.dump(info,infofile)
289
# open the TMVA Gui (only when -v/--verbose was given)
if gui:
    # load the GUI macro, then start it on the file written by the training
    for macroLine in (".L TMVAGui.C", "TMVAGui(\"%s\")" % fnameOutput):
        ROOT.gROOT.ProcessLine( macroLine )
    # hand control to ROOT's event loop so the GUI stays open
    ROOT.gApplication.Run()
295
296