ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/train.py
Revision: 1.16
Committed: Wed Jan 16 16:22:47 2013 UTC (12 years, 4 months ago) by peller
Content type: text/x-python
Branch: MAIN
Changes since 1.15: +9 -19 lines
Log Message:
Reorganized the whole repository. Macros in myutils, config files in subdirectories. Config file split in parts. Path config file restructured. Moved all path options to the path config. Changed the code accordingly.

File Contents

# Content
1 #!/usr/bin/env python
2 from optparse import OptionParser
3 import sys
4 import pickle
5 import ROOT
6 from ROOT import TFile, TTree
7 from array import array
8 from myutils import BetterConfigParser, sample, printc, mvainfo, parse_info
9 #ToDo:
10 from gethistofromtree import getScale
11
12 #warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
13 #usage: ./train run gui
14
15
16 #CONFIGURE
# Command-line interface:
#   -v / --verbose   : stored in opts.verbose (store_true flag)
#   -T / --training  : stored in opts.training
#   -C / --config    : configuration file, may be given several times
argv = sys.argv
parser = OptionParser()
parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False,
                  help="Verbose mode.")
parser.add_option("-T", "--training", dest="training", default="",
                  help="Training")
parser.add_option("-C", "--config", dest="config", default=[], action="append",
                  help="configuration file")
(opts, args) = parser.parse_args(argv)
# "-C" uses action="append", so opts.config is always a list and never the
# empty string; test for emptiness so the fallback file name is really used
# when no configuration file was given.
if not opts.config:
    opts.config = ["config"]
28
29 #load config
# Read every configuration file named on the command line.
config = BetterConfigParser()
config.read(opts.config)
anaTag = config.get('Analysis', 'tag')

# Command-line choices, kept under the short names used by the rest of the script.
run = opts.training
gui = opts.verbose

# Global rescale from the train/test splitting.
global_rescale = 2.

# Locations
MVAdir = ''.join((config.get('Directories', 'vhbbpath'), '/data/'))
samplesinfo = config.get('Directories', 'samplesinfo')

# Systematics: a space-separated list in the configuration
systematics = config.get('systematics', 'systematics').split(' ')

# Weight expression
weightF = config.get('Weights', 'weightF')

# Load the library configured under [VHbbNameSpace] into ROOT.
VHbbNameSpace = config.get('VHbbNameSpace', 'library')
ROOT.gSystem.Load(VHbbNameSpace)
51
def getTree(job,cut,path,subsample=-1):
    """Copy the selected events of one sample into the current output file.

    Opens ``path + '/' + job.getpath()``, makes the module-level ``output``
    file the current ROOT directory and copies the tree named ``job.tree``
    with the selection applied.

    job       -- sample description; ``getpath()``, ``tree`` and (for
                 subsamples) ``subcuts`` are used
    cut       -- selection string handed to ``TTree.CopyTree``
    path      -- directory containing the sample file
    subsample -- index into ``job.subcuts``; when greater than -1 the
                 sub-cut is combined with ``cut``

    Returns the copied tree. Raises IOError when the input file cannot be
    opened or does not contain the requested tree.
    """
    filename = path+'/'+job.getpath()
    newinput = TFile.Open(filename,'read')
    # TFile.Open gives back a null (falsy) object when the file cannot be opened
    if not newinput or newinput.IsZombie():
        raise IOError('could not open input file %s'%filename)
    try:
        # the copy has to be created while the output file is the current directory
        output.cd()
        Tree = newinput.Get(job.tree)
        if not Tree:
            raise IOError('tree %s not found in %s'%(job.tree,filename))
        if subsample>-1:
            # NOTE(review): the two selections are joined with a single '&',
            # exactly as before - confirm that '&&' is not what is wanted.
            selection = '(%s) & (%s)'%(cut,job.subcuts[subsample])
        else:
            selection = cut
        CuttedTree = Tree.CopyTree(selection)
    finally:
        # always release the input file, also when the copy failed
        newinput.Close()
    return CuttedTree
72
73 #def getScale(job,subsample=-1):
74 # input = TFile.Open(job.getpath())
75 # CountWithPU = input.Get("CountWithPU")
76 # CountWithPU2011B = input.Get("CountWithPU2011B")
77 # #print lumi*xsecs[i]/hist.GetBinContent(1)
78 # return float(job.lumi)*float(job.xsec)*float(job.sf)/(0.46502*CountWithPU.GetBinContent(1)+0.53498*CountWithPU2011B.GetBinContent(1))*2/float(job.split)
79
80
81
82 #CONFIG
83 #factory
# Factory settings
factoryname = config.get('factory', 'factoryname')
factorysettings = config.get('factory', 'factorysettings')

# MVA settings: the training name selects the configuration section
MVAtype = config.get(run, 'MVAtype')
MVAname = run
MVAsettings = config.get(run, 'MVAsettings')
fnameOutput = ''.join((MVAdir, factoryname, '_', MVAname, '.root'))

# Location of the input samples
path = config.get(run, 'path')

# The training names a cut; its definition lives in the [Cuts] section
TCutname = config.get(run, 'treeCut')
TCut = config.get('Cuts', TCutname)

# Space-separated lists of signal and background sample names
signals = config.get(run, 'signals').split(' ')
backgrounds = config.get(run, 'backgrounds').split(' ')

# Input variables: the 'Nominal' entry of the configured variable set
treeVarSet = config.get(run, 'treeVarSet')
MVA_Vars = {'Nominal': config.get(treeVarSet, 'Nominal').split(' ')}
# Spectators are currently disabled:
#spectators=config.get(treeVarSet,'spectators')
#spectators=spectators.split(' ')

# Sample descriptions (parse_info replaces the former pickled info file)
info = parse_info(samplesinfo, path)
122
123 #Workdir
workdir=ROOT.gDirectory.GetPath()

# The same base cut is used for both sets; EventForTraining selects the set.
TrainCut='%s & EventForTraining==1'%TCut
EvalCut='%s & EventForTraining==0'%TCut

# Opened after the working directory has been recorded; getTree() copies the
# selected trees into this file.
output = ROOT.TFile.Open(fnameOutput, "RECREATE")
if not output or output.IsZombie():
    raise IOError('could not create output file %s'%fnameOutput)


def loadTrees(cut):
    """Read the selected trees of all active samples and their scales.

    Walks over ``info``. A sample (or each of its subsamples) is read when
    its name is listed in ``signals`` or ``backgrounds``; anything else is
    skipped. For every accepted entry the tree is copied with ``getTree``
    and its scale is obtained from ``getScale``.

    cut -- selection string passed on to ``getTree``

    Returns ``(sigTrees, sigScales, bkgTrees, bkgScales)``, four lists filled
    in the order in which the samples were read.
    """
    sigTrees, sigScales = [], []
    bkgTrees, bkgScales = [], []

    def book(job, name, template, label, *subsample):
        # Decide from the name whether this is signal, background or unused.
        if name in signals:
            kind, trees, scales = 'SIG', sigTrees, sigScales
        elif name in backgrounds:
            kind, trees, scales = 'BKG', bkgTrees, bkgScales
        else:
            return
        print(template%(label, kind))
        # subsample is empty for plain samples, so getTree and getScale then
        # use their own defaults, exactly like the former explicit calls.
        tree = getTree(job, cut, path, *subsample)
        ROOT.gDirectory.Cd(workdir)
        scale = getScale(job, path, config, global_rescale, *subsample)
        trees.append(tree)
        scales.append(scale)
        print('\t\t\t%s events'%tree.GetEntries())

    for job in info:
        # NOTE(review): job.active is evaluated as a Python expression; it
        # comes from the samples configuration, which must be trusted.
        if not eval(job.active):
            continue
        if job.subsamples:
            print('\tREADING IN SUBSAMPLES of %s'%job.name)
            for subsample in range(len(job.group)):
                book(job, job.subnames[subsample], '\t- %s as %s',
                     job.group[subsample], subsample)
        else:
            book(job, job.name, '\tREADING IN %s AS %s', job.name)

    return sigTrees, sigScales, bkgTrees, bkgScales


#load TRAIN trees
print('\n\t>>> TRAINING EVENTS <<<\n')
Tsignals, TsScales, Tbackgrounds, TbScales = loadTrees(TrainCut)

#load EVALUATE trees
print('\n\t>>> TESTING EVENTS <<<\n')
Esignals, EsScales, Ebackgrounds, EbScales = loadTrees(EvalCut)
233
234
235 #output = ROOT.TFile.Open(fnameOutput, "RECREATE")
# Book the TMVA factory on the output file opened earlier.
factory = ROOT.TMVA.Factory(factoryname, output, factorysettings)

kTraining = ROOT.TMVA.Types.kTraining
kTesting = ROOT.TMVA.Types.kTesting

#set input trees
# Training and testing lists are filled in the same order, so they are
# walked in parallel. Empty trees are skipped for signal as well as for
# background: previously only background trees were guarded, while an empty
# signal tree was still handed to the factory.
for trainTree, trainScale, testTree, testScale in zip(Tsignals, TsScales, Esignals, EsScales):
    if trainTree.GetEntries() > 0:
        factory.AddSignalTree(trainTree, trainScale, kTraining)
    if testTree.GetEntries() > 0:
        factory.AddSignalTree(testTree, testScale, kTesting)

for trainTree, trainScale, testTree, testScale in zip(Tbackgrounds, TbScales, Ebackgrounds, EbScales):
    if trainTree.GetEntries() > 0:
        factory.AddBackgroundTree(trainTree, trainScale, kTraining)
    if testTree.GetEntries() > 0:
        factory.AddBackgroundTree(testTree, testScale, kTesting)

# Every input variable is declared with type 'D'
for var in MVA_Vars['Nominal']:
    factory.AddVariable(var,'D')
#for var in spectators:
#    factory.AddSpectator(var,'D') #add spectators

#Execute TMVA
# The same weight expression is applied to signal and background
factory.SetSignalWeightExpression(weightF)
factory.SetBackgroundWeightExpression(weightF)
factory.Verbose()
factory.BookMethod(MVAtype,MVAname,MVAsettings)
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
output.Write()
270
271 #WRITE INFOFILE
# Collect the settings of this training in an mvainfo object and pickle it
# next to the TMVA output file.
info=mvainfo(MVAname)
info.factoryname=factoryname
info.factorysettings=factorysettings
info.MVAtype=MVAtype
info.MVAsettings=MVAsettings
info.weightfilepath=MVAdir
info.path=path
info.varset=treeVarSet
info.vars=MVA_Vars['Nominal']
#info.spectators=spectators
# The with-block closes the file even when pickling fails; binary mode is
# the mode pickle data is meant to be written in.
with open(MVAdir+factoryname+'_'+MVAname+'.info','wb') as infofile:
    pickle.dump(info,infofile)
285
286 # open the TMVA Gui
if gui == True:
    # Load the GUI macro, point it at the training output, then hand control
    # to the ROOT application loop.
    for macroLine in ('.L myutils/TMVAGui.C', 'TMVAGui("%s")' % fnameOutput):
        ROOT.gROOT.ProcessLine(macroLine)
    ROOT.gApplication.Run()
291
292