ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/train.py
Revision: 1.14
Committed: Thu Jan 10 20:03:54 2013 UTC (12 years, 4 months ago) by bortigno
Content type: text/x-python
Branch: MAIN
Changes since 1.13: +15 -14 lines
Log Message:
Add loading of the VHbb namespace. This allows namespace variables to be used for the BDT training.

File Contents

# User Rev Content
1 peller 1.1 #!/usr/bin/env python
2 nmohr 1.9 from optparse import OptionParser
3 nmohr 1.6 from BetterConfigParser import BetterConfigParser
4 peller 1.1 import sys
5    
6     #warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
7     #usage: ./train run gui
8    
9    
10     #CONFIGURE
# Command-line configuration.
# NOTE: the complete sys.argv (program name included) is handed to the
# parser, so args[0] is the script path.
argv = sys.argv
parser = OptionParser()
parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False,
                  help="Verbose mode.")
parser.add_option("-T", "--training", dest="training", default="",
                  help="Training")
parser.add_option("-C", "--config", dest="config", default=[], action="append",
                  help="configuration file")
(opts, args) = parser.parse_args(argv)
# "-C" uses action="append", so leaving it out yields an empty list and never
# the empty string; test for emptiness so the fallback actually applies.
# The fallback is a one-element list to match the type produced when "-C"
# is given.
if not opts.config:
    opts.config = ["config"]
22 peller 1.1
23 bortigno 1.14
24     from samplesclass import sample
25     from printcolor import printc
26     import pickle
27     import ROOT
28     from ROOT import TFile, TTree
29     import ROOT
30     from array import array
31     from mvainfos import mvainfo
32     from gethistofromtree import getScale
33    
34    
35    
# Command-line choices: the training to run and whether to open the TMVA GUI
# (the GUI switch reuses the verbose flag).
run = opts.training
gui = opts.verbose

# Read the configuration file(s) selected on the command line.
config = BetterConfigParser()
config.read(opts.config)
anaTag = config.get("Analysis", "tag")

# Global rescale factor attributed to the train/test splitting.
global_rescale = 2.0

# Locations
MVAdir = '%s/data/' % config.get('Directories', 'vhbbpath')
samplesinfo = config.get('Directories', 'samplesinfo')

# Systematics: one space-separated string in the config, split into a list.
systematics = config.get('systematics', 'systematics').split(' ')

# Expression used as event weight for both signal and background.
weightF = config.get('Weights', 'weightF')

# Load the VHbb namespace library into ROOT.
VHbbNameSpace = config.get('VHbbNameSpace', 'library')
ROOT.gSystem.Load(VHbbNameSpace)
58 peller 1.7
def getTree(job,cut,path,subsample=-1):
    """Return a copy of the job's tree reduced to the requested selection.

    job       -- sample description; provides getpath(), tree and subcuts
    cut       -- selection expression applied to the tree
    path      -- directory containing the job's ROOT file
    subsample -- index into job.subcuts; when greater than -1 the matching
                 sub-cut is combined with `cut`, otherwise only `cut` is used

    Relies on the module-level `output` file already being open.
    """
    source = TFile.Open('/'.join((path, job.getpath())), 'read')
    # Make the output file the current directory before copying
    # (presumably so that the copy outlives the input file closed below).
    output.cd()
    fullTree = source.Get(job.tree)

    if subsample > -1:
        selection = '(%s) & (%s)' % (cut, job.subcuts[subsample])
    else:
        selection = cut

    skimmedTree = fullTree.CopyTree(selection)
    source.Close()
    return skimmedTree
79    
80 peller 1.7 #def getScale(job,subsample=-1):
81     # input = TFile.Open(job.getpath())
82     # CountWithPU = input.Get("CountWithPU")
83     # CountWithPU2011B = input.Get("CountWithPU2011B")
84     # #print lumi*xsecs[i]/hist.GetBinContent(1)
85     # return float(job.lumi)*float(job.xsec)*float(job.sf)/(0.46502*CountWithPU.GetBinContent(1)+0.53498*CountWithPU2011B.GetBinContent(1))*2/float(job.split)
86 peller 1.1
87    
88    
#CONFIG
# TMVA factory
factoryname = config.get('factory', 'factoryname')
factorysettings = config.get('factory', 'factorysettings')

# MVA method: the training name doubles as config section and method name.
MVAtype = config.get(run, 'MVAtype')
MVAname = run
MVAsettings = config.get(run, 'MVAsettings')
fnameOutput = '%s%s_%s.root' % (MVAdir, factoryname, MVAname)

# Location of the input trees
path = config.get(run, 'path')

# The training section names a cut; the expression itself lives in [Cuts].
TCutname = config.get(run, 'treeCut')
TCut = config.get('Cuts', TCutname)

# Sample names treated as signal / background (space-separated in the config)
signals = config.get(run, 'signals').split(' ')
backgrounds = config.get(run, 'backgrounds').split(' ')

# Input variables of the training, taken from the chosen variable set
treeVarSet = config.get(run, 'treeVarSet')
MVA_Vars = {'Nominal': config.get(treeVarSet, 'Nominal').split(' ')}

# Spectator variables are currently not read from the config.
122 peller 1.1
123     #TRAINING samples
# Sample descriptions are stored as a pickle. Open it in binary mode and let
# the with-block close the handle even when unpickling fails.
# NOTE(review): pickle.load can execute arbitrary code; 'samplesinfo' must
# only point at files from a trusted source.
with open(samplesinfo,'rb') as infofile:
    info = pickle.load(infofile)
127    
#Workdir
workdir=ROOT.gDirectory.GetPath()

# Selections for the two halves of each sample, keyed on EventForTraining.
TrainCut='%s & EventForTraining==1'%TCut
EvalCut='%s & EventForTraining==0'%TCut


def _roleOf(name):
    """Return 'SIG' or 'BKG' for a configured sample name, else None."""
    if name in signals:
        return 'SIG'
    if name in backgrounds:
        return 'BKG'
    return None


def _storeSample(trees, scales, role, job, cut, *subsample):
    """Read one (sub)sample and append its tree and scale under `role`.

    `subsample` is either empty or a single index. It is forwarded as-is so
    that getTree/getScale use their own defaults when no index is given.
    """
    tree = getTree(job, cut, path, *subsample)
    # getTree switches the current ROOT directory; return to the initial one.
    ROOT.gDirectory.Cd(workdir)
    scale = getScale(job, path, config, global_rescale, *subsample)
    trees[role].append(tree)
    scales[role].append(scale)
    print('\t\t\t%s events' % tree.GetEntries())


def _loadTrees(cut):
    """Collect signal and background trees, with their scales, passing `cut`.

    Walks the active jobs in `info`. A job with subsamples contributes every
    subsample whose name is listed in `signals` or `backgrounds`; any other
    job is matched by its own name. Names in neither list are skipped.

    Returns (signal trees, signal scales, background trees, background scales).
    """
    trees = {'SIG': [], 'BKG': []}
    scales = {'SIG': [], 'BKG': []}

    for job in info:
        # NOTE(review): eval() executes the text stored in job.active, which
        # comes from the pickled sample info -- trusted input only.
        if not eval(job.active):
            continue

        if job.subsamples:
            print('\tREADING IN SUBSAMPLES of %s' % job.name)
            for subsample in range(len(job.group)):
                role = _roleOf(job.subnames[subsample])
                if role is not None:
                    print('\t- %s as %s' % (job.group[subsample], role))
                    _storeSample(trees, scales, role, job, cut, subsample)
        else:
            role = _roleOf(job.name)
            if role is not None:
                print('\tREADING IN %s AS %s' % (job.name, role))
                _storeSample(trees, scales, role, job, cut)

    return trees['SIG'], scales['SIG'], trees['BKG'], scales['BKG']


# The output file has to be open before any tree is read: getTree makes it
# the current directory for the copied trees.
output = ROOT.TFile.Open(fnameOutput, "RECREATE")

# The single-argument print(...) form behaves identically as a Python 2
# print statement.
#load TRAIN trees
print('\n\t>>> TRAINING EVENTS <<<\n')
Tsignals, TsScales, Tbackgrounds, TbScales = _loadTrees(TrainCut)

#load EVALUATE trees
print('\n\t>>> TESTING EVENTS <<<\n')
Esignals, EsScales, Ebackgrounds, EbScales = _loadTrees(EvalCut)
238 peller 1.1
239 peller 1.7
240     #output = ROOT.TFile.Open(fnameOutput, "RECREATE")
# Book the TMVA factory on the already open output file.
factory = ROOT.TMVA.Factory(factoryname, output, factorysettings)
tmvaTypes = ROOT.TMVA.Types

# Signal trees: training and testing halves are registered pairwise.
for idx, trainSignal in enumerate(Tsignals):
    factory.AddSignalTree(trainSignal, TsScales[idx], tmvaTypes.kTraining)
    factory.AddSignalTree(Esignals[idx], EsScales[idx], tmvaTypes.kTesting)

# Background trees: only non-empty trees are registered.
# NOTE(review): the indentation of this listing was lost; the training and
# testing checks are taken to be independent of each other -- confirm.
for idx, trainBackground in enumerate(Tbackgrounds):
    if trainBackground.GetEntries() > 0:
        factory.AddBackgroundTree(trainBackground, TbScales[idx], tmvaTypes.kTraining)

    testBackground = Ebackgrounds[idx]
    if testBackground.GetEntries() > 0:
        factory.AddBackgroundTree(testBackground, EbScales[idx], tmvaTypes.kTesting)

# Input variables, all declared with type 'D'. Spectators are not added.
for varName in MVA_Vars['Nominal']:
    factory.AddVariable(varName, 'D')

# Same weight expression for signal and background.
factory.SetSignalWeightExpression(weightF)
factory.SetBackgroundWeightExpression(weightF)

# Execute TMVA: book the method, then train, test and evaluate it.
factory.Verbose()
factory.BookMethod(MVAtype, MVAname, MVAsettings)
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
output.Write()
275    
276     #WRITE INFOFILE
# Describe this training in an mvainfo object. From here on `info` no longer
# refers to the list of samples.
info=mvainfo(MVAname)
info.factoryname=factoryname
info.factorysettings=factorysettings
info.MVAtype=MVAtype
info.MVAsettings=MVAsettings
info.weightfilepath=MVAdir
info.path=path
info.varset=treeVarSet
info.vars=MVA_Vars['Nominal']
#info.spectators=spectators

# Pickle the description next to the training output. The file is opened only
# once the object is complete, in binary mode, and the with-block closes it
# even if pickling fails.
with open(MVAdir+factoryname+'_'+MVAname+'.info','wb') as infofile:
    pickle.dump(info,infofile)

# open the TMVA Gui (requested through the verbose flag)
if gui:
    ROOT.gROOT.ProcessLine( ".L TMVAGui.C")
    ROOT.gROOT.ProcessLine( "TMVAGui(\"%s\")" % fnameOutput )
    ROOT.gApplication.Run()
296    
297