ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/train.py
Revision: 1.11
Committed: Wed Oct 3 12:15:52 2012 UTC (12 years, 7 months ago) by peller
Content type: text/x-python
Branch: MAIN
Changes since 1.10: +8 -8 lines
Log Message:
8Tev training

File Contents

# User Rev Content
1 peller 1.1 #!/usr/bin/env python
2 peller 1.5 from samplesclass import sample
3 peller 1.1 from printcolor import printc
4     import pickle
5     import ROOT
6     from ROOT import TFile, TTree
7     import ROOT
8     from array import array
9 nmohr 1.9 from optparse import OptionParser
10 nmohr 1.6 from BetterConfigParser import BetterConfigParser
11 peller 1.1 import sys
12     from mvainfos import mvainfo
13 peller 1.7 from gethistofromtree import getScale
14    
15 peller 1.1
16     #warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
17    
18    
19     #usage: ./train.py -T <training section> -C <config file> [-C <config file> ...] [-v]
20    
21    
#CONFIGURE
# Command line interface:
#   -v / --verbose   stored in opts.verbose
#   -T / --training  name of the training, stored in opts.training
#   -C / --config    configuration file; may be repeated, the values are
#                    collected in the list opts.config
argv = sys.argv
parser = OptionParser()
parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False,
                  help="Verbose mode.")
parser.add_option("-T", "--training", dest="training", default="",
                  help="Training")
parser.add_option("-C", "--config", dest="config", default=[], action="append",
                  help="configuration file")
(opts, args) = parser.parse_args(argv)
# -C uses action="append" with default=[], so "no config given" shows up as an
# empty list and never as "". Test for emptiness so that the fallback to the
# file named "config" is actually applied.
if not opts.config:
    opts.config = "config"
34 peller 1.1
#load config
config = BetterConfigParser()
config.read(opts.config)
anaTag = config.get("Analysis", "tag")

# The training name comes from -T; the verbose flag doubles as the switch
# that opens the TMVA GUI at the end of the script.
run, gui = opts.training, opts.verbose

#GLOBAL rescale from Train/Test splitting:
global_rescale = 2.

#get locations:
MVAdir = config.get('Directories', 'vhbbpath') + '/data/'

#systematics (space separated list in the config)
systematics = config.get('systematics', 'systematics').split(' ')

# expression later handed to the TMVA factory as the signal weight
weightF = config.get('Weights', 'weightF')
53    
54 peller 1.7
55    
56    
def getTree(job,cut,path,subsample=-1):
    """Copy the events of one sample that pass a selection.

    job       -- sample description; job.getpath(), job.tree and
                 job.subcuts are used here
    cut       -- selection string handed to TTree.CopyTree
    path      -- directory prefixed to job.getpath()
    subsample -- index into job.subcuts; if > -1 that sub-cut is combined
                 with `cut`, otherwise `cut` is applied on its own

    Returns the tree produced by CopyTree.
    Raises IOError if the input file cannot be opened or does not contain
    a tree called job.tree.
    """
    fname = path+'/'+job.getpath()
    print(fname)
    newinput = TFile.Open(fname,'read')
    # TFile.Open hands back a null pointer (or a zombie file) on failure;
    # fail with the file name instead of an obscure null-pointer error later.
    if not newinput or newinput.IsZombie():
        raise IOError('cannot open input file %s' % fname)

    # Make the module-level `output` file the current directory before
    # copying -- presumably so that the copied tree is attached to `output`
    # and survives closing the input file below.
    output.cd()
    Tree = newinput.Get(job.tree)
    if not Tree:
        newinput.Close()
        raise IOError('no tree %s in input file %s' % (job.tree, fname))

    if subsample>-1:
        selection = '(%s) & (%s)'%(cut,job.subcuts[subsample])
    else:
        selection = cut
    CuttedTree = Tree.CopyTree(selection)
    newinput.Close()

    return CuttedTree
77    
78 peller 1.7 #def getScale(job,subsample=-1):
79     # input = TFile.Open(job.getpath())
80     # CountWithPU = input.Get("CountWithPU")
81     # CountWithPU2011B = input.Get("CountWithPU2011B")
82     # #print lumi*xsecs[i]/hist.GetBinContent(1)
83     # return float(job.lumi)*float(job.xsec)*float(job.sf)/(0.46502*CountWithPU.GetBinContent(1)+0.53498*CountWithPU2011B.GetBinContent(1))*2/float(job.split)
84 peller 1.1
85    
86    
#CONFIG
# All settings below are read from the section named after the training
# (`run`) unless another section is given explicitly.

#factory
factoryname = config.get('factory', 'factoryname')
factorysettings = config.get('factory', 'factorysettings')

#MVA: the method is booked under the name of the training
MVAname = run
MVAtype = config.get(run, 'MVAtype')
MVAsettings = config.get(run, 'MVAsettings')
fnameOutput = '%s%s_%s.root' % (MVAdir, factoryname, MVAname)

#locations
path = config.get(run, 'path')

# the training section only names the cut; its definition is in [Cuts]
TCutname = config.get(run, 'treeCut')
TCut = config.get('Cuts', TCutname)
#print TCut

#signals and backgrounds (space separated sample names)
signals = config.get(run, 'signals').split(' ')
backgrounds = config.get(run, 'backgrounds').split(' ')

treeVarSet = config.get(run, 'treeVarSet')

#variables
#TreeVar Array: input variables of the training, keyed by systematic
MVA_Vars = {'Nominal': config.get(treeVarSet, 'Nominal').split(' ')}
#Spectators (currently disabled):
#spectators=config.get(treeVarSet,'spectators')
#spectators=spectators.split(' ')
120 peller 1.1
#TRAINING samples
# The list of sample descriptions is unpickled from samples.info in `path`.
infofile = open('%s/samples.info' % path, 'r')
info = pickle.load(infofile)
infofile.close()

#Workdir: remember the current ROOT directory so it can be restored after
# each input file has been read
workdir = ROOT.gDirectory.GetPath()

# Events are split into a training and an evaluation set by the
# EventForTraining flag, on top of the configured tree cut.
TrainCut = TCut + ' & EventForTraining==1'
EvalCut = TCut + ' & EventForTraining==0'
132 peller 1.1
#load TRAIN and EVALUATE trees
def _readTrees(cut):
    """Read the trees of all active samples for one selection.

    Loops over the sample list `info`. A sample (or, if job.subsamples is
    set, each of its subsamples) is read when its name appears in `signals`
    or `backgrounds`; anything else is skipped. For every sample read, the
    tree comes from getTree(job, cut, path[, subsample]) and the scale from
    getScale(job, path, config, global_rescale[, subsample]).

    cut -- selection string passed on to getTree

    Returns (sigTrees, sigScales, bkgTrees, bkgScales): four lists in
    reading order, tree i belonging to scale i.
    """
    trees = {'SIG': [], 'BKG': []}
    scales = {'SIG': [], 'BKG': []}

    def classify(name):
        # signals take precedence, as in an if/elif chain
        if name in signals:
            return 'SIG'
        if name in backgrounds:
            return 'BKG'
        return None

    for job in info:
        # NOTE(review): job.active comes from the unpickled samples.info and
        # is evaluated as Python code -- only safe for trusted info files.
        if not eval(job.active):
            continue

        if job.subsamples:
            print('\tREADING IN SUBSAMPLES of %s'%job.name)
            for subsample in range(0,len(job.group)):
                kind = classify(job.subnames[subsample])
                if kind is None:
                    continue
                print('\t- %s as %s'%(job.group[subsample],kind))
                tree = getTree(job,cut,path,subsample)
                # getTree changes the current ROOT directory; restore it
                ROOT.gDirectory.Cd(workdir)
                scale = getScale(job,path,config,global_rescale,subsample)
                trees[kind].append(tree)
                scales[kind].append(scale)
                print('\t\t\t%s events'%tree.GetEntries())
        else:
            kind = classify(job.name)
            if kind is None:
                continue
            print('\tREADING IN %s AS %s'%(job.name,kind))
            tree = getTree(job,cut,path)
            # getTree changes the current ROOT directory; restore it
            ROOT.gDirectory.Cd(workdir)
            scale = getScale(job,path,config,global_rescale)
            trees[kind].append(tree)
            scales[kind].append(scale)
            print('\t\t\t%s events'%tree.GetEntries())

    return trees['SIG'], scales['SIG'], trees['BKG'], scales['BKG']


# The output file has to exist before any tree is read: getTree makes it the
# current directory while copying the selected events.
output = ROOT.TFile.Open(fnameOutput, "RECREATE")

print('\n\t>>> TRAINING EVENTS <<<\n')
Tsignals, TsScales, Tbackgrounds, TbScales = _readTrees(TrainCut)

print('\n\t>>> TESTING EVENTS <<<\n')
Esignals, EsScales, Ebackgrounds, EbScales = _readTrees(EvalCut)
236 peller 1.1
237 peller 1.7
# `output` is the file opened before the trees were read; the factory is
# handed the same file and it is written at the end of this section.
factory = ROOT.TMVA.Factory(factoryname, output, factorysettings)

#set input trees
kTraining = ROOT.TMVA.Types.kTraining
kTesting = ROOT.TMVA.Types.kTesting

# Training and testing lists are filled by identical loops over the samples,
# so entry i of each list refers to the same sample.
# NOTE(review): signal trees are added without the entries>0 guard that is
# applied to the background trees below -- confirm this is intended.
for i, trainTree in enumerate(Tsignals):
    factory.AddSignalTree(trainTree, TsScales[i], kTraining)
    factory.AddSignalTree(Esignals[i], EsScales[i], kTesting)

for i, trainTree in enumerate(Tbackgrounds):
    # background trees without entries are not handed to the factory
    if trainTree.GetEntries() > 0:
        factory.AddBackgroundTree(trainTree, TbScales[i], kTraining)

    testTree = Ebackgrounds[i]
    if testTree.GetEntries() > 0:
        factory.AddBackgroundTree(testTree, EbScales[i], kTesting)

# add the variables
for var in MVA_Vars['Nominal']:
    factory.AddVariable(var, 'D')
#for var in spectators:
#    factory.AddSpectator(var,'D') #add spectators

#Execute TMVA
# NOTE(review): only the signal weight expression is set here; no background
# weight expression is configured -- confirm this is intended.
factory.SetSignalWeightExpression(weightF)
factory.Verbose()
factory.BookMethod(MVAtype, MVAname, MVAsettings)
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
output.Write()
272    
#WRITE INFOFILE
# A pickled mvainfo object describing this training is stored next to the
# TMVA output as <MVAdir><factoryname>_<MVAname>.info. Note that `info`,
# which held the sample list until here, is rebound to that object.
infofile = open('%s%s_%s.info' % (MVAdir, factoryname, MVAname), 'w')
info = mvainfo(MVAname)
for attr, value in (('factoryname', factoryname),
                    ('factorysettings', factorysettings),
                    ('MVAtype', MVAtype),
                    ('MVAsettings', MVAsettings),
                    ('weightfilepath', MVAdir),
                    ('path', path),
                    ('varset', treeVarSet),
                    ('vars', MVA_Vars['Nominal'])):
    setattr(info, attr, value)
#info.spectators=spectators
pickle.dump(info, infofile)
infofile.close()
287    
# open the TMVA Gui
# `gui` is the boolean verbose flag from the command line; when it is set the
# TMVAGui.C macro is loaded, pointed at the training output file, and the
# ROOT application loop is started.
if gui:
    ROOT.gROOT.ProcessLine('.L TMVAGui.C')
    ROOT.gROOT.ProcessLine('TMVAGui("%s")' % fnameOutput)
    ROOT.gApplication.Run()
293    
294