ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/train.py
Revision: 1.12
Committed: Fri Oct 5 16:24:52 2012 UTC (12 years, 7 months ago) by peller
Content type: text/x-python
Branch: MAIN
CVS Tags: hcpPreAppFreeze
Changes since 1.11: +2 -1 lines
Log Message:
weightF

File Contents

# User Rev Content
1 peller 1.1 #!/usr/bin/env python
2 peller 1.5 from samplesclass import sample
3 peller 1.1 from printcolor import printc
4     import pickle
5     import ROOT
6     from ROOT import TFile, TTree
7     import ROOT
8     from array import array
9 nmohr 1.9 from optparse import OptionParser
10 nmohr 1.6 from BetterConfigParser import BetterConfigParser
11 peller 1.1 import sys
12     from mvainfos import mvainfo
13 peller 1.7 from gethistofromtree import getScale
14    
15 peller 1.1
16     #warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
17    
18    
19     #usage: ./train.py -T <training section> -C <config file> [-v]
20    
21    
22     #CONFIGURE
# Command line: -T names the config section describing the training to run,
# -C (repeatable) lists the configuration file(s), -v opens the TMVA GUI at
# the end of the job.
argv = sys.argv
parser = OptionParser()
parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False,
                  help="Verbose mode.")
parser.add_option("-T", "--training", dest="training", default="",
                  help="Training")
parser.add_option("-C", "--config", dest="config", default=[], action="append",
                  help="configuration file")
(opts, args) = parser.parse_args(argv)
# "-C" uses action="append", so without the option opts.config is the empty
# list default and never the empty string. Test for emptiness so that the
# fallback to the file named "config" is actually applied.
if not opts.config:
    opts.config = "config"

#load config
config = BetterConfigParser()
# opts.config is either the list collected from -C or the fallback file name
# NOTE(review): assumes BetterConfigParser.read accepts both, like ConfigParser.read
config.read(opts.config)
anaTag = config.get("Analysis","tag")
run=opts.training   # section name of the training to perform
gui=opts.verbose    # -v doubles as the "open the TMVA GUI" switch

#GLOBAL rescale from Train/Test splitting:
global_rescale=2.

#get locations:
MVAdir=config.get('Directories','vhbbpath')+'/data/'

#systematics
systematics=config.get('systematics','systematics')
systematics=systematics.split(' ')

# weight expression applied to signal and background events in TMVA
weightF=config.get('Weights','weightF')
53    
54 peller 1.7
55    
56    
def getTree(job,cut,path,subsample=-1):
    """Return a skimmed copy of the tree of `job`.

    The input file `path`/`job.getpath()` is opened read-only and the tree
    named `job.tree` is copied with `cut` applied.  When `subsample` is
    greater than -1 the cut is combined with `job.subcuts[subsample]`.

    The module-level `output` file is made the current directory before the
    copy is taken (presumably so that the copy is attached to `output` and
    stays usable after the input file is closed -- confirm against ROOT's
    ownership rules).
    """
    source = TFile.Open('/'.join((path, job.getpath())), 'read')
    output.cd()
    full_tree = source.Get(job.tree)

    # Build the selection first, then copy once.
    if subsample > -1:
        selection = '(%s) & (%s)' % (cut, job.subcuts[subsample])
    else:
        selection = cut
    selected = full_tree.CopyTree(selection)

    source.Close()
    return selected
77    
78 peller 1.7 #def getScale(job,subsample=-1):
79     # input = TFile.Open(job.getpath())
80     # CountWithPU = input.Get("CountWithPU")
81     # CountWithPU2011B = input.Get("CountWithPU2011B")
82     # #print lumi*xsecs[i]/hist.GetBinContent(1)
83     # return float(job.lumi)*float(job.xsec)*float(job.sf)/(0.46502*CountWithPU.GetBinContent(1)+0.53498*CountWithPU2011B.GetBinContent(1))*2/float(job.split)
84 peller 1.1
85    
86    
#CONFIG
# Everything below is read from the config section named by -T (`run`).
#factory
factoryname=config.get('factory','factoryname')
factorysettings=config.get('factory','factorysettings')
#MVA
MVAtype=config.get(run,'MVAtype')
MVAname=run
MVAsettings=config.get(run,'MVAsettings')
fnameOutput = MVAdir+factoryname+'_'+MVAname+'.root'
#locations
path=config.get(run,'path')

# the training section names a cut; the expression itself lives in [Cuts]
TCutname=config.get(run, 'treeCut')
TCut=config.get('Cuts',TCutname)
#print TCut

#signals (space separated sample names)
signals=config.get(run,'signals')
signals=signals.split(' ')
#backgrounds (space separated sample names)
backgrounds=config.get(run,'backgrounds')
backgrounds=backgrounds.split(' ')

treeVarSet=config.get(run,'treeVarSet')

#variables
#TreeVar Array
MVA_Vars={}
MVA_Vars['Nominal']=config.get(treeVarSet,'Nominal')
MVA_Vars['Nominal']=MVA_Vars['Nominal'].split(' ')
#Spectators:
#spectators=config.get(treeVarSet,'spectators')
#spectators=spectators.split(' ')

#TRAINING samples
# NOTE(review): pickle executes arbitrary code on load; samples.info must come
# from a trusted location.
# `with` guarantees the handle is closed even if unpickling fails.
with open(path+'/samples.info','r') as infofile:
    info = pickle.load(infofile)

#Workdir
workdir=ROOT.gDirectory.GetPath()

# Split the selection into a training half and an evaluation half.  TCut is
# parenthesised so that a top-level '||' inside the configured cut cannot
# capture the EventForTraining condition through operator precedence.
TrainCut='(%s) & EventForTraining==1'%TCut
EvalCut='(%s) & EventForTraining==0'%TCut
132 peller 1.1
#load TRAIN and EVALUATE trees

# The output file has to exist before any tree is read: getTree() changes
# into it before copying the skimmed trees.
output = ROOT.TFile.Open(fnameOutput, "RECREATE")


def _readSample(job, cut, trees, scales, *sub):
    """Skim one sample (or subsample, if an index is given in `sub`) with
    `cut`, append the tree and its scale to `trees` / `scales`, and report
    the number of selected events."""
    tree = getTree(job, cut, path, *sub)
    # restore the working directory changed while reading the tree
    ROOT.gDirectory.Cd(workdir)
    scale = getScale(job, path, config, global_rescale, *sub)
    trees.append(tree)
    scales.append(scale)
    print('\t\t\t%s events' % tree.GetEntries())


def _loadTrees(cut):
    """Read every active sample listed in `signals` or `backgrounds`.

    Returns the tuple (signal trees, signal scales, background trees,
    background scales); trees and scales are index-aligned lists.
    """
    sigTrees, sigScales, bkgTrees, bkgScales = [], [], [], []
    for job in info:
        # NOTE(review): job.active is a string that gets eval()'d; it comes
        # from the pickled samples.info, which must therefore be trusted.
        if not eval(job.active):
            continue

        if job.subsamples:
            print('\tREADING IN SUBSAMPLES of %s' % job.name)
            for subsample in range(len(job.group)):
                subname = job.subnames[subsample]
                if subname in signals:
                    print('\t- %s as SIG' % job.group[subsample])
                    _readSample(job, cut, sigTrees, sigScales, subsample)
                elif subname in backgrounds:
                    print('\t- %s as BKG' % job.group[subsample])
                    _readSample(job, cut, bkgTrees, bkgScales, subsample)
        elif job.name in signals:
            print('\tREADING IN %s AS SIG' % job.name)
            _readSample(job, cut, sigTrees, sigScales)
        elif job.name in backgrounds:
            print('\tREADING IN %s AS BKG' % job.name)
            _readSample(job, cut, bkgTrees, bkgScales)
    return sigTrees, sigScales, bkgTrees, bkgScales


print('\n\t>>> TRAINING EVENTS <<<\n')
Tsignals, TsScales, Tbackgrounds, TbScales = _loadTrees(TrainCut)

print('\n\t>>> TESTING EVENTS <<<\n')
Esignals, EsScales, Ebackgrounds, EbScales = _loadTrees(EvalCut)
236 peller 1.1
237 peller 1.7
238     #output = ROOT.TFile.Open(fnameOutput, "RECREATE")
factory = ROOT.TMVA.Factory(factoryname, output, factorysettings)

#set input trees
kTraining = ROOT.TMVA.Types.kTraining
kTesting = ROOT.TMVA.Types.kTesting

# Signal: the training and testing lists are index-aligned, every tree is added.
for idx, trainTree in enumerate(Tsignals):
    factory.AddSignalTree(trainTree, TsScales[idx], kTraining)
    factory.AddSignalTree(Esignals[idx], EsScales[idx], kTesting)

# Background: trees without entries are skipped, separately for each half.
for idx, trainTree in enumerate(Tbackgrounds):
    if trainTree.GetEntries() > 0:
        factory.AddBackgroundTree(trainTree, TbScales[idx], kTraining)

    testTree = Ebackgrounds[idx]
    if testTree.GetEntries() > 0:
        factory.AddBackgroundTree(testTree, EbScales[idx], kTesting)

# add the input variables
for varName in MVA_Vars['Nominal']:
    factory.AddVariable(varName, 'D')
#for var in spectators:
#    factory.AddSpectator(var,'D') #add spectators

#Execute TMVA
# the same weight expression is used for both classes
for setWeight in (factory.SetSignalWeightExpression,
                  factory.SetBackgroundWeightExpression):
    setWeight(weightF)
factory.Verbose()
factory.BookMethod(MVAtype, MVAname, MVAsettings)
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
output.Write()
273    
274     #WRITE INFOFILE
# Record how this MVA was trained so later steps can look it up.
# The record is built completely before the file is opened, so a failure
# while filling it cannot leave a truncated .info file behind.
info=mvainfo(MVAname)
info.factoryname=factoryname
info.factorysettings=factorysettings
info.MVAtype=MVAtype
info.MVAsettings=MVAsettings
info.weightfilepath=MVAdir
info.path=path
info.varset=treeVarSet
info.vars=MVA_Vars['Nominal']
#info.spectators=spectators

# `with` closes the file even if pickling raises
with open(MVAdir+factoryname+'_'+MVAname+'.info','w') as infofile:
    pickle.dump(info,infofile)
288    
289     # open the TMVA Gui
if gui == True:
    # load the GUI macro, point it at the freshly written output file,
    # then hand control to the ROOT application loop
    for macroLine in ('.L TMVAGui.C', 'TMVAGui("%s")' % fnameOutput):
        ROOT.gROOT.ProcessLine(macroLine)
    ROOT.gApplication.Run()
294    
295