ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/train.py
Revision: 1.13
Committed: Thu Oct 11 16:53:25 2012 UTC (12 years, 7 months ago) by nmohr
Content type: text/x-python
Branch: MAIN
CVS Tags: hcpApproval, HCP_unblinding, hcpPreApp
Changes since 1.12: +2 -1 lines
Log Message:
Only one place for samples info

File Contents

# User Rev Content
1 peller 1.1 #!/usr/bin/env python
2 peller 1.5 from samplesclass import sample
3 peller 1.1 from printcolor import printc
4     import pickle
5     import ROOT
6     from ROOT import TFile, TTree
7     import ROOT
8     from array import array
9 nmohr 1.9 from optparse import OptionParser
10 nmohr 1.6 from BetterConfigParser import BetterConfigParser
11 peller 1.1 import sys
12     from mvainfos import mvainfo
13 peller 1.7 from gethistofromtree import getScale
14    
15 peller 1.1
16     #warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
17    
18    
19     #usage: ./train.py -T <training> -C <config file> [-C <config file> ...] [-v]
20    
21    
22     #CONFIGURE
# Command line: -T names the training (it is also used as a config section),
# -C may be repeated to read several configuration files, -v opens the TMVA GUI
# once the training has finished.
argv = sys.argv
parser = OptionParser()
parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False,
                  help="Verbose mode.")
parser.add_option("-T", "--training", dest="training", default="",
                  help="Training")
parser.add_option("-C", "--config", dest="config", default=[], action="append",
                  help="configuration file")
(opts, args) = parser.parse_args(argv)
# -C uses action="append", so an unset option is an empty list and never "".
# Test for "nothing given" so the fallback to the file named "config" really applies.
if not opts.config:
    opts.config = ["config"]

#load config
config = BetterConfigParser()
config.read(opts.config)
anaTag = config.get("Analysis","tag")
run=opts.training
gui=opts.verbose
41 peller 1.7
#GLOBAL rescale from Train/Test Splitting:
global_rescale = 2.0

#get locations:
MVAdir = '%s/data/'%config.get('Directories','vhbbpath')
samplesinfo = config.get('Directories','samplesinfo')

#systematics: stored in the config as one space separated string
systematics = config.get('systematics','systematics').split(' ')

#weight expression, used later for both signal and background events
weightF = config.get('Weights','weightF')
54    
55 peller 1.7
56    
57    
def getTree(job,cut,path,subsample=-1):
    """Return a copy of one sample's tree restricted to the events passing a cut.

    job       -- sample description; job.getpath(), job.tree and job.subcuts are read
    cut       -- selection string handed to TTree.CopyTree
    path      -- directory that is joined with job.getpath() to locate the input file
    subsample -- index into job.subcuts; for values > -1 that sub-cut is ANDed with cut

    Relies on the module-level `output` file having been opened before the call.
    Raises IOError if the input file cannot be opened or does not contain job.tree.
    """
    filename = path+'/'+job.getpath()
    newinput = TFile.Open(filename,'read')
    # TFile.Open hands back a null pointer (or a zombie file) on failure; stop here
    # with a clear message instead of failing later on a null-pointer access.
    if not newinput or newinput.IsZombie():
        raise IOError('could not open input file %s'%filename)
    # Make the output file the current directory before copying; presumably so that
    # the copied tree is owned by `output` and stays usable after the input is closed.
    output.cd()
    Tree = newinput.Get(job.tree)
    if not Tree:
        newinput.Close()
        raise IOError('tree %s not found in %s'%(job.tree,filename))

    if subsample>-1:
        CuttedTree=Tree.CopyTree('(%s) & (%s)'%(cut,job.subcuts[subsample]))
    else:
        CuttedTree=Tree.CopyTree(cut)
    newinput.Close()

    return CuttedTree
78    
79 peller 1.7 #def getScale(job,subsample=-1):
80     # input = TFile.Open(job.getpath())
81     # CountWithPU = input.Get("CountWithPU")
82     # CountWithPU2011B = input.Get("CountWithPU2011B")
83     # #print lumi*xsecs[i]/hist.GetBinContent(1)
84     # return float(job.lumi)*float(job.xsec)*float(job.sf)/(0.46502*CountWithPU.GetBinContent(1)+0.53498*CountWithPU2011B.GetBinContent(1))*2/float(job.split)
85 peller 1.1
86    
87    
#CONFIG
#factory
factoryname = config.get('factory','factoryname')
factorysettings = config.get('factory','factorysettings')

#MVA: the training name (run) is both the config section and the MVA name
MVAtype = config.get(run,'MVAtype')
MVAname = run
MVAsettings = config.get(run,'MVAsettings')
fnameOutput = '%s%s_%s.root'%(MVAdir,factoryname,MVAname)

#locations
path = config.get(run,'path')

#selection: the training section names a cut, the expression itself is in [Cuts]
TCutname = config.get(run,'treeCut')
TCut = config.get('Cuts',TCutname)
#print TCut

#signals and backgrounds: space separated sample names
signals = config.get(run,'signals').split(' ')
backgrounds = config.get(run,'backgrounds').split(' ')

treeVarSet = config.get(run,'treeVarSet')

#variables
#TreeVar Array: space separated variable expressions of the chosen set
MVA_Vars = {'Nominal': config.get(treeVarSet,'Nominal').split(' ')}
#Spectators:
#spectators=config.get(treeVarSet,'spectators')
#spectators=spectators.split(' ')
121 peller 1.1
#TRAINING samples
# The sample descriptions are unpickled from the file named in the config.
# NOTE(review): pickle.load executes code embedded in the file; only point
# 'samplesinfo' at files produced by this framework.
# The with-block closes the file even if unpickling fails; pickles are read in binary mode.
with open(samplesinfo,'rb') as infofile:
    info = pickle.load(infofile)

#Workdir: remembered so the current ROOT directory can be restored after each tree is read
workdir=ROOT.gDirectory.GetPath()


# Events are split by the EventForTraining flag: 1 -> training, 0 -> testing
TrainCut='%s & EventForTraining==1'%TCut
EvalCut='%s & EventForTraining==0'%TCut

#load TRAIN trees
Tbackgrounds = []
TbScales = []
Tsignals = []
TsScales = []

# Opened before any tree is read: getTree() switches to this file before copying
output = ROOT.TFile.Open(fnameOutput, "RECREATE")
143    
def _addTree(job,cut,trees,scales,*subsample):
    """Read one (sub)sample with the given cut and append its tree and scale.

    subsample is either empty (whole sample) or a single index that is passed
    on unchanged to getTree and getScale.
    """
    tree = getTree(job,cut,path,*subsample)
    # getTree changes the current ROOT directory; go back to the working directory
    ROOT.gDirectory.Cd(workdir)
    scale = getScale(job,path,config,global_rescale,*subsample)
    trees.append(tree)
    scales.append(scale)
    print('\t\t\t%s events'%tree.GetEntries())


def _readTrees(cut,sigTrees,sigScales,bkgTrees,bkgScales):
    """Fill the given lists with the trees and scales of all active samples.

    Every active job in `info` (or each of its subsamples) whose name is listed
    in `signals` goes to the sig* lists, names listed in `backgrounds` go to the
    bkg* lists, and anything else is ignored. The lists are extended in place.
    """
    for job in info:
        # NOTE(review): job.active is evaluated as a Python expression (presumably
        # the string 'True'/'False'); it must come from a trusted samples file.
        if not eval(job.active):
            continue

        if job.subsamples:
            print('\tREADING IN SUBSAMPLES of %s'%job.name)
            for subsample in range(0,len(job.group)):
                subname = job.subnames[subsample]
                if subname in signals:
                    print('\t- %s as SIG'%job.group[subsample])
                    _addTree(job,cut,sigTrees,sigScales,subsample)
                elif subname in backgrounds:
                    print('\t- %s as BKG'%job.group[subsample])
                    _addTree(job,cut,bkgTrees,bkgScales,subsample)
        elif job.name in signals:
            print('\tREADING IN %s AS SIG'%job.name)
            _addTree(job,cut,sigTrees,sigScales)
        elif job.name in backgrounds:
            print('\tREADING IN %s AS BKG'%job.name)
            _addTree(job,cut,bkgTrees,bkgScales)


print('\n\t>>> TRAINING EVENTS <<<\n')

_readTrees(TrainCut,Tsignals,TsScales,Tbackgrounds,TbScales)


#load EVALUATE trees
Ebackgrounds = []
EbScales = []
Esignals = []
EsScales = []

print('\n\t>>> TESTING EVENTS <<<\n')

# Same samples in the same order as above, so the E* lists line up index by
# index with the T* lists.
_readTrees(EvalCut,Esignals,EsScales,Ebackgrounds,EbScales)
237 peller 1.1
238 peller 1.7
239     #output = ROOT.TFile.Open(fnameOutput, "RECREATE")
# The factory writes its results into the already opened output file
factory = ROOT.TMVA.Factory(factoryname, output, factorysettings)

#set input trees
# Signal trees are registered unconditionally, training and testing tree of the
# same sample one after the other.
for idx, trainTree in enumerate(Tsignals):
    factory.AddSignalTree(trainTree, TsScales[idx], ROOT.TMVA.Types.kTraining)
    factory.AddSignalTree(Esignals[idx], EsScales[idx], ROOT.TMVA.Types.kTesting)

# Background trees are only registered when they contain at least one event.
for idx, trainTree in enumerate(Tbackgrounds):
    if trainTree.GetEntries() > 0:
        factory.AddBackgroundTree(trainTree, TbScales[idx], ROOT.TMVA.Types.kTraining)

    testTree = Ebackgrounds[idx]
    if testTree.GetEntries() > 0:
        factory.AddBackgroundTree(testTree, EbScales[idx], ROOT.TMVA.Types.kTesting)


# add the variables
for var in MVA_Vars['Nominal']:
    factory.AddVariable(var,'D')
#for var in spectators:
#    factory.AddSpectator(var,'D') #add specators

#Execute TMVA
# the same weight expression is used for signal and background
factory.SetSignalWeightExpression(weightF)
factory.SetBackgroundWeightExpression(weightF)
factory.Verbose()
factory.BookMethod(MVAtype,MVAname,MVAsettings)
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
output.Write()
274    
#WRITE INFOFILE
# Collect the settings of this training in an mvainfo object and pickle it next
# to the TMVA output file.
info=mvainfo(MVAname)
info.factoryname=factoryname
info.factorysettings=factorysettings
info.MVAtype=MVAtype
info.MVAsettings=MVAsettings
info.weightfilepath=MVAdir
info.path=path
info.varset=treeVarSet
info.vars=MVA_Vars['Nominal']
#info.spectators=spectators
# The file is opened only after the object is complete, so an error above does
# not leave a truncated .info file, and the with-block closes it if dump fails.
with open(MVAdir+factoryname+'_'+MVAname+'.info','wb') as infofile:
    pickle.dump(info,infofile)
289    
# open the TMVA Gui on the freshly written output file (only with -v/--verbose);
# gApplication.Run() hands control to the ROOT application loop
if gui:
    ROOT.gROOT.ProcessLine(".L TMVAGui.C")
    ROOT.gROOT.ProcessLine('TMVAGui("%s")' % fnameOutput)
    ROOT.gApplication.Run()
295    
296