ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/train.py
Revision: 1.8
Committed: Fri Aug 3 15:43:03 2012 UTC (12 years, 9 months ago) by peller
Content type: text/x-python
Branch: MAIN
Changes since 1.7: +66 -39 lines
Log Message:
added samples configs w/o splitting and improved training script

File Contents

# User Rev Content
1 peller 1.1 #!/usr/bin/env python
2 peller 1.5 from samplesclass import sample
3 peller 1.1 from printcolor import printc
4     import pickle
5     import ROOT
6     from ROOT import TFile, TTree
7     import ROOT
8     from array import array
9 nmohr 1.6 from BetterConfigParser import BetterConfigParser
10 peller 1.1 import sys
11     from mvainfos import mvainfo
12 peller 1.7 from gethistofromtree import getScale
13    
14 peller 1.1
15     #warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
16    
17    
18     #usage: ./train run gui
19    
20    
21     #CONFIGURE
22    
23     #load config
# Load the analysis configuration.
# Other configuration files that were used here before: './config' and
# './config7TeV'.
config = BetterConfigParser()
config.read('./config7TeV_ZZ')

# Global rescale factor from the train/test splitting; it is handed to
# getScale() for every sample that is read in.
global_rescale = 2.

# Working directory (the weights are written below <Wdir>/weights).
Wdir = config.get('Directories', 'Wdir')

# Systematics: whitespace-separated list.  split() without an argument is
# used so that repeated blanks or line breaks do not produce empty entries.
systematics = config.get('systematics', 'systematics').split()

# Event-weight expression that is later given to the TMVA factory.
weightF = config.get('Weights', 'weightF')
41    
42 peller 1.7
43    
44    
def getTree(job, cut, subsample=-1):
    """Return a copy of the job's tree reduced to the events passing a cut.

    The file at ``job.getpath()`` is opened read-only and the tree called
    ``job.tree`` is copied with ``TTree.CopyTree``.  ``output.cd()`` is
    called before the copy, so the module-level ``output`` file must already
    be open when this function is called; the copy is expected to live in
    that file, which is why it can still be used after the input file has
    been closed.

    Args:
        job: sample object providing ``getpath()``, ``tree`` and (when a
            subsample is requested) ``subcuts``.
        cut: selection string applied to the tree.
        subsample: index into ``job.subcuts``.  For values greater than -1
            the corresponding sub-cut is combined with ``cut``; the default
            of -1 applies ``cut`` alone.

    Returns:
        The tree returned by ``CopyTree`` for the selection.

    Raises:
        IOError: if the input file cannot be opened or does not contain the
            tree ``job.tree``.
    """
    newinput = TFile.Open(job.getpath(), 'read')
    # A failed Open yields a null pointer or a zombie file; fail here with a
    # clear message instead of crashing on the first use of the file.
    if not newinput or newinput.IsZombie():
        raise IOError('could not open input file %s' % job.getpath())

    try:
        output.cd()
        Tree = newinput.Get(job.tree)
        if not Tree:
            raise IOError('tree %s not found in %s'
                          % (job.tree, job.getpath()))

        if subsample > -1:
            # NOTE(review): the two parts are joined with a single '&'
            # (kept unchanged); confirm that this is intended rather
            # than '&&'.
            selection = '(%s) & (%s)' % (cut, job.subcuts[subsample])
        else:
            selection = cut
        CuttedTree = Tree.CopyTree(selection)
    finally:
        # Close the input file even when the copy fails.
        newinput.Close()

    return CuttedTree
65    
66 peller 1.7 #def getScale(job,subsample=-1):
67     # input = TFile.Open(job.getpath())
68     # CountWithPU = input.Get("CountWithPU")
69     # CountWithPU2011B = input.Get("CountWithPU2011B")
70     # #print lumi*xsecs[i]/hist.GetBinContent(1)
71     # return float(job.lumi)*float(job.xsec)*float(job.sf)/(0.46502*CountWithPU.GetBinContent(1)+0.53498*CountWithPU2011B.GetBinContent(1))*2/float(job.split)
72 peller 1.1
# Command line: ./train <run> [gui]
#   run: name of the config section describing the MVA to train
#   gui: pass the literal word 'gui' to open the TMVA GUI after training
if len(sys.argv) < 2:
    sys.exit('usage: %s <run> [gui]' % sys.argv[0])
run = sys.argv[1]
# The second argument is optional; without it the GUI is simply not started.
gui = sys.argv[2] if len(sys.argv) > 2 else ''


# CONFIG
# factory
factoryname = config.get('factory', 'factoryname')
factorysettings = config.get('factory', 'factorysettings')
# MVA
MVAtype = config.get(run, 'MVAtype')
MVAname = run
MVAsettings = config.get(run, 'MVAsettings')
fnameOutput = Wdir + '/weights/' + factoryname + '_' + MVAname + '.root'
# locations
path = config.get(run, 'path')

# The run section names the cut; the cut string itself is in [Cuts].
TCutname = config.get(run, 'treeCut')
TCut = config.get('Cuts', TCutname)

# Signal and background sample names: whitespace-separated lists.  split()
# without an argument avoids empty entries from repeated blanks/line breaks.
signals = config.get(run, 'signals').split()
backgrounds = config.get(run, 'backgrounds').split()

treeVarSet = config.get(run, 'treeVarSet')

# Input variables of the MVA, keyed by variation (only 'Nominal' is used).
MVA_Vars = {}
MVA_Vars['Nominal'] = config.get(treeVarSet, 'Nominal').split()
# Spectators are currently disabled:
#spectators=config.get(treeVarSet,'spectators')
#spectators=spectators.split(' ')

# TRAINING samples
# NOTE(review): samples.info is unpickled as is, so it must come from a
# trusted location.
with open(path + '/samples.info', 'r') as infofile:
    info = pickle.load(infofile)

# Remember the current ROOT directory so it can be restored after reading
# each input file.
workdir = ROOT.gDirectory.GetPath()

# The tree cut is wrapped in parentheses: '&&' binds tighter than '||', so
# without them a top-level '||' inside TCut would let events bypass the
# EventForTraining requirement and mix the training and testing samples.
TrainCut = '(%s) && EventForTraining==1' % TCut
EvalCut = '(%s) && EventForTraining==0' % TCut
122    
123     #load TRAIN trees
def _load_one(job, cut, trees, scales, subsample=None):
    """Read one (sub)sample with ``cut`` and append its tree and scale.

    When ``subsample`` is None, getTree() and getScale() are called without
    a subsample argument, so each falls back to its own default.
    """
    extra = () if subsample is None else (subsample,)
    tree = getTree(job, cut, *extra)
    # Go back to the directory that was current before the input was read.
    ROOT.gDirectory.Cd(workdir)
    scale = getScale(job, global_rescale, *extra)
    trees.append(tree)
    scales.append(scale)
    print('\t\t\t%s events' % tree.GetEntries())


def _load_trees(samples, cut):
    """Read all active signal and background samples with the given cut.

    Samples (or subsamples) whose name is listed in neither ``signals`` nor
    ``backgrounds`` are skipped.

    Returns:
        A tuple ``(signal_trees, signal_scales, background_trees,
        background_scales)`` of four lists, filled in sample order.
    """
    sig_trees, sig_scales, bkg_trees, bkg_scales = [], [], [], []

    for job in samples:
        # NOTE(review): job.active is evaluated with eval(); it comes from
        # the unpickled samples.info, so that file must be trusted.
        if not eval(job.active):
            continue

        if job.subsamples:
            print('\tREADING IN SUBSAMPLES of %s' % job.name)
            for subsample in range(len(job.group)):
                subname = job.subnames[subsample]
                if subname in signals:
                    print('\t- %s as SIG' % job.group[subsample])
                    _load_one(job, cut, sig_trees, sig_scales, subsample)
                elif subname in backgrounds:
                    print('\t- %s as BKG' % job.group[subsample])
                    _load_one(job, cut, bkg_trees, bkg_scales, subsample)
        elif job.name in signals:
            print('\tREADING IN %s AS SIG' % job.name)
            _load_one(job, cut, sig_trees, sig_scales)
        elif job.name in backgrounds:
            print('\tREADING IN %s AS BKG' % job.name)
            _load_one(job, cut, bkg_trees, bkg_scales)

    return sig_trees, sig_scales, bkg_trees, bkg_scales


# The output file has to be open before any tree is read: getTree() changes
# into it before copying the selected events.
output = ROOT.TFile.Open(fnameOutput, "RECREATE")
if not output or output.IsZombie():
    raise IOError('could not create output file %s' % fnameOutput)

# load TRAIN trees
print('\n\t>>> TRAINING EVENTS <<<\n')
Tsignals, TsScales, Tbackgrounds, TbScales = _load_trees(info, TrainCut)

# load EVALUATE trees
print('\n\t>>> TESTING EVENTS <<<\n')
Esignals, EsScales, Ebackgrounds, EbScales = _load_trees(info, EvalCut)
226 peller 1.1
227 peller 1.7
228     #output = ROOT.TFile.Open(fnameOutput, "RECREATE")
# Create the TMVA factory writing into the already opened output file.
factory = ROOT.TMVA.Factory(factoryname, output, factorysettings)

# Set input trees.  The training and testing lists are filled in the same
# sample order, so they are walked in parallel.  Empty trees are skipped for
# signal as well as for background (previously only backgrounds were
# checked).
for trainTree, trainScale, testTree, testScale in zip(
        Tsignals, TsScales, Esignals, EsScales):
    if trainTree.GetEntries() > 0:
        factory.AddSignalTree(trainTree, trainScale,
                              ROOT.TMVA.Types.kTraining)
    if testTree.GetEntries() > 0:
        factory.AddSignalTree(testTree, testScale,
                              ROOT.TMVA.Types.kTesting)

for trainTree, trainScale, testTree, testScale in zip(
        Tbackgrounds, TbScales, Ebackgrounds, EbScales):
    if trainTree.GetEntries() > 0:
        factory.AddBackgroundTree(trainTree, trainScale,
                                  ROOT.TMVA.Types.kTraining)
    if testTree.GetEntries() > 0:
        factory.AddBackgroundTree(testTree, testScale,
                                  ROOT.TMVA.Types.kTesting)

# Add the input variables (all declared as doubles).
for var in MVA_Vars['Nominal']:
    factory.AddVariable(var, 'D')
# Spectators are currently disabled:
#for var in spectators:
#    factory.AddSpectator(var,'D') #add specators

# Execute TMVA
# NOTE(review): the weight expression is set for signal only; confirm that
# background events are meant to stay without it (TMVA also offers
# SetBackgroundWeightExpression).
factory.SetSignalWeightExpression(weightF)
factory.Verbose()
factory.BookMethod(MVAtype, MVAname, MVAsettings)
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
output.Write()
262    
263     #WRITE INFOFILE
# Describe this training in an mvainfo object and pickle it to
# <Wdir>/weights/<factoryname>_<MVAname>.info.
# Note: from here on 'info' no longer refers to the list of samples.
info = mvainfo(MVAname)
info.factoryname = factoryname
info.factorysettings = factorysettings
info.MVAtype = MVAtype
info.MVAsettings = MVAsettings
info.weightfilepath = Wdir + '/weights'
info.path = path
info.varset = treeVarSet
info.vars = MVA_Vars['Nominal']
#info.spectators=spectators

# The file is opened only once the object is complete, and the with-block
# closes it even if pickling fails.
with open(Wdir + '/weights/' + factoryname + '_' + MVAname + '.info',
          'w') as infofile:
    pickle.dump(info, infofile)
277    
278     # open the TMVA Gui
# Start the interactive TMVA GUI on the freshly written output file when the
# second command-line argument was the word 'gui'.
if gui == 'gui':
    gui_commands = (
        ".L TMVAGui.C",                    # load the GUI macro
        "TMVAGui(\"%s\")" % fnameOutput,   # run it on the training output
    )
    for command in gui_commands:
        ROOT.gROOT.ProcessLine(command)
    # Hand control to the ROOT application loop.
    ROOT.gApplication.Run()
283    
284