ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/train.py
Revision: 1.7
Committed: Thu Aug 2 16:03:52 2012 UTC (12 years, 9 months ago) by peller
Content type: text/x-python
Branch: MAIN
Changes since 1.6: +103 -40 lines
Log Message:
removed flavour splitting, fixed training readin problem

File Contents

# Content
1 #!/usr/bin/env python
2 from samplesclass import sample
3 from printcolor import printc
4 import pickle
5 import ROOT
6 from ROOT import TFile, TTree
7 import ROOT
8 from array import array
9 from BetterConfigParser import BetterConfigParser
10 import sys
11 from mvainfos import mvainfo
12 from gethistofromtree import getScale
13
14
15 #warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
16
17
18 #usage: ./train run gui
19
20
21 #CONFIGURE
22
23 #load config
# Parse the analysis configuration ('./config' is the alternative file that
# used to be read here).
config = BetterConfigParser()
config.read('./config7TeV')

# Global rescale from the train/test splitting; passed to getScale() for
# every sample that is read in.
global_rescale = 2.

# Base directory; the weights/ outputs are written underneath it.
Wdir = config.get('Directories', 'Wdir')

# Space-separated list of systematics, split into individual names.
systematics = config.get('systematics', 'systematics').split(' ')

# Event-weight expression handed to the TMVA factory.
weightF = config.get('Weights', 'weightF')
40
41
42
43
def getTree(job,cut,subsample=-1):
    """Return a copy of a sample's tree containing only the selected events.

    The module-level ``output`` file is made the current ROOT directory
    before the copy is produced (presumably so that the copied tree is owned
    by the output file rather than by the input file -- confirm against the
    ROOT TTree::CopyTree documentation).

    Parameters:
        job: sample description; ``job.getpath()`` gives the input file,
            ``job.tree`` the tree name and ``job.subcuts`` the per-subsample
            selections.
        cut: selection expression applied to the tree.
        subsample: index into ``job.subcuts``; when greater than -1 the
            corresponding sub-cut is combined with ``cut``.

    Returns:
        The tree produced by ``CopyTree`` for the resulting selection.

    Raises:
        IOError: if the input file cannot be opened or does not contain
            ``job.tree``.
    """
    newinput = TFile.Open(job.getpath(),'read')
    # TFile.Open hands back a null/zombie file on failure; fail here with a
    # readable message instead of on the first attribute access below.
    if not newinput or newinput.IsZombie():
        raise IOError('could not open input file %s' % job.getpath())

    output.cd()
    Tree = newinput.Get(job.tree)
    if not Tree:
        raise IOError('tree %s not found in %s' % (job.tree, job.getpath()))

    if subsample > -1:
        selection = '(%s) & (%s)'%(cut,job.subcuts[subsample])
    else:
        selection = cut

    # NOTE(review): the input file is intentionally left open, exactly as
    # before; closing it here should be checked against ROOT's ownership
    # rules for the copied tree first.
    return Tree.CopyTree(selection)
63
64 #def getScale(job,subsample=-1):
65 # input = TFile.Open(job.getpath())
66 # CountWithPU = input.Get("CountWithPU")
67 # CountWithPU2011B = input.Get("CountWithPU2011B")
68 # #print lumi*xsecs[i]/hist.GetBinContent(1)
69 # return float(job.lumi)*float(job.xsec)*float(job.sf)/(0.46502*CountWithPU.GetBinContent(1)+0.53498*CountWithPU2011B.GetBinContent(1))*2/float(job.split)
70
# Command line: ./train <run> [gui]
# <run> names the config section describing the MVA; the optional second
# argument is compared against 'gui' at the end of the script.
if len(sys.argv) < 2:
    sys.exit('usage: %s run [gui]' % sys.argv[0])
run = sys.argv[1]
# The second argument used to be mandatory (IndexError when omitted).
gui = sys.argv[2] if len(sys.argv) > 2 else ''
73
74
# ---- factory settings ----
factoryname = config.get('factory', 'factoryname')
factorysettings = config.get('factory', 'factorysettings')

# ---- MVA settings: the config section is named after the run argument ----
MVAname = run
MVAtype = config.get(run, 'MVAtype')
MVAsettings = config.get(run, 'MVAsettings')
fnameOutput = Wdir + '/weights/' + factoryname + '_' + MVAname + '.root'

# ---- location of the samples.info file ----
path = config.get(run, 'path')

# The run section only names the cut; its expression is looked up in [Cuts].
TCutname = config.get(run, 'treeCut')
TCut = config.get('Cuts', TCutname)

# ---- space-separated sample names ----
signals = config.get(run, 'signals').split(' ')
backgrounds = config.get(run, 'backgrounds').split(' ')

# ---- input variables: the run section names the variable-set section ----
treeVarSet = config.get(run, 'treeVarSet')
MVA_Vars = {'Nominal': config.get(treeVarSet, 'Nominal').split(' ')}

# Spectators (disabled):
#spectators=config.get(treeVarSet,'spectators')
#spectators=spectators.split(' ')
108
#TRAINING samples: unpickle the sample descriptions stored next to the trees.
# NOTE(review): pickle.load can execute arbitrary code, so samples.info must
# come from a trusted location.
# Pickle data is binary, hence 'rb'; the with-block closes the file even when
# unpickling fails.
with open(path+'/samples.info','rb') as infofile:
    info = pickle.load(infofile)
113
# Remember the current ROOT directory; this must happen before the output
# file below is opened.
workdir = ROOT.gDirectory.GetPath()

# Training / evaluation selections: the base cut plus the EventForTraining flag.
TrainCut = '%s && EventForTraining==1' % TCut
EvalCut = '%s && EventForTraining==0' % TCut

# Containers for the TRAIN trees and their scale factors.
Tsignals, TsScales = [], []
Tbackgrounds, TbScales = [], []

# Output file of the training; it is recreated on every run.
output = ROOT.TFile.Open(fnameOutput, "RECREATE")
130
def _readTrees(cut):
    """Read the trees and scale factors of all active samples for one cut.

    Every entry of the module-level ``info`` list whose ``active`` flag
    evaluates to true is considered. Samples named in ``signals`` are read as
    signal; samples named in ``backgrounds`` are read as background, one tree
    per subsample when ``job.subsamples`` is set. After each tree is read the
    current ROOT directory is reset to ``workdir``.

    Parameters:
        cut: selection expression forwarded to getTree().

    Returns:
        A tuple ``(sigTrees, sigScales, bkgTrees, bkgScales)`` of parallel
        lists.
    """
    sigTrees, sigScales = [], []
    bkgTrees, bkgScales = [], []

    for job in info:
        # NOTE(review): job.active is evaluated as a Python expression; the
        # sample info must therefore come from a trusted source.
        if not eval(job.active):
            continue

        if job.name in signals:
            print('\tREADING IN %s AS SIG'%job.name)
            sigTrees.append(getTree(job,cut))
            ROOT.gDirectory.Cd(workdir)
            sigScales.append(getScale(job,global_rescale))

        if job.name in backgrounds:
            if job.subsamples:
                print('\tREADING IN SUBSAMPLES of %s AS BKG'%job.name)
                for subsample, groupName in enumerate(job.group):
                    print('\t- %s'%groupName)
                    bkgTrees.append(getTree(job,cut,subsample))
                    ROOT.gDirectory.Cd(workdir)
                    bkgScales.append(getScale(job,global_rescale,subsample))
            else:
                print('\tREADING IN %s AS BKG'%job.name)
                bkgTrees.append(getTree(job,cut))
                ROOT.gDirectory.Cd(workdir)
                bkgScales.append(getScale(job,global_rescale))

    return sigTrees, sigScales, bkgTrees, bkgScales


#load TRAIN trees
print('\n*** TRAINING EVENTS ***\n')
Tsignals, TsScales, Tbackgrounds, TbScales = _readTrees(TrainCut)

#load EVALUATE trees
print('\n*** TESTING EVENTS ***\n')
Esignals, EsScales, Ebackgrounds, EbScales = _readTrees(EvalCut)
202
203
204
205 #output = ROOT.TFile.Open(fnameOutput, "RECREATE")
# Create the TMVA factory writing into the already opened output file.
factory = ROOT.TMVA.Factory(factoryname, output, factorysettings)

#set input trees
# Training and testing lists are filled in the same sample order, so they are
# walked in lockstep. Empty trees are skipped for signal as well as for
# background (presumably because TMVA rejects empty input trees -- the
# background loop already guarded against them).
for trainTree, trainScale, testTree, testScale in zip(Tsignals, TsScales, Esignals, EsScales):
    if trainTree.GetEntries() > 0:
        factory.AddSignalTree(trainTree, trainScale, ROOT.TMVA.Types.kTraining)
    if testTree.GetEntries() > 0:
        factory.AddSignalTree(testTree, testScale, ROOT.TMVA.Types.kTesting)

for trainTree, trainScale, testTree, testScale in zip(Tbackgrounds, TbScales, Ebackgrounds, EbScales):
    if trainTree.GetEntries() > 0:
        factory.AddBackgroundTree(trainTree, trainScale, ROOT.TMVA.Types.kTraining)
    if testTree.GetEntries() > 0:
        factory.AddBackgroundTree(testTree, testScale, ROOT.TMVA.Types.kTesting)

# add the variables
for var in MVA_Vars['Nominal']:
    factory.AddVariable(var,'D')
#for var in spectators:
#    factory.AddSpectator(var,'D') #add specators

#Execute TMVA
# The per-event weight expression is applied to both classes; previously only
# the signal class received it, leaving background events without it.
factory.SetSignalWeightExpression(weightF)
factory.SetBackgroundWeightExpression(weightF)
factory.Verbose()
factory.BookMethod(MVAtype,MVAname,MVAsettings)
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
output.Write()
235
#WRITE INFOFILE
# Collect everything needed to locate and re-use this training.
info=mvainfo(MVAname)
info.factoryname=factoryname
info.factorysettings=factorysettings
info.MVAtype=MVAtype
info.MVAsettings=MVAsettings
info.weightfilepath=Wdir+'/weights'
info.path=path
info.varset=treeVarSet
info.vars=MVA_Vars['Nominal']
#info.spectators=spectators

# The file is opened only once the object is complete, so a failure above no
# longer leaves a truncated .info file behind; the with-block closes it even
# when pickling fails. Pickle data is binary, hence 'wb'.
with open(Wdir+'/weights/'+factoryname+'_'+MVAname+'.info','wb') as infofile:
    pickle.dump(info,infofile)
250
# Launch the TMVA GUI on the output file when 'gui' was given on the command
# line: load the macro, call it, then enter the ROOT application loop.
if gui == 'gui':
    for macroLine in (".L TMVAGui.C", "TMVAGui(\"%s\")" % fnameOutput):
        ROOT.gROOT.ProcessLine(macroLine)
    ROOT.gApplication.Run()
256
257