ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/train.py
Revision: 1.26
Committed: Fri Apr 12 09:33:15 2013 UTC (12 years, 1 month ago) by peller
Content type: text/x-python
Branch: MAIN
CVS Tags: lhcp_UnblindFix, hcp_Unblind, HEAD
Changes since 1.25: +1 -1 lines
Log Message:
fix in MVAtype

File Contents

# Content
#!/usr/bin/env python
# Driver script: parses the command line, then (further down) trains one TMVA
# method on the configured signal/background samples.
from optparse import OptionParser
import sys
import pickle
import ROOT
ROOT.gROOT.SetBatch(True)
from array import array
#warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
#usage: ./train run gui

#CONFIGURE
argv = sys.argv
parser = OptionParser()
parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False,
                  help="Verbose mode.")
parser.add_option("-T", "--training", dest="training", default="",
                  help="Training")
parser.add_option("-C", "--config", dest="config", default=[], action="append",
                  help="configuration file")
parser.add_option("-S", "--setting", dest="MVAsettings", default='',
                  help="Parameter setting string")
parser.add_option("-N", "--name", dest="set_name", default='',
                  help="Parameter setting name. Output files will have this name")
# The value of -L is later passed to eval(), which only accepts strings. The
# default is therefore the string "True" rather than the bool True, which made
# eval() raise TypeError whenever -L was omitted.
parser.add_option("-L", "--local", dest="local", default="True",
                  help="True to run it locally. False to run on batch system using config")

(opts, args) = parser.parse_args(argv)
# -C uses action="append", so an omitted option leaves the empty-list default
# and never the empty string; test for "nothing given" instead of == "".
if not opts.config:
    opts.config = "config"
# The project modules are imported only after the options were parsed, so the
# option parser can print its help message first.
from myutils import BetterConfigParser, mvainfo, ParseInfo, TreeCache

# Load the configuration file(s) given with -C.
config = BetterConfigParser()
config.read(opts.config)
anaTag = config.get("Analysis", "tag")

# Short aliases for the command-line choices used below.
run = opts.training
gui = opts.verbose

# Global rescale from the train/test splitting.
global_rescale = 2.

# Locations.
MVAdir = config.get('Directories', 'vhbbpath') + '/data/'
samplesinfo = config.get('Directories', 'samplesinfo')

# Systematics: a space-separated list in the configuration.
systematics = config.get('systematics', 'systematics').split(' ')

# Event weight expression.
weightF = config.get('Weights', 'weightF')

# Load the shared library named in the configuration into ROOT.
VHbbNameSpace = config.get('VHbbNameSpace', 'library')
ROOT.gSystem.Load(VHbbNameSpace)
#CONFIG
# Factory name and option string, read from the [factory] section.
factoryname = config.get('factory', 'factoryname')
factorysettings = config.get('factory', 'factorysettings')
# MVA type, read from the section named after the selected training.
MVAtype = config.get(run, 'MVAtype')

# MVA name and settings: taken from the configuration for a local run, or
# from the -N / -S command-line options for a batch run.
print(opts.local)
# -L is a string when given on the command line but may still be a bool
# default; eval() raises TypeError on a non-string, so only text is evaluated.
# NOTE(review): eval() executes whatever expression was passed to -L.
run_locally = opts.local if isinstance(opts.local, bool) else eval(opts.local)
if run_locally:
    print('Local run')
    MVAname = run
    MVAsettings = config.get(run, 'MVAsettings')
elif opts.set_name != '' and opts.MVAsettings != '':
    print('Batch run')
    MVAname = opts.set_name
    MVAsettings = opts.MVAsettings
else:
    print('Problem in configuration. Missing or inconsitent information Check input options')
    # Non-zero exit status so a calling shell or batch system sees the failure
    # (a bare sys.exit() reports success).
    sys.exit(1)
print('@DEBUG: MVAname')
print('input : ' + opts.set_name)
print('used : ' + MVAname)

# The TMVA output file lives in the data directory, named after factory and MVA.
fnameOutput = MVAdir + factoryname + '_' + MVAname + '.root'
print('@DEBUG: output file name : ' + fnameOutput)
#locations
path = config.get('Directories', 'SYSout')

# The training section names a cut; the cut expression itself is looked up
# under that name in the [Cuts] section.
TCutname = config.get(run, 'treeCut')
TCut = config.get('Cuts', TCutname)
#print TCut

# Signal and background sample lists are stored as Python expressions.
# NOTE(review): eval() executes whatever the configuration file contains.
signals = eval(config.get(run, 'signals'))
backgrounds = eval(config.get(run, 'backgrounds'))
treeVarSet = config.get(run, 'treeVarSet')

# Input variables: the space-separated 'Nominal' entry of the variable set.
MVA_Vars = {'Nominal': config.get(treeVarSet, 'Nominal').split(' ')}

#Infofile
info = ParseInfo(samplesinfo, path)

#Workdir
workdir = ROOT.gDirectory.GetPath()

# Training and evaluation selections differ only in the EventForTraining flag.
# NOTE(review): TCut is inserted without parentheses; a cut containing a
# top-level '||' may combine differently than intended - confirm.
TrainCut = '%s & EventForTraining==1' % TCut
EvalCut = '%s & EventForTraining==0' % TCut
cuts = [TrainCut, EvalCut]

# All samples (signal and background) are handed to the tree cache together
# with both cuts.
samples = info.get_samples(signals + backgrounds)
tc = TreeCache(cuts, samples, path, config)
output = ROOT.TFile.Open(fnameOutput, "RECREATE")

print('\n\t>>> READING EVENTS <<<\n')

signal_samples = info.get_samples(signals)
background_samples = info.get_samples(backgrounds)


def _load_trees(jobs, label):
    """Fetch the training and evaluation trees for every sample in *jobs*.

    jobs  -- iterable of sample objects (each must provide a ``name``)
    label -- tag printed in the log line, e.g. 'SIG' or 'BKG'

    Returns four parallel lists: training trees, training scales,
    evaluation trees, evaluation scales.
    """
    train_trees, train_scales = [], []
    eval_trees, eval_scales = [], []
    for job in jobs:
        print('\tREADING IN %s AS %s' % (job.name, label))
        train_tree = tc.get_tree(job, TrainCut)
        ROOT.gDirectory.Cd(workdir)
        # One scale per sample, used for both the training and the eval tree.
        scale = tc.get_scale(job, config) * global_rescale
        eval_tree = tc.get_tree(job, EvalCut)
        # Return to the working directory after every tree fetch. The signal
        # loop used to skip this step after the evaluation tree while the
        # background loop performed it; both are now handled the same way.
        ROOT.gDirectory.Cd(workdir)
        train_trees.append(train_tree)
        train_scales.append(scale)
        eval_trees.append(eval_tree)
        eval_scales.append(scale)
        print('\t\t\tTraining %s events' % train_tree.GetEntries())
        print('\t\t\tEval %s events' % eval_tree.GetEntries())
    return train_trees, train_scales, eval_trees, eval_scales


#load trees (T* = TRAIN trees/scales, E* = EVAL trees/scales)
Tsignals, TsScales, Esignals, EsScales = _load_trees(signal_samples, 'SIG')
Tbackgrounds, TbScales, Ebackgrounds, EbScales = _load_trees(background_samples, 'BKG')
factory = ROOT.TMVA.Factory(factoryname, output, factorysettings)

#set input trees
# Every signal sample contributes its training tree and its evaluation tree.
for sig_train, sig_train_scale, sig_eval, sig_eval_scale in zip(Tsignals, TsScales, Esignals, EsScales):
    factory.AddSignalTree(sig_train, sig_train_scale, ROOT.TMVA.Types.kTraining)
    factory.AddSignalTree(sig_eval, sig_eval_scale, ROOT.TMVA.Types.kTesting)

# Background trees are only registered when they contain entries.
for bkg_train, bkg_train_scale, bkg_eval, bkg_eval_scale in zip(Tbackgrounds, TbScales, Ebackgrounds, EbScales):
    if bkg_train.GetEntries() > 0:
        factory.AddBackgroundTree(bkg_train, bkg_train_scale, ROOT.TMVA.Types.kTraining)

    if bkg_eval.GetEntries() > 0:
        factory.AddBackgroundTree(bkg_eval, bkg_eval_scale, ROOT.TMVA.Types.kTesting)

# Register the input variables, each declared with type code 'D'.
for var_name in MVA_Vars['Nominal']:
    factory.AddVariable(var_name, 'D')

#Execute TMVA
factory.SetSignalWeightExpression(weightF)
factory.SetBackgroundWeightExpression(weightF)
factory.Verbose()
# Only the single booked method is trained, directly through its handle.
my_methodBase_bdt = factory.BookMethod(MVAtype, MVAname, MVAsettings)
my_methodBase_bdt.TrainMethod()
#factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
output.Write()
#training performance parameters

# Move into the directory of this method inside the output file.
#output.ls()
output.cd('Method_%s' % MVAtype)
#ROOT.gDirectory.ls()
ROOT.gDirectory.cd(MVAname)

rocIntegral_default = my_methodBase_bdt.GetROCIntegral()

# Response histograms for the test and training samples, fetched once by name.
hist_prefix = factoryname + '_' + MVAname
test_sig_hist = ROOT.gDirectory.Get(hist_prefix + '_S')
test_bkg_hist = ROOT.gDirectory.Get(hist_prefix + '_B')
train_sig_hist = ROOT.gDirectory.Get(hist_prefix + '_Train_S')
train_bkg_hist = ROOT.gDirectory.Get(hist_prefix + '_Train_B')

roc_integral_test = my_methodBase_bdt.GetROCIntegral(test_sig_hist, test_bkg_hist)
roc_integral_train = my_methodBase_bdt.GetROCIntegral(train_sig_hist, train_bkg_hist)
significance = my_methodBase_bdt.GetSignificance()
separation_test = my_methodBase_bdt.GetSeparation(test_sig_hist, test_bkg_hist)
separation_train = my_methodBase_bdt.GetSeparation(train_sig_hist, train_bkg_hist)
# Kolmogorov test of each test histogram against its training counterpart.
ks_signal = test_sig_hist.KolmogorovTest(train_sig_hist)
ks_bkg = test_bkg_hist.KolmogorovTest(train_bkg_hist)


print('@DEBUG: Test Integral')
print(test_sig_hist.Integral())
print('@LOG: ROC integral (default)')
print(rocIntegral_default)
print('@LOG: ROC integral using signal and background')
print(roc_integral_test)
print('@LOG: ROC integral using train signal and background')
print(roc_integral_train)
print('@LOG: ROC integral ratio (Test/Train)')
print(roc_integral_test / roc_integral_train)
print('@LOG: Significance')
print(significance)
print('@LOG: Separation for test sample')
print(separation_test)
print('@LOG: Separation for test train')
print(separation_train)
print('@LOG: Kolmogorov test on signal')
print(ks_signal)
print('@LOG: Kolmogorov test on background')
print(ks_bkg)
#!! update the database
import sqlite3 as lite
# sqlite3.connect takes its timeout in seconds (default 5.0), so this waits
# up to 10000 s for a lock held by another process.
# NOTE(review): the previous comment assumed milliseconds - confirm the
# intended waiting time before changing the value.
con = lite.connect(MVAdir + 'Trainings.db', timeout=10000)
try:
    # "with con" wraps the statements in one transaction: commit on success,
    # rollback on an exception. It does not close the connection.
    with con:
        cur = con.cursor()
        cur.execute("create table if not exists trainings (Roc_integral real, Separation real, Significance real, Ks_signal real, Ks_background real, Roc_integral_train real, Separation_train real, MVASettings text)")
        cur.execute("insert into trainings values(?,?,?,?,?,?,?,?)",
                    (roc_integral_test, separation_test, significance, ks_signal,
                     ks_bkg, roc_integral_train, separation_train, MVAsettings))
finally:
    # Close explicitly so the database handle is released even on failure.
    con.close()

#!! Close the output file to avoid memory leak
output.Close()
#WRITE INFOFILE
# The info file sits next to the TMVA output and holds a pickled mvainfo
# object describing this training. The with-block guarantees the file is
# closed even if building or pickling the object raises.
with open(MVAdir + factoryname + '_' + MVAname + '.info', 'w') as infofile:
    print('@DEBUG: output infofile name')
    print(infofile)

    # NOTE: this rebinds the name 'info', which held the ParseInfo object.
    info = mvainfo(MVAname)
    info.factoryname = factoryname
    info.factorysettings = factorysettings
    info.MVAtype = MVAtype
    info.MVAsettings = MVAsettings
    info.weightfilepath = MVAdir
    info.path = path
    info.varset = treeVarSet
    info.vars = MVA_Vars['Nominal']
    pickle.dump(info, infofile)
# open the TMVA Gui
# 'gui' mirrors the -v/--verbose flag: when set, load the GUI macro, point it
# at the output file and enter the ROOT application loop.
if gui:
    ROOT.gROOT.ProcessLine(".L myutils/TMVAGui.C")
    ROOT.gROOT.ProcessLine('TMVAGui("%s")' % fnameOutput)
    ROOT.gApplication.Run()