ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/train.py
Revision: 1.26
Committed: Fri Apr 12 09:33:15 2013 UTC (12 years, 1 month ago) by peller
Content type: text/x-python
Branch: MAIN
CVS Tags: lhcp_UnblindFix, hcp_Unblind, HEAD
Changes since 1.25: +1 -1 lines
Log Message:
fix in MVAtype

File Contents

# User Rev Content
1 peller 1.1 #!/usr/bin/env python
2 nmohr 1.9 from optparse import OptionParser
3 peller 1.1 import sys
4 peller 1.16 import pickle
5     import ROOT
6 nmohr 1.19 ROOT.gROOT.SetBatch(True)
7 peller 1.16 from array import array
8 peller 1.1 #warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
9     #usage: ./train run gui
10    
11     #CONFIGURE
# --- Command-line configuration ---------------------------------------------
# Options:
#   -v  verbose flag (later also used to decide whether to open the TMVA GUI)
#   -T  name of the training section to read from the configuration
#   -C  configuration file (may be given several times)
#   -S  MVA settings string (batch running)
#   -N  name for this parameter setting / output files (batch running)
#   -L  'True' for a local run, 'False' for a batch run
argv = sys.argv
parser = OptionParser()
parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False,
                  help="Verbose mode.")
parser.add_option("-T", "--training", dest="training", default="",
                  help="Training")
parser.add_option("-C", "--config", dest="config", default=[], action="append",
                  help="configuration file")
parser.add_option("-S", "--setting", dest="MVAsettings", default='',
                  help="Parameter setting string")
parser.add_option("-N", "--name", dest="set_name", default='',
                  help="Parameter setting name. Output files will have this name")
# The default is the *string* 'True', matching what optparse stores when the
# user passes "-L True". The value is later handed to eval(), which raises
# TypeError for a non-string, so a bool default made a run without -L crash.
parser.add_option("-L", "--local", dest="local", default='True',
                  help="True to run it locally. False to run on batch system using config")

(opts, args) = parser.parse_args(argv)
# -C uses action="append", so "no config given" is an empty list and never
# the empty string; test for emptiness so the fallback name is really used.
if not opts.config:
    opts.config = "config"
30 peller 1.1
# Project utilities are imported only after option parsing (kept from the
# original note: "import after configure to get help message").
from myutils import BetterConfigParser, mvainfo, ParseInfo, TreeCache

# --- Load the configuration -------------------------------------------------
config = BetterConfigParser()
config.read(opts.config)
anaTag = config.get("Analysis", "tag")
run = opts.training   # name of the config section describing this training
gui = opts.verbose    # the verbose flag doubles as "open the TMVA GUI"

# Global rescale from the train/test splitting.
global_rescale = 2.

# Locations
MVAdir = config.get('Directories', 'vhbbpath') + '/data/'
samplesinfo = config.get('Directories', 'samplesinfo')

# Systematics: stored in the config as a single space-separated string.
systematics = config.get('systematics', 'systematics').split(' ')

weightF = config.get('Weights', 'weightF')

# Load the library named by the VHbbNameSpace section into ROOT.
VHbbNameSpace = config.get('VHbbNameSpace', 'library')
ROOT.gSystem.Load(VHbbNameSpace)

# TMVA factory name and settings
factoryname = config.get('factory', 'factoryname')
factorysettings = config.get('factory', 'factorysettings')
# MVA type of the selected training
MVAtype = config.get(run, 'MVAtype')
# --- MVA name and settings --------------------------------------------------
# A local run takes both from the configuration section `run`; a batch run
# takes them from the -N / -S command-line options.
print(opts.local)
# opts.local is the string given to -L, or the option default. It used to be
# passed to eval(), which fails on a bool default and would execute arbitrary
# command-line text. Interpret it explicitly instead: a bool is used as is,
# otherwise 'true' / '1' / 'yes' (case-insensitive) mean a local run.
local_flag = opts.local
if not isinstance(local_flag, bool):
    local_flag = str(local_flag).strip().lower() in ('true', '1', 'yes')

if local_flag:
    print('Local run')
    MVAname = run
    MVAsettings = config.get(run, 'MVAsettings')
elif opts.set_name != '' and opts.MVAsettings != '':
    print('Batch run')
    MVAname = opts.set_name
    MVAsettings = opts.MVAsettings
else:
    print('Problem in configuration. Missing or inconsitent information Check input options')
    # Exit with a non-zero status so a batch system can see the failure.
    sys.exit(1)

print('@DEBUG: MVAname')
print('input : ' + opts.set_name)
print('used : ' + MVAname)

# ROOT file that will receive the TMVA output of this training.
fnameOutput = MVAdir + factoryname + '_' + MVAname + '.root'
print('@DEBUG: output file name : ' + fnameOutput)
82 bortigno 1.22
# --- Input location and selection ------------------------------------------
path = config.get('Directories', 'SYSout')

# The training section names a cut; the cut string itself lives in [Cuts].
TCutname = config.get(run, 'treeCut')
TCut = config.get('Cuts', TCutname)

# Signal and background sample lists.
# NOTE(review): both values are passed through eval(), so the configuration
# file is executed as Python here -- only use trusted configuration files.
signals = eval(config.get(run, 'signals'))
backgrounds = eval(config.get(run, 'backgrounds'))
treeVarSet = config.get(run, 'treeVarSet')

# Input variables of the MVA: a space-separated list in the config.
MVA_Vars = {'Nominal': config.get(treeVarSet, 'Nominal').split(' ')}

# Sample information
info = ParseInfo(samplesinfo, path)

# Remember the current ROOT directory so it can be restored later.
workdir = ROOT.gDirectory.GetPath()

# The base cut is combined with the EventForTraining flag to separate the
# training events (==1) from the evaluation events (==0).
TrainCut = '%s & EventForTraining==1' % TCut
EvalCut = '%s & EventForTraining==0' % TCut
cuts = [TrainCut, EvalCut]

samples = info.get_samples(signals + backgrounds)

tc = TreeCache(cuts, samples, path, config)

output = ROOT.TFile.Open(fnameOutput, "RECREATE")
122    
print('\n\t>>> READING EVENTS <<<\n')

signal_samples = info.get_samples(signals)
background_samples = info.get_samples(backgrounds)


def _load_trees(jobs, label):
    """Fetch the training and evaluation trees and the scale of each sample.

    jobs  -- samples as returned by info.get_samples()
    label -- tag used in the log line ('SIG' or 'BKG')

    Returns (train_trees, train_scales, eval_trees, eval_scales). The
    evaluation scales equal the training scales: one scale is computed per
    sample (tc.get_scale times global_rescale) and used for both trees.
    """
    train_trees, train_scales = [], []
    eval_trees, eval_scales = [], []
    for job in jobs:
        print('\tREADING IN %s AS %s' % (job.name, label))
        train_tree = tc.get_tree(job, TrainCut)
        # Go back to the working directory after fetching a tree
        # (presumably get_tree changes gDirectory -- confirm in TreeCache).
        ROOT.gDirectory.Cd(workdir)
        scale = tc.get_scale(job, config) * global_rescale
        eval_tree = tc.get_tree(job, EvalCut)
        # The signal loop used to skip this second Cd while the background
        # loop did it; both kinds of sample are now handled the same way.
        ROOT.gDirectory.Cd(workdir)
        train_trees.append(train_tree)
        train_scales.append(scale)
        eval_trees.append(eval_tree)
        eval_scales.append(scale)
        print('\t\t\tTraining %s events' % train_tree.GetEntries())
        print('\t\t\tEval %s events' % eval_tree.GetEntries())
    return train_trees, train_scales, eval_trees, eval_scales


# TRAIN trees (T*) and EVAL trees (E*), signals first and then backgrounds.
Tsignals, TsScales, Esignals, EsScales = _load_trees(signal_samples, 'SIG')
Tbackgrounds, TbScales, Ebackgrounds, EbScales = _load_trees(background_samples, 'BKG')
165    
166 peller 1.7
# --- Configure and run TMVA -------------------------------------------------
factory = ROOT.TMVA.Factory(factoryname, output, factorysettings)

# Register every signal sample: its training tree and its evaluation tree.
for sig_train, sig_train_scale, sig_eval, sig_eval_scale in zip(
        Tsignals, TsScales, Esignals, EsScales):
    factory.AddSignalTree(sig_train, sig_train_scale, ROOT.TMVA.Types.kTraining)
    factory.AddSignalTree(sig_eval, sig_eval_scale, ROOT.TMVA.Types.kTesting)

# Register the background samples; trees without entries are not added.
for bkg_train, bkg_train_scale, bkg_eval, bkg_eval_scale in zip(
        Tbackgrounds, TbScales, Ebackgrounds, EbScales):
    if bkg_train.GetEntries() > 0:
        factory.AddBackgroundTree(bkg_train, bkg_train_scale, ROOT.TMVA.Types.kTraining)
    if bkg_eval.GetEntries() > 0:
        factory.AddBackgroundTree(bkg_eval, bkg_eval_scale, ROOT.TMVA.Types.kTesting)

# Declare the input variables, all as type 'D'.
for var_name in MVA_Vars['Nominal']:
    factory.AddVariable(var_name, 'D')

# The same weight expression is applied to signal and background.
factory.SetSignalWeightExpression(weightF)
factory.SetBackgroundWeightExpression(weightF)
factory.Verbose()

# Book the single method and train it directly (the original has
# factory.TrainAllMethods() commented out), then test and evaluate.
my_methodBase_bdt = factory.BookMethod(MVAtype, MVAname, MVAsettings)
my_methodBase_bdt.TrainMethod()
factory.TestAllMethods()
factory.EvaluateAllMethods()
output.Write()
194    
195 bortigno 1.22
196     #training performance parameters
197    
198     #output.ls()
# Move into the directory of the trained method inside the output file.
output.cd('Method_%s' % MVAtype)
ROOT.gDirectory.cd(MVAname)

# All objects read below are named <factoryname>_<MVAname><suffix>.
hist_prefix = factoryname + '_' + MVAname


def _mva_hist(suffix):
    """Return the object <factoryname>_<MVAname><suffix> from gDirectory."""
    return ROOT.gDirectory.Get(hist_prefix + suffix)


# '_S' / '_B' are read as the test-sample signal / background outputs and
# '_Train_S' / '_Train_B' as the training-sample ones (see the log labels).
rocIntegral_default = my_methodBase_bdt.GetROCIntegral()
roc_integral_test = my_methodBase_bdt.GetROCIntegral(_mva_hist('_S'), _mva_hist('_B'))
roc_integral_train = my_methodBase_bdt.GetROCIntegral(_mva_hist('_Train_S'), _mva_hist('_Train_B'))
significance = my_methodBase_bdt.GetSignificance()
separation_test = my_methodBase_bdt.GetSeparation(_mva_hist('_S'), _mva_hist('_B'))
separation_train = my_methodBase_bdt.GetSeparation(_mva_hist('_Train_S'), _mva_hist('_Train_B'))
# Kolmogorov test of the test-sample output against the training-sample one.
ks_signal = _mva_hist('_S').KolmogorovTest(_mva_hist('_Train_S'))
ks_bkg = _mva_hist('_B').KolmogorovTest(_mva_hist('_Train_B'))

print('@DEBUG: Test Integral')
print(_mva_hist('_S').Integral())
print('@LOG: ROC integral (default)')
print(rocIntegral_default)
print('@LOG: ROC integral using signal and background')
print(roc_integral_test)
print('@LOG: ROC integral using train signal and background')
print(roc_integral_train)
print('@LOG: ROC integral ratio (Test/Train)')
print(roc_integral_test / roc_integral_train)
print('@LOG: Significance')
print(significance)
print('@LOG: Separation for test sample')
print(separation_test)
print('@LOG: Separation for test train')
print(separation_train)
print('@LOG: Kolmogorov test on signal')
print(ks_signal)
print('@LOG: Kolmogorov test on background')
print(ks_bkg)
233    
# --- Record the training performance in the SQLite database -----------------
import sqlite3 as lite
# NOTE: the timeout of sqlite3.connect() is given in seconds (default 5.0),
# not milliseconds: this waits up to 10000 s for a lock held by another job.
con = lite.connect(MVAdir + 'Trainings.db', timeout=10000)
try:
    # "with con" commits the transaction on success and rolls it back on an
    # exception. It does not close the connection, hence the finally below.
    with con:
        cur = con.cursor()
        cur.execute("create table if not exists trainings (Roc_integral real, Separation real, Significance real, Ks_signal real, Ks_background real, Roc_integral_train real, Separation_train real, MVASettings text)")
        cur.execute("insert into trainings values(?,?,?,?,?,?,?,?)",
                    (roc_integral_test, separation_test, significance, ks_signal,
                     ks_bkg, roc_integral_train, separation_train, MVAsettings))
finally:
    # Release the database file handle (and any lock) even if the insert failed.
    con.close()

# Close the output file to avoid a memory leak.
output.Close()
245 bortigno 1.22
246    
# --- Write the info file -----------------------------------------------------
# Describe this training in an mvainfo object and pickle it next to the
# TMVA output.
# NOTE: `info` is rebound here; it previously held the ParseInfo object.
info = mvainfo(MVAname)
info.factoryname = factoryname
info.factorysettings = factorysettings
info.MVAtype = MVAtype
info.MVAsettings = MVAsettings
info.weightfilepath = MVAdir
info.path = path
info.varset = treeVarSet
info.vars = MVA_Vars['Nominal']

# Binary mode is the portable way to write pickles, and the with-statement
# closes the file even if pickling raises.
with open(MVAdir + factoryname + '_' + MVAname + '.info', 'wb') as infofile:
    print('@DEBUG: output infofile name')
    print(infofile)
    pickle.dump(info, infofile)

# Open the TMVA GUI on the output file when running verbosely.
if gui:
    ROOT.gROOT.ProcessLine(".L myutils/TMVAGui.C")
    ROOT.gROOT.ProcessLine("TMVAGui(\"%s\")" % fnameOutput)
    ROOT.gApplication.Run()
269    
270