ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/train.py
Revision: 1.16
Committed: Wed Jan 16 16:22:47 2013 UTC (12 years, 4 months ago) by peller
Content type: text/x-python
Branch: MAIN
Changes since 1.15: +9 -19 lines
Log Message:
Reorganized the whole repository. Macros in myutils, config files in subdirectories. Config file split into parts. Path config file restructured. Moved all path options to the path config. Changed the code accordingly.

File Contents

# User Rev Content
1 peller 1.1 #!/usr/bin/env python
2 nmohr 1.9 from optparse import OptionParser
3 peller 1.1 import sys
4 peller 1.16 import pickle
5     import ROOT
6     from ROOT import TFile, TTree
7     from array import array
8     from myutils import BetterConfigParser, sample, printc, mvainfo, parse_info
9     #ToDo:
10     from gethistofromtree import getScale
11 peller 1.1
12     #warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
13     #usage: ./train run gui
14    
15    
16     #CONFIGURE
argv = sys.argv
parser = OptionParser()
parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False,
                  help="Verbose mode.")
parser.add_option("-T", "--training", dest="training", default="",
                  help="Training")
parser.add_option("-C", "--config", dest="config", default=[], action="append",
                  help="configuration file")
(opts, args) = parser.parse_args(argv)
# "-C" uses action="append" with an empty-list default, so a missing option
# shows up as [] and never as "". Test for emptiness and keep the fallback a
# list, the same type the option has when it is given on the command line.
if not opts.config:
    opts.config = ["config"]
28 peller 1.1
29     #load config
config = BetterConfigParser()
config.read(opts.config)
anaTag = config.get("Analysis", "tag")
# The -T option names the config section that describes the training.
run = opts.training
# The -v option doubles as the switch for opening the TMVA GUI at the end.
gui = opts.verbose

# Global rescale from the train/test splitting (handed to getScale).
global_rescale = 2.

# Locations
MVAdir = '%s/data/' % config.get('Directories', 'vhbbpath')
samplesinfo = config.get('Directories', 'samplesinfo')

# Systematics: space-separated list in the config file
systematics = config.get('systematics', 'systematics').split(' ')

weightF = config.get('Weights', 'weightF')

# Load the library named in the [VHbbNameSpace] section into ROOT.
VHbbNameSpace = config.get('VHbbNameSpace', 'library')
ROOT.gSystem.Load(VHbbNameSpace)
51 peller 1.7
def getTree(job, cut, path, subsample=-1):
    """Return a copy of a sample's tree restricted to the events passing a cut.

    The file ``path + '/' + job.getpath()`` is opened read-only, the tree named
    ``job.tree`` is fetched from it and copied with ``CopyTree``. The input
    file is closed again before returning.

    Args:
        job: sample description; ``getpath()``, ``tree`` and (for subsamples)
            ``subcuts`` are used.
        cut: selection expression applied when copying the tree.
        path: directory prefix of the input file.
        subsample: index into ``job.subcuts``. When greater than -1 the
            subsample cut is combined with ``cut``; otherwise only ``cut``
            is applied.

    Returns:
        The tree produced by ``CopyTree``.

    Raises:
        IOError: if the input file cannot be opened or does not contain
            ``job.tree``.
    """
    filename = path + '/' + job.getpath()
    newinput = TFile.Open(filename, 'read')
    # A failed open gives a null (falsy) object; fail with a readable message
    # instead of an obscure null-pointer error further down.
    if not newinput or newinput.IsZombie():
        raise IOError('cannot open input file %s' % filename)

    # Switch to the module-level output file before copying; presumably so the
    # copied tree is attached to it and survives newinput.Close() -- this must
    # happen after TFile.Open, as in the original ordering.
    output.cd()
    Tree = newinput.Get(job.tree)
    if not Tree:
        newinput.Close()
        raise IOError('tree %s not found in %s' % (job.tree, filename))

    if subsample > -1:
        selection = '(%s) & (%s)' % (cut, job.subcuts[subsample])
    else:
        selection = cut
    CuttedTree = Tree.CopyTree(selection)
    newinput.Close()

    return CuttedTree
72    
73 peller 1.7 #def getScale(job,subsample=-1):
74     # input = TFile.Open(job.getpath())
75     # CountWithPU = input.Get("CountWithPU")
76     # CountWithPU2011B = input.Get("CountWithPU2011B")
77     # #print lumi*xsecs[i]/hist.GetBinContent(1)
78     # return float(job.lumi)*float(job.xsec)*float(job.sf)/(0.46502*CountWithPU.GetBinContent(1)+0.53498*CountWithPU2011B.GetBinContent(1))*2/float(job.split)
79 peller 1.1
80    
81    
82     #CONFIG
83     #factory
# Factory
factoryname = config.get('factory', 'factoryname')
factorysettings = config.get('factory', 'factorysettings')

# MVA: the training name is both the config section and the method name.
MVAtype = config.get(run, 'MVAtype')
MVAname = run
MVAsettings = config.get(run, 'MVAsettings')
fnameOutput = '%s%s_%s.root' % (MVAdir, factoryname, MVAname)

# Location of the input trees
path = config.get(run, 'path')

# Selection: the training section names an entry of the [Cuts] section.
TCutname = config.get(run, 'treeCut')
TCut = config.get('Cuts', TCutname)
#print TCut

# Signal and background sample names (space-separated in the config)
signals = config.get(run, 'signals').split(' ')
backgrounds = config.get(run, 'backgrounds').split(' ')

treeVarSet = config.get(run, 'treeVarSet')

# Input variables of the MVA, keyed by variation
MVA_Vars = {'Nominal': config.get(treeVarSet, 'Nominal').split(' ')}
# Spectators (currently disabled):
#spectators=config.get(treeVarSet,'spectators')
#spectators=spectators.split(' ')

# Training samples
#infofile = open(samplesinfo,'r')
#info = pickle.load(infofile)
#infofile.close()

info = parse_info(samplesinfo, path)

# Remember the current ROOT directory so it can be restored after reading trees.
workdir = ROOT.gDirectory.GetPath()


# Events are split into a training and an evaluation half by EventForTraining.
TrainCut = '%s & EventForTraining==1' % TCut
EvalCut = '%s & EventForTraining==0' % TCut
129 peller 1.1
130     #load TRAIN trees
def _read_sample(job, cut, trees, scales, subsample=None):
    """Read one (sub)sample with ``cut`` and append its tree and scale."""
    # Only forward the subsample index when there is one, so getTree and
    # getScale see exactly the arguments the original call sites passed.
    extra = () if subsample is None else (subsample,)
    tree = getTree(job, cut, path, *extra)
    ROOT.gDirectory.Cd(workdir)
    scale = getScale(job, path, config, global_rescale, *extra)
    trees.append(tree)
    scales.append(scale)
    print('\t\t\t%s events' % tree.GetEntries())


def _load_trees(cut):
    """Read all active signal and background samples selected by ``cut``.

    Samples (or subsamples) are classified by looking their name up in the
    module-level ``signals`` and ``backgrounds`` lists; anything in neither
    list is skipped.

    Args:
        cut: selection expression handed to ``getTree``.

    Returns:
        Tuple ``(signal_trees, signal_scales, background_trees,
        background_scales)`` of lists filled in sample order.
    """
    sig_trees, sig_scales, bkg_trees, bkg_scales = [], [], [], []
    for job in info:
        # NOTE(review): job.active is evaluated as a Python expression
        # (presumably a 'True'/'False' string from the samples config) --
        # confirm that this input is trusted.
        if not eval(job.active):
            continue

        if job.subsamples:
            print('\tREADING IN SUBSAMPLES of %s' % job.name)
            for subsample in range(len(job.group)):
                subname = job.subnames[subsample]
                if subname in signals:
                    print('\t- %s as SIG' % job.group[subsample])
                    _read_sample(job, cut, sig_trees, sig_scales, subsample)
                elif subname in backgrounds:
                    print('\t- %s as BKG' % job.group[subsample])
                    _read_sample(job, cut, bkg_trees, bkg_scales, subsample)
        elif job.name in signals:
            print('\tREADING IN %s AS SIG' % job.name)
            _read_sample(job, cut, sig_trees, sig_scales)
        elif job.name in backgrounds:
            print('\tREADING IN %s AS BKG' % job.name)
            _read_sample(job, cut, bkg_trees, bkg_scales)
    return sig_trees, sig_scales, bkg_trees, bkg_scales


# The output file must exist before any tree is read: getTree switches to it
# before copying.
output = ROOT.TFile.Open(fnameOutput, "RECREATE")

# Load TRAIN trees
print('\n\t>>> TRAINING EVENTS <<<\n')
Tsignals, TsScales, Tbackgrounds, TbScales = _load_trees(TrainCut)

# Load EVALUATE trees
print('\n\t>>> TESTING EVENTS <<<\n')
Esignals, EsScales, Ebackgrounds, EbScales = _load_trees(EvalCut)
233 peller 1.1
234 peller 1.7
235     #output = ROOT.TFile.Open(fnameOutput, "RECREATE")
factory = ROOT.TMVA.Factory(factoryname, output, factorysettings)

# Register the input trees. The training and testing lists are filled by the
# same sample loop, so they are index-aligned.
for idx, train_tree in enumerate(Tsignals):
    factory.AddSignalTree(train_tree, TsScales[idx], ROOT.TMVA.Types.kTraining)
    factory.AddSignalTree(Esignals[idx], EsScales[idx], ROOT.TMVA.Types.kTesting)

for idx, train_tree in enumerate(Tbackgrounds):
    # Background trees are only registered when they contain events.
    if train_tree.GetEntries() > 0:
        factory.AddBackgroundTree(train_tree, TbScales[idx], ROOT.TMVA.Types.kTraining)

    test_tree = Ebackgrounds[idx]
    if test_tree.GetEntries() > 0:
        factory.AddBackgroundTree(test_tree, EbScales[idx], ROOT.TMVA.Types.kTesting)


# Declare the input variables
for var in MVA_Vars['Nominal']:
    factory.AddVariable(var, 'D')
#for var in spectators:
#    factory.AddSpectator(var,'D') #add spectators

# Execute TMVA: the same weight expression is used for signal and background.
factory.SetSignalWeightExpression(weightF)
factory.SetBackgroundWeightExpression(weightF)
factory.Verbose()
factory.BookMethod(MVAtype, MVAname, MVAsettings)
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
output.Write()
270    
271     #WRITE INFOFILE
# Describe the finished training in an mvainfo object. Note that this rebinds
# ``info``, which held the sample list up to this point.
info = mvainfo(MVAname)
info.factoryname = factoryname
info.factorysettings = factorysettings
info.MVAtype = MVAtype
info.MVAsettings = MVAsettings
info.weightfilepath = MVAdir
info.path = path
info.varset = treeVarSet
info.vars = MVA_Vars['Nominal']
#info.spectators=spectators

# Open the file only once the object is complete, in binary mode as
# recommended for pickles; 'with' closes it even if pickle.dump raises.
with open(MVAdir + factoryname + '_' + MVAname + '.info', 'wb') as infofile:
    pickle.dump(info, infofile)
285    
286     # open the TMVA Gui
if gui:
    # Load the GUI macro, point it at the freshly written output file and
    # hand control to the ROOT application loop.
    for command in (".L myutils/TMVAGui.C", "TMVAGui(\"%s\")" % fnameOutput):
        ROOT.gROOT.ProcessLine(command)
    ROOT.gApplication.Run()
291    
292