ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/train.py
(Generate patch)

Comparing UserCode/VHbb/python/train.py (file contents):
Revision 1.18 by bortigno, Wed Jan 23 13:39:09 2013 UTC vs.
Revision 1.19 by nmohr, Fri Jan 25 16:18:00 2013 UTC

# Line 3 | Line 3 | from optparse import OptionParser
3   import sys
4   import pickle
5   import ROOT
6 + ROOT.gROOT.SetBatch(True)
7   from array import array
7 from myutils import BetterConfigParser, sample, printc, mvainfo, parse_info
8 #ToDo:
9 from gethistofromtree import getScale
10
8   #warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
9   #usage: ./train run gui
10  
14
11   #CONFIGURE
12   argv = sys.argv
13   parser = OptionParser()
# Line 25 | Line 21 | parser.add_option("-C", "--config", dest
21   if opts.config =="":
22          opts.config = "config"
23  
24 + #Import after configure to get help message
25 + from myutils import BetterConfigParser, mvainfo, ParseInfo, TreeCache
26 +
27   #load config
28   config = BetterConfigParser()
29   config.read(opts.config)
# Line 36 | Line 35 | gui=opts.verbose
35   global_rescale=2.
36  
37   #get locations:
38 < MVAdir=config.get('Directories','vhbbpath')+'/data_test/'
38 > MVAdir=config.get('Directories','vhbbpath')+'/data/'
39   samplesinfo=config.get('Directories','samplesinfo')
40  
41   #systematics
# Line 48 | Line 47 | weightF=config.get('Weights','weightF')
47   VHbbNameSpace=config.get('VHbbNameSpace','library')
48   ROOT.gSystem.Load(VHbbNameSpace)
49  
51 def getTree(job,cut,path,subsample=-1):
52    #print path+'/'+job.getpath()
53    newinput = ROOT.TFile.Open(path+'/'+job.getpath(),'read')
54    output.cd()
55    Tree = newinput.Get(job.tree)
56    #Tree.SetDirectory(0)
57
58      
59    if subsample>-1:
60        #print 'cut: (%s) & (%s)'%(cut,job.subcuts[subsample])
61        CuttedTree=Tree.CopyTree('(%s) & (%s)'%(cut,job.subcuts[subsample]))    
62        CuttedTree.SetNameTitle(job.subnames[subsample],job.subnames[subsample])
63        print '\t--> read in %s'%job.subnames[subsample]
64    else:
65        CuttedTree=Tree.CopyTree(cut)
66        CuttedTree.SetNameTitle(job.name,job.name)
67        print '\t--> read in %s'%job.name
68    newinput.Close()
69
70    #CuttedTree.SetDirectory(0)
71    return CuttedTree
72        
73 #def getScale(job,subsample=-1):
74 #    input = TFile.Open(job.getpath())
75 #    CountWithPU = input.Get("CountWithPU")
76 #    CountWithPU2011B = input.Get("CountWithPU2011B")
77 #    #print lumi*xsecs[i]/hist.GetBinContent(1)
78 #    return float(job.lumi)*float(job.xsec)*float(job.sf)/(0.46502*CountWithPU.GetBinContent(1)+0.53498*CountWithPU2011B.GetBinContent(1))*2/float(job.split)
79
50   #CONFIG
81 #suffix for output name
82 suffix='_newVars_v2'
51   #factory
52   factoryname=config.get('factory','factoryname')
53   factorysettings=config.get('factory','factorysettings')
# Line 87 | Line 55 | factorysettings=config.get('factory','fa
55   MVAtype=config.get(run,'MVAtype')
56   MVAname=run
57   MVAsettings=config.get(run,'MVAsettings')
58 < fnameOutput = MVAdir+factoryname+'_'+MVAname+suffix+'.root'
58 > fnameOutput = MVAdir+factoryname+'_'+MVAname+'.root'
59   #locations
60   path=config.get('Directories','SYSout')
61  
# Line 109 | Line 77 | treeVarSet=config.get(run,'treeVarSet')
77   MVA_Vars={}
78   MVA_Vars['Nominal']=config.get(treeVarSet,'Nominal')
79   MVA_Vars['Nominal']=MVA_Vars['Nominal'].split(' ')    
112 #Spectators:
113 #spectators=config.get(treeVarSet,'spectators')
114 #spectators=spectators.split(' ')
115
116 #TRAINING samples
117 #infofile = open(samplesinfo,'r')
118 #info = pickle.load(infofile)
119 #infofile.close()
80  
81 < info = parse_info(samplesinfo,path)
81 > #Infofile
82 > info = ParseInfo(samplesinfo,path)
83  
84   #Workdir
85   workdir=ROOT.gDirectory.GetPath()
# Line 126 | Line 87 | workdir=ROOT.gDirectory.GetPath()
87  
88   TrainCut='%s & EventForTraining==1'%TCut
89   EvalCut='%s & EventForTraining==0'%TCut
90 + cuts = [TrainCut,EvalCut]
91  
130 #load TRAIN trees
131 Tbackgrounds = []
132 TbScales = []
133 Tsignals = []
134 TsScales = []
92  
93 + samples = []
94 + samples = info.get_samples(signals+backgrounds)
95  
96 + tc = TreeCache(cuts,samples,path)
97  
98   output = ROOT.TFile.Open(fnameOutput, "RECREATE")
99  
100 < print '\n\t>>> TRAINING EVENTS <<<\n'
100 > print '\n\t>>> READING EVENTS <<<\n'
101  
102 < for job in info:
103 <    if eval(job.active):
104 <    
105 <        if job.subsamples:
106 <            print '\tREADING IN SUBSAMPLES of %s'%job.name
107 <            for subsample in range(0,len(job.group)):
108 <                if job.subnames[subsample] in signals:
109 <                    print '\t- %s as SIG'%job.group[subsample]
110 <                    Tsignal = getTree(job,TrainCut,path,subsample)
151 <                    ROOT.gDirectory.Cd(workdir)
152 <                    TsScale = getScale(job,path,config,global_rescale,subsample)
153 <                    Tsignals.append(Tsignal)
154 <                    TsScales.append(TsScale)
155 <                    print '\t\t\t%s events'%Tsignal.GetEntries()
156 <                elif job.subnames[subsample] in backgrounds:
157 <                    print '\t- %s as BKG'%job.group[subsample]
158 <                    Tbackground = getTree(job,TrainCut,path,subsample)
159 <                    ROOT.gDirectory.Cd(workdir)
160 <                    TbScale = getScale(job,path,config,global_rescale,subsample)
161 <                    Tbackgrounds.append(Tbackground)
162 <                    TbScales.append(TbScale)
163 <                    print '\t\t\t%s events'%Tbackground.GetEntries()
164 <    
165 <        else:
166 <            if job.name in signals:
167 <                print '\tREADING IN %s AS SIG'%job.name
168 <                Tsignal = getTree(job,TrainCut,path)
169 <                ROOT.gDirectory.Cd(workdir)
170 <                TsScale = getScale(job,path,config,global_rescale)
171 <                Tsignals.append(Tsignal)
172 <                TsScales.append(TsScale)
173 <                print '\t\t\t%s events'%Tsignal.GetEntries()
174 <            elif job.name in backgrounds:
175 <                print '\tREADING IN %s AS BKG'%job.name
176 <                Tbackground = getTree(job,TrainCut,path)
177 <                ROOT.gDirectory.Cd(workdir)
178 <                TbScale = getScale(job,path,config,global_rescale)
179 <                Tbackgrounds.append(Tbackground)
180 <                TbScales.append(TbScale)
181 <                print '\t\t\t%s events'%Tbackground.GetEntries()
182 <            
183 <            
184 < #load EVALUATE trees
102 > signal_samples = info.get_samples(signals)
103 > background_samples = info.get_samples(backgrounds)
104 >
105 > #TRAIN trees
106 > Tbackgrounds = []
107 > TbScales = []
108 > Tsignals = []
109 > TsScales = []
110 > #EVAL trees
111   Ebackgrounds = []
112   EbScales = []
113   Esignals = []
114   EsScales = []
115  
116 < print '\n\t>>> TESTING EVENTS <<<\n'
117 <
118 <
119 < for job in info:
120 <    if eval(job.active):
121 <    
122 <        if job.subsamples:
123 <            print '\tREADING IN SUBSAMPLES of %s'%job.name
124 <            for subsample in range(0,len(job.group)):
125 <                if job.subnames[subsample] in signals:
126 <                    print '\t- %s as SIG'%job.group[subsample]
127 <                    Esignal = getTree(job,EvalCut,path,subsample)
128 <                    ROOT.gDirectory.Cd(workdir)
129 <                    EsScale = getScale(job,path,config,global_rescale,subsample)
130 <                    Esignals.append(Esignal)
131 <                    EsScales.append(EsScale)
132 <                    print '\t\t\t%s events'%Esignal.GetEntries()
133 <                elif job.subnames[subsample] in backgrounds:
134 <                    print '\t- %s as BKG'%job.group[subsample]
135 <                    Ebackground = getTree(job,EvalCut,path,subsample)
136 <                    ROOT.gDirectory.Cd(workdir)
137 <                    EbScale = getScale(job,path,config,global_rescale,subsample)
138 <                    Ebackgrounds.append(Ebackground)
139 <                    EbScales.append(EbScale)
140 <                    print '\t\t\t%s events'%Ebackground.GetEntries()
141 <
142 <        else:
217 <            if job.name in signals:
218 <                print '\tREADING IN %s AS SIG'%job.name
219 <                Esignal = getTree(job,EvalCut,path)
220 <                ROOT.gDirectory.Cd(workdir)
221 <                EsScale = getScale(job,path,config,global_rescale)
222 <                Esignals.append(Esignal)
223 <                EsScales.append(EsScale)
224 <                print '\t\t\t%s events'%Esignal.GetEntries()
225 <            elif job.name in backgrounds:
226 <                print '\tREADING IN %s AS BKG'%job.name
227 <                Ebackground = getTree(job,EvalCut,path)
228 <                ROOT.gDirectory.Cd(workdir)
229 <                EbScale = getScale(job,path,config,global_rescale)
230 <                Ebackgrounds.append(Ebackground)
231 <                EbScales.append(EbScale)
232 <                print '\t\t\t%s events'%Ebackground.GetEntries()
233 <
116 > #load trees
117 > for job in signal_samples:
118 >    print '\tREADING IN %s AS SIG'%job.name
119 >    Tsignal = tc.get_tree(job,TrainCut)
120 >    ROOT.gDirectory.Cd(workdir)
121 >    TsScale = tc.get_scale(job,config)*global_rescale    
122 >    Tsignals.append(Tsignal)
123 >    TsScales.append(TsScale)
124 >    Esignal = tc.get_tree(job,EvalCut)
125 >    Esignals.append(Esignal)
126 >    EsScales.append(TsScale)
127 >    print '\t\t\tTraining %s events'%Tsignal.GetEntries()
128 >    print '\t\t\tEval %s events'%Esignal.GetEntries()
129 > for job in background_samples:
130 >    print '\tREADING IN %s AS BKG'%job.name
131 >    Tbackground = tc.get_tree(job,TrainCut)
132 >    ROOT.gDirectory.Cd(workdir)
133 >    TbScale = tc.get_scale(job,config)*global_rescale
134 >    Tbackgrounds.append(Tbackground)
135 >    TbScales.append(TbScale)
136 >    Ebackground = tc.get_tree(job,EvalCut)
137 >    ROOT.gDirectory.Cd(workdir)
138 >    Ebackgrounds.append(Ebackground)
139 >    EbScales.append(TbScale)
140 >    print '\t\t\tTraining %s events'%Tbackground.GetEntries()
141 >    print '\t\t\tEval %s events'%Ebackground.GetEntries()
142 >            
143  
235 #output = ROOT.TFile.Open(fnameOutput, "RECREATE")
144   factory = ROOT.TMVA.Factory(factoryname, output, factorysettings)
145  
146   #set input trees
147   for i in range(len(Tsignals)):
240
241    #print 'Number of SIG entries: %s'%Tsignals[i].GetEntries()
148      factory.AddSignalTree(Tsignals[i], TsScales[i], ROOT.TMVA.Types.kTraining)
243    #print 'Number of SIG entries: %s'%Esignals[i].GetEntries()
149      factory.AddSignalTree(Esignals[i], EsScales[i], ROOT.TMVA.Types.kTesting)
150  
151   for i in range(len(Tbackgrounds)):
152      if (Tbackgrounds[i].GetEntries()>0):
248        #print 'Number of BKG entries: %s'%Tbackgrounds[i].GetEntries()
153          factory.AddBackgroundTree(Tbackgrounds[i], TbScales[i], ROOT.TMVA.Types.kTraining)
154  
155      if (Ebackgrounds[i].GetEntries()>0):
252        #print 'Number of BKG entries: %s'%Ebackgrounds[i].GetEntries()
156          factory.AddBackgroundTree(Ebackgrounds[i], EbScales[i], ROOT.TMVA.Types.kTesting)
157          
255        
158   for var in MVA_Vars['Nominal']:
159      factory.AddVariable(var,'D') # add the variables
258 #for var in spectators:
259 #    factory.AddSpectator(var,'D') #add specators
160  
161   #Execute TMVA
162   factory.SetSignalWeightExpression(weightF)
# Line 269 | Line 169 | factory.EvaluateAllMethods()
169   output.Write()
170  
171   #WRITE INFOFILE
172 < infofile = open(MVAdir+factoryname+'_'+MVAname+suffix+'.info','w')
172 > infofile = open(MVAdir+factoryname+'_'+MVAname+'.info','w')
173   info=mvainfo(MVAname)
174   info.factoryname=factoryname
175   info.factorysettings=factorysettings
# Line 279 | Line 179 | info.weightfilepath=MVAdir
179   info.path=path
180   info.varset=treeVarSet
181   info.vars=MVA_Vars['Nominal']
282 #info.spectators=spectators
182   pickle.dump(info,infofile)
183   infofile.close()
184  

Diff Legend

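(no marker, old line number only) Removed lines
+ Added lines
< Changed lines (text as in the old revision, 1.18)
> Changed lines (text as in the new revision, 1.19)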