ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/evaluateMVA.py
(Generate patch)

Comparing UserCode/VHbb/python/evaluateMVA.py (file contents):
Revision 1.2 by peller, Wed May 9 09:48:23 2012 UTC vs.
Revision 1.21 by peller, Wed Jan 16 16:22:46 2013 UTC

# Line 2 | Line 2
2   import sys
3   import os
4   import ROOT
5 from ROOT import TFile
5   from array import array
6   from math import sqrt
7   from copy import copy
8   #suppres the EvalInstace conversion warning bug
9   import warnings
10   warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
11 < from ConfigParser import SafeConfigParser
13 < from samplesinfo import sample
14 < from mvainfos import mvainfo
11 > from optparse import OptionParser
12   import pickle
13 < from progbar import progbar
17 < from printcolor import printc
13 > from myutils import BetterConfigParser, progbar, mvainfo, printc, parse_info
14  
19 #CONFIGURE
15  
16 + #CONFIGURE
17 + ROOT.gROOT.SetBatch(True)
18 + print 'hello'
19   #load config
20 < config = SafeConfigParser()
21 < config.read('./config')
20 > #os.mkdir(path+'/sys')
21 > argv = sys.argv
22 > parser = OptionParser()
23 > parser.add_option("-U", "--update", dest="update", default=0,
24 >                      help="update infofile")
25 > parser.add_option("-D", "--discr", dest="discr", default="",
26 >                      help="discriminators to be added")
27 > #parser.add_option("-I", "--inpath", dest="inpath", default="",
28 > #                      help="path to samples")
29 > #parser.add_option("-O", "--outpath", dest="outpath", default="",
30 > #                      help="path where to store output samples")
31 > parser.add_option("-S", "--samples", dest="names", default="",
32 >                      help="samples you want to run on")
33 > parser.add_option("-C", "--config", dest="config", default=[], action="append",
34 >                      help="configuration file")
35 > (opts, args) = parser.parse_args(argv)
36 >
37 > #from samplesclass import sample
38 > #from mvainfos import mvainfo
39 > #from progbar import progbar
40 > #from printcolor import printc
41 >
42 >
43 > if opts.config =="":
44 >        opts.config = "config"
45 > config = BetterConfigParser()
46 > #config.read('./config7TeV_ZZ')
47 > config.read(opts.config)
48 > anaTag = config.get("Analysis","tag")
49  
50   #get locations:
51   Wdir=config.get('Directories','Wdir')
52 <
52 > samplesinfo=config.get('Directories','samplesinfo')
53  
54   #systematics
30 systematics=config.get('systematics','systematics')
31 systematics=systematics.split(' ')
55  
56 < #TreeVar Array
57 < MVA_Vars={}
58 < for systematic in systematics:
59 <    MVA_Vars[systematic]=config.get('treeVars',systematic)
60 <    MVA_Vars[systematic]=MVA_Vars[systematic].split(' ')
61 <
62 < ######################
63 < #Evaluate multi: Must Have same treeVars!!!
64 <
65 < Apath=sys.argv[1]
66 < arglist=sys.argv[2] #RTight_blavla,bsbsb
67 <
68 < #for axample
69 < #0 5 0
70 < #and
48 < #5 -1 1
49 <
50 < start=int(sys.argv[3])
51 < stop=int(sys.argv[4])
52 < doinfo=bool(int(sys.argv[5]))
56 >
57 > INpath = config.get('Directories','MVAin')
58 > OUTpath = config.get('Directories','MVAout')
59 >
60 > info = parse_info(samplesinfo,INpath)
61 >
62 > #infofile = open(samplesinfo,'r')
63 > #info = pickle.load(infofile)
64 > #infofile.close()
65 > arglist=opts.discr #RTight_blavla,bsbsb
66 >
67 > namelistIN=opts.names
68 > namelist=namelistIN.split(',')
69 >
70 > #doinfo=bool(int(opts.update))
71  
72   MVAlist=arglist.split(',')
73  
74 +
75   #CONFIG
76   #factory
77   factoryname=config.get('factory','factoryname')
78 +
79 + #load the namespace
80 + VHbbNameSpace=config.get('VHbbNameSpace','library')
81 + ROOT.gSystem.Load(VHbbNameSpace)
82 +
83   #MVA
60 #MVAnames=[]
61 #for MVA in MVAlist:
62 #    print MVA
63 #    MVAnames.append(config.get(MVA,'MVAname'))
64 #print Wdir+'/weights/'+factoryname+'_'+MVAname+'.info'
65 #MVAinfofiles=[]
84   MVAinfos=[]
85 + MVAdir=config.get('Directories','vhbbpath')
86   for MVAname in MVAlist:
87 <    MVAinfofile = open(Wdir+'/weights/'+factoryname+'_'+MVAname+'.info','r')
87 >    MVAinfofile = open(MVAdir+'/data/'+factoryname+'_'+MVAname+'.info','r')
88      MVAinfos.append(pickle.load(MVAinfofile))
89      MVAinfofile.close()
90      
72 treeVarSet=MVAinfos[0].varset
73 #variables
74 #TreeVar Array
75 MVA_Vars={}
76 for systematic in systematics:
77    MVA_Vars[systematic]=config.get(treeVarSet,systematic)
78    MVA_Vars[systematic]=MVA_Vars[systematic].split(' ')
79 #Spectators:
80 #spectators=config.get(treeVarSet,'spectators')
81 #spectators=spectators.split(' ')
82 #progbar quatsch
91   longe=40
92   #Workdir
93   workdir=ROOT.gDirectory.GetPath()
86 #os.mkdir(Apath+'/MVAout')
94  
88 #Book TMVA readers: MVAlist=["MMCC_bla","CC5050_bla"]
89 readers=[]
90 for MVA in MVAlist:
91    readers.append(ROOT.TMVA.Reader("!Color:!Silent"))
92
93 #define variables and specatators
94 MVA_var_buffer = []
95 for i in range(len( MVA_Vars['Nominal'])):
96    MVA_var_buffer.append(array( 'f', [ 0 ] ))
97    for reader in readers:
98        reader.AddVariable( MVA_Vars['Nominal'][i],MVA_var_buffer[i])
99 #MVA_spectator_buffer = []
100 #for i in range(len(spectators)):
101 #    MVA_spectator_buffer.append(array( 'f', [ 0 ] ))
102 #    for reader in readers:
103 #        reader.AddSpectator(spectators[i],MVA_spectator_buffer[i])
104 #Load raeder
105 for i in range(0,len(readers)):
106    readers[i].BookMVA(MVAinfos[i].MVAname,MVAinfos[i].getweightfile())
107 #--> Now the MVA is booked
95  
96   #Apply samples
97 < infofile = open(Apath+'/samples.info','r')
98 < Ainfo = pickle.load(infofile)
99 < infofile.close()
100 <
101 < #eval
102 < for job in Ainfo[start:stop]:
103 <    #get trees:
104 <    input = TFile.Open(job.getpath(),'read')
105 <    outfile = TFile.Open(job.path+'/MVAout2/'+job.prefix+job.identifier+'.root','recreate')
106 <    input.cd()
107 <    obj = ROOT.TObject
108 <    for key in ROOT.gDirectory.GetListOfKeys():
109 <        input.cd()
110 <        obj = key.ReadObj()
111 <        print obj.GetName()
112 <        if obj.GetName() == job.tree:
113 <            continue
114 <        outfile.cd()
115 <        print key.GetName()
116 <        obj.Write(key.GetName())
117 <    tree = input.Get(job.tree)
118 <    nEntries = tree.GetEntries()
119 <    outfile.cd()
120 <    newtree = tree.CloneTree(0)
121 <
122 <    #MCs:
123 <    if job.type != 'DATA':
124 <        MVA_formulas={}
125 <        for systematic in systematics:
126 <            #print '\t\t - ' + systematic
127 <            MVA_formulas[systematic]=[]
128 <            #create TTreeFormulas
129 <            for j in range(len( MVA_Vars['Nominal'])):
130 <                MVA_formulas[systematic].append(ROOT.TTreeFormula("MVA_formula%s_%s"%(j,systematic),MVA_Vars[systematic][j],tree))
131 <        outfile.cd()
132 <        #Setup Branches
133 <        MVAbranches=[]
134 <        for i in range(0,len(readers)):
135 <            MVAbranches.append(array('f',[0]*9))
136 <            newtree.Branch(MVAinfos[i].MVAname,MVAbranches[i],'nominal:JER_up:JER_down:JES_up:JES_down:beff_up:beff_down:bmis_up:bmis_down/F')
137 <        print '\n--> ' + job.name +':'
138 <        #progbar setup
139 <        if nEntries >= longe:
140 <            step=int(nEntries/longe)
141 <            long=longe
142 <        else:
156 <            long=nEntries
157 <            step = 1
158 <        bar=progbar(long)
159 <        #Fill event by event:
160 <        for entry in range(0,nEntries):
161 <            if entry % step == 0:
162 <                bar.move()
163 <            #load entry
164 <            tree.GetEntry(entry)
165 <            for systematic in systematics:
166 <                for j in range(len( MVA_Vars['Nominal'])):
167 <                    MVA_var_buffer[j][0] = MVA_formulas[systematic][j].EvalInstance()
168 <                    
169 <                for j in range(0,len(readers)):
170 <                    MVAbranches[j][systematics.index(systematic)] = readers[j].EvaluateMVA(MVAinfos[j].MVAname)
171 <            #Fill:
172 <            newtree.Fill()
173 <        newtree.AutoSave()
174 <        outfile.Close()
175 <        
176 <    #DATA:
177 <    if job.type == 'DATA':
178 <        #MVA Formulas
179 <        MVA_formulas_Nominal = []
180 <        #create TTreeFormulas
181 <        for j in range(len( MVA_Vars['Nominal'])):
182 <            MVA_formulas_Nominal.append(ROOT.TTreeFormula("MVA_formula%s_Nominal"%j, MVA_Vars['Nominal'][j],tree))
183 <        outfile.cd()
184 <        MVAbranches=[]
185 <        for i in range(0,len(readers)):
186 <            MVAbranches.append(array('f',[0]))
187 <            newtree.Branch(MVAinfos[i].MVAname,MVAbranches[i],'nominal/F')
188 <        #progbar          
189 <        print '\n--> ' + job.name +':'
190 <        if nEntries >= longe:
191 <            step=int(nEntries/longe)
192 <            long=longe
193 <        else:
194 <            long=nEntries
195 <            step = 1
196 <        bar=progbar(long)
197 <        #Fill event by event:
198 <        for entry in range(0,nEntries):
199 <            if entry % step == 0:
200 <                bar.move()
201 <            #load entry
202 <            tree.GetEntry(entry)
203 <            #nominal:
204 <            for j in range(len( MVA_Vars['Nominal'])):
205 <                    MVA_var_buffer[j][0] = MVA_formulas_Nominal[j].EvalInstance()
206 <                    
207 <            for j in range(0,len(readers)):
208 <                MVAbranches[j][0]= readers[j].EvaluateMVA(MVAinfos[j].MVAname)
209 <            newtree.Fill()
210 <        newtree.AutoSave()
211 <        outfile.Close()
212 <
213 <
97 > #infofile = open(samplesinfo,'r')
98 > #Ainfo = pickle.load(infofile)
99 > #infofile.close()
100 >
101 >
102 > class MvaEvaluater:
103 >    def __init__(self, config, MVAinfo):
104 >        self.varset = MVAinfo.varset
105 >        #Define reader
106 >        self.reader = ROOT.TMVA.Reader("!Color:!Silent")
107 >        MVAdir=config.get('Directories','vhbbpath')
108 >        self.systematics=config.get('systematics','systematics').split(' ')
109 >        self.MVA_Vars={}
110 >        self.MVAname = MVAinfo.MVAname
111 >        for systematic in self.systematics:
112 >            self.MVA_Vars[systematic]=config.get(self.varset,systematic)
113 >            self.MVA_Vars[systematic]=self.MVA_Vars[systematic].split(' ')
114 >        #define variables and specatators
115 >        self.MVA_var_buffer = []
116 >        for i in range(len( self.MVA_Vars['Nominal'])):
117 >            self.MVA_var_buffer.append(array( 'f', [ 0 ] ))
118 >            self.reader.AddVariable( self.MVA_Vars['Nominal'][i],self.MVA_var_buffer[i])
119 >        self.reader.BookMVA(MVAinfo.MVAname,MVAdir+'/data/'+MVAinfo.getweightfile())
120 >        #--> Now the MVA is booked
121 >
122 >    def setBranches(self,tree,job):
123 >        #Set formulas for all vars
124 >        self.MVA_formulas={}
125 >        for systematic in self.systematics:
126 >            if job.type == 'DATA' and not systematic == 'Nominal': continue
127 >            self.MVA_formulas[systematic]=[]
128 >            for j in range(len( self.MVA_Vars['Nominal'])):
129 >                self.MVA_formulas[systematic].append(ROOT.TTreeFormula("MVA_formula%s_%s"%(j,systematic),self.MVA_Vars[systematic][j],tree))
130 >
131 >    def evaluate(self,MVAbranches,job):
132 >        #Evaluate all vars and fill the branches
133 >        for systematic in self.systematics:
134 >            for j in range(len( self.MVA_Vars['Nominal'])):
135 >                if job.type == 'DATA' and not systematic == 'Nominal': continue
136 >                self.MVA_var_buffer[j][0] = self.MVA_formulas[systematic][j].EvalInstance()                
137 >            MVAbranches[self.systematics.index(systematic)] = self.reader.EvaluateMVA(self.MVAname)
138 >
139 >
140 > theMVAs = []
141 > for mva in MVAinfos:
142 >    theMVAs.append(MvaEvaluater(config,mva))
143  
144  
145 + #eval
146 + for job in info:
147 +    if eval(job.active):
148 +        if job.name in namelist:
149 +            #get trees:
150 +            print INpath+'/'+job.prefix+job.identifier+'.root'
151 +            input = ROOT.TFile.Open(INpath+'/'+job.prefix+job.identifier+'.root','read')
152 +            print OUTpath+'/'+job.prefix+job.identifier+'.root'
153 +            outfile = ROOT.TFile.Open(OUTpath+'/'+job.prefix+job.identifier+'.root','recreate')
154 +            input.cd()
155 +            obj = ROOT.TObject
156 +            for key in ROOT.gDirectory.GetListOfKeys():
157 +                input.cd()
158 +                obj = key.ReadObj()
159 +                #print obj.GetName()
160 +                if obj.GetName() == job.tree:
161 +                    continue
162 +                outfile.cd()
163 +                #print key.GetName()
164 +                obj.Write(key.GetName())
165 +            tree = input.Get(job.tree)
166 +            nEntries = tree.GetEntries()
167 +            outfile.cd()
168 +            newtree = tree.CloneTree(0)
169 +            
170 +
171 +            #Set branch adress for all vars
172 +            for i in range(0,len(theMVAs)):
173 +                theMVAs[i].setBranches(tree,job)
174 +            outfile.cd()
175 +            #Setup Branches
176 +            MVAbranches=[]
177 +            for i in range(0,len(theMVAs)):
178 +                if job.type == 'Data':
179 +                    MVAbranches.append(array('f',[0]))
180 +                    newtree.Branch(MVAinfos[i].MVAname,MVAbranches[i],'nominal/F')
181 +                else:
182 +                    MVAbranches.append(array('f',[0]*11))
183 +                    newtree.Branch(theMVAs[i].MVAname,MVAbranches[i],'nominal:JER_up:JER_down:JES_up:JES_down:beff_up:beff_down:bmis_up:bmis_down:beff1_up:beff1_down/F')
184 +                MVA_formulas_Nominal = []
185 +            print '\n--> ' + job.name +':'
186 +            #progbar setup
187 +            if nEntries >= longe:
188 +                step=int(nEntries/longe)
189 +                long=longe
190 +            else:
191 +                long=nEntries
192 +                step = 1
193 +            bar=progbar(long)
194 +            #Fill event by event:
195 +            for entry in range(0,nEntries):
196 +                if entry % step == 0:
197 +                    bar.move()
198 +                #load entry
199 +                tree.GetEntry(entry)
200 +                            
201 +                for i in range(0,len(theMVAs)):
202 +                    theMVAs[i].evaluate(MVAbranches[i],job)
203 +                #Fill:
204 +                newtree.Fill()
205 +            newtree.AutoSave()
206 +            outfile.Close()
207 +                
208 + print '\n'
209  
210   #Update Info:
211 < if doinfo:
212 <    for job in Ainfo:        
213 <        for MVAinfo in MVAinfos:
214 <            job.addcomment('Added MVA %s'%MVAinfo.MVAname)
215 <        job.addpath('/MVAout')
216 <    infofile = open(Apath+'/MVAout/samples.info','w')
217 <    pickle.dump(Ainfo,infofile)
218 <    infofile.close()
226 <
227 <
211 > #if doinfo:
212 > #    for job in Ainfo:        
213 > #        for MVAinfo in MVAinfos:
214 > #            job.addcomment('Added MVA %s'%MVAinfo.MVAname)
215 > #        job.addpath(MVAdir)
216 > #    infofile = open(samplesinfo,'w')
217 > #    pickle.dump(Ainfo,infofile)
218 > #    infofile.close()

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines