ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/evaluateMVA.py
(Generate patch)

Comparing UserCode/VHbb/python/evaluateMVA.py (file contents):
Revision 1.5 by peller, Wed May 23 11:44:41 2012 UTC vs.
Revision 1.19 by bortigno, Fri Jan 11 13:47:24 2013 UTC

# Line 2 | Line 2
2   import sys
3   import os
4   import ROOT
5 from ROOT import TFile
5   from array import array
6   from math import sqrt
7   from copy import copy
8   #suppres the EvalInstace conversion warning bug
9   import warnings
10   warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
11 < from ConfigParser import SafeConfigParser
11 > from optparse import OptionParser
12 > from BetterConfigParser import BetterConfigParser
13 > import pickle
14 >
15 > #CONFIGURE
16 > ROOT.gROOT.SetBatch(True)
17 > print 'hello'
18 > #load config
19 > #os.mkdir(path+'/sys')
20 > argv = sys.argv
21 > parser = OptionParser()
22 > parser.add_option("-U", "--update", dest="update", default=0,
23 >                      help="update infofile")
24 > parser.add_option("-D", "--discr", dest="discr", default="",
25 >                      help="discriminators to be added")
26 > #parser.add_option("-I", "--inpath", dest="inpath", default="",
27 > #                      help="path to samples")
28 > #parser.add_option("-O", "--outpath", dest="outpath", default="",
29 > #                      help="path where to store output samples")
30 > parser.add_option("-S", "--samples", dest="names", default="",
31 >                      help="samples you want to run on")
32 > parser.add_option("-C", "--config", dest="config", default=[], action="append",
33 >                      help="configuration file")
34 > (opts, args) = parser.parse_args(argv)
35 >
36   from samplesclass import sample
37   from mvainfos import mvainfo
15 import pickle
38   from progbar import progbar
39   from printcolor import printc
40  
41 < #CONFIGURE
42 <
43 < #load config
44 < config = SafeConfigParser()
45 < config.read('./config')
41 > if opts.config =="":
42 >        opts.config = "config"
43 > config = BetterConfigParser()
44 > #config.read('./config7TeV_ZZ')
45 > config.read(opts.config)
46 > anaTag = config.get("Analysis","tag")
47  
48   #get locations:
49   Wdir=config.get('Directories','Wdir')
50 <
51 < MVAdir=config.get('Directories','MVAdir')
50 > MVASubdir=config.get('Directories','MVAdir')
51 > samplesinfo=config.get('Directories','samplesinfo')
52  
53   #systematics
31 systematics=config.get('systematics','systematics')
32 systematics=systematics.split(' ')
54  
34 #TreeVar Array
35 MVA_Vars={}
36 for systematic in systematics:
37    MVA_Vars[systematic]=config.get('treeVars',systematic)
38    MVA_Vars[systematic]=MVA_Vars[systematic].split(' ')
55  
56 < ######################
57 < #Evaluate multi: Must Have same treeVars!!!
56 > INpath = config.get('Directories','MVAin')
57 > OUTpath = config.get('Directories','MVAout')
58  
59 < Apath=sys.argv[1]
60 < arglist=sys.argv[2] #RTight_blavla,bsbsb
59 > infofile = open(samplesinfo,'r')
60 > info = pickle.load(infofile)
61 > infofile.close()
62 > arglist=opts.discr #RTight_blavla,bsbsb
63  
64 < namelistIN=sys.argv[3]
64 > namelistIN=opts.names
65   namelist=namelistIN.split(',')
66  
67 < doinfo=bool(int(sys.argv[4]))
67 > doinfo=bool(int(opts.update))
68  
69   MVAlist=arglist.split(',')
70  
71 +
72   #CONFIG
73   #factory
74   factoryname=config.get('factory','factoryname')
75 +
76 + #load the namespace
77 + VHbbNameSpace=config.get('VHbbNameSpace','library')
78 + ROOT.gSystem.Load(VHbbNameSpace)
79 +
80   #MVA
57 #MVAnames=[]
58 #for MVA in MVAlist:
59 #    print MVA
60 #    MVAnames.append(config.get(MVA,'MVAname'))
61 #print Wdir+'/weights/'+factoryname+'_'+MVAname+'.info'
62 #MVAinfofiles=[]
81   MVAinfos=[]
82 + MVAdir=config.get('Directories','vhbbpath')
83   for MVAname in MVAlist:
84 <    MVAinfofile = open(Wdir+'/weights/'+factoryname+'_'+MVAname+'.info','r')
84 >    MVAinfofile = open(MVAdir+'/data/'+factoryname+'_'+MVAname+'.info','r')
85      MVAinfos.append(pickle.load(MVAinfofile))
86      MVAinfofile.close()
87      
69 treeVarSet=MVAinfos[0].varset
70 #variables
71 #TreeVar Array
72 MVA_Vars={}
73 for systematic in systematics:
74    MVA_Vars[systematic]=config.get(treeVarSet,systematic)
75    MVA_Vars[systematic]=MVA_Vars[systematic].split(' ')
76 #Spectators:
77 #spectators=config.get(treeVarSet,'spectators')
78 #spectators=spectators.split(' ')
79 #progbar quatsch
88   longe=40
89   #Workdir
90   workdir=ROOT.gDirectory.GetPath()
83 #os.mkdir(Apath+'/MVAout')
91  
85 #Book TMVA readers: MVAlist=["MMCC_bla","CC5050_bla"]
86 readers=[]
87 for MVA in MVAlist:
88    readers.append(ROOT.TMVA.Reader("!Color:!Silent"))
89
90 #define variables and specatators
91 MVA_var_buffer = []
92 MVA_var_buffer4 = []
93 for i in range(len( MVA_Vars['Nominal'])):
94    MVA_var_buffer.append(array( 'f', [ 0 ] ))
95    for reader in readers:
96        reader.AddVariable( MVA_Vars['Nominal'][i],MVA_var_buffer[i])
97 #MVA_spectator_buffer = []
98 #for i in range(len(spectators)):
99 #    MVA_spectator_buffer.append(array( 'f', [ 0 ] ))
100 #    for reader in readers:
101 #        reader.AddSpectator(spectators[i],MVA_spectator_buffer[i])
102 #Load raeder
103 for i in range(0,len(readers)):
104    readers[i].BookMVA(MVAinfos[i].MVAname,MVAinfos[i].getweightfile())
105 #--> Now the MVA is booked
92  
93   #Apply samples
94 < infofile = open(Apath+'/samples.info','r')
94 > infofile = open(samplesinfo,'r')
95   Ainfo = pickle.load(infofile)
96   infofile.close()
97  
98 +
99 + class MvaEvaluater:
100 +    def __init__(self, config, MVAinfo):
101 +        self.varset = MVAinfo.varset
102 +        #Define reader
103 +        self.reader = ROOT.TMVA.Reader("!Color:!Silent")
104 +        MVAdir=config.get('Directories','vhbbpath')
105 +        self.systematics=config.get('systematics','systematics').split(' ')
106 +        self.MVA_Vars={}
107 +        self.MVAname = MVAinfo.MVAname
108 +        for systematic in self.systematics:
109 +            self.MVA_Vars[systematic]=config.get(self.varset,systematic)
110 +            self.MVA_Vars[systematic]=self.MVA_Vars[systematic].split(' ')
111 +        #define variables and specatators
112 +        self.MVA_var_buffer = []
113 +        for i in range(len( self.MVA_Vars['Nominal'])):
114 +            self.MVA_var_buffer.append(array( 'f', [ 0 ] ))
115 +            self.reader.AddVariable( self.MVA_Vars['Nominal'][i],self.MVA_var_buffer[i])
116 +        self.reader.BookMVA(MVAinfo.MVAname,MVAdir+'/data/'+MVAinfo.getweightfile())
117 +        #--> Now the MVA is booked
118 +
119 +    def setBranches(self,tree,job):
120 +        #Set formulas for all vars
121 +        self.MVA_formulas={}
122 +        for systematic in self.systematics:
123 +            if job.type == 'DATA' and not systematic == 'Nominal': continue
124 +            self.MVA_formulas[systematic]=[]
125 +            for j in range(len( self.MVA_Vars['Nominal'])):
126 +                self.MVA_formulas[systematic].append(ROOT.TTreeFormula("MVA_formula%s_%s"%(j,systematic),self.MVA_Vars[systematic][j],tree))
127 +
128 +    def evaluate(self,MVAbranches,job):
129 +        #Evaluate all vars and fill the branches
130 +        for systematic in self.systematics:
131 +            for j in range(len( self.MVA_Vars['Nominal'])):
132 +                if job.type == 'DATA' and not systematic == 'Nominal': continue
133 +                self.MVA_var_buffer[j][0] = self.MVA_formulas[systematic][j].EvalInstance()                
134 +            MVAbranches[self.systematics.index(systematic)] = self.reader.EvaluateMVA(self.MVAname)
135 +
136 +
137 + theMVAs = []
138 + for mva in MVAinfos:
139 +    theMVAs.append(MvaEvaluater(config,mva))
140 +
141 +
142   #eval
143   for job in Ainfo:
144 <
145 <    if job.name in namelist:
146 <        #get trees:
147 <        input = TFile.Open(job.getpath(),'read')
148 <        outfile = TFile.Open(job.path+'/'+MVAdir+job.prefix+job.identifier+'.root','recreate')
149 <        input.cd()
150 <        obj = ROOT.TObject
121 <        for key in ROOT.gDirectory.GetListOfKeys():
144 >    if eval(job.active):
145 >        if job.name in namelist:
146 >            #get trees:
147 >            print INpath+'/'+job.prefix+job.identifier+'.root'
148 >            input = ROOT.TFile.Open(INpath+'/'+job.prefix+job.identifier+'.root','read')
149 >            print OUTpath+'/'+job.prefix+job.identifier+'.root'
150 >            outfile = ROOT.TFile.Open(OUTpath+'/'+job.prefix+job.identifier+'.root','recreate')
151              input.cd()
152 <            obj = key.ReadObj()
153 <            #print obj.GetName()
154 <            if obj.GetName() == job.tree:
155 <                continue
152 >            obj = ROOT.TObject
153 >            for key in ROOT.gDirectory.GetListOfKeys():
154 >                input.cd()
155 >                obj = key.ReadObj()
156 >                #print obj.GetName()
157 >                if obj.GetName() == job.tree:
158 >                    continue
159 >                outfile.cd()
160 >                #print key.GetName()
161 >                obj.Write(key.GetName())
162 >            tree = input.Get(job.tree)
163 >            nEntries = tree.GetEntries()
164              outfile.cd()
165 <            #print key.GetName()
166 <            obj.Write(key.GetName())
167 <        tree = input.Get(job.tree)
168 <        nEntries = tree.GetEntries()
169 <        outfile.cd()
170 <        newtree = tree.CloneTree(0)
134 <
135 <        #MCs:
136 <        if job.type != 'DATA':
137 <            MVA_formulas={}
138 <            MVA_formulas4={}
139 <            for systematic in systematics:
140 <                #print '\t\t - ' + systematic
141 <                MVA_formulas[systematic]=[]
142 <                MVA_formulas4[systematic]=[]
143 <                #create TTreeFormulas
144 <                for j in range(len( MVA_Vars['Nominal'])):
145 <                    MVA_formulas[systematic].append(ROOT.TTreeFormula("MVA_formula%s_%s"%(j,systematic),MVA_Vars[systematic][j],tree))
146 <                    MVA_formulas4[systematic].append(ROOT.TTreeFormula("MVA_formula4%s_%s"%(j,systematic),MVA_Vars['Nominal'][j]+'+('+MVA_Vars[systematic][j]+'-'+MVA_Vars['Nominal'][j]+')*4',tree))#HERE change
165 >            newtree = tree.CloneTree(0)
166 >            
167 >
168 >            #Set branch adress for all vars
169 >            for i in range(0,len(theMVAs)):
170 >                theMVAs[i].setBranches(tree,job)
171              outfile.cd()
172              #Setup Branches
173              MVAbranches=[]
174 <            MVAbranches4=[]
175 <            for i in range(0,len(readers)):
176 <                MVAbranches.append(array('f',[0]*9))
177 <                MVAbranches4.append(array('f',[0]*9))
178 <                newtree.Branch(MVAinfos[i].MVAname,MVAbranches[i],'nominal:JER_up:JER_down:JES_up:JES_down:beff_up:beff_down:bmis_up:bmis_down/F')
179 <                newtree.Branch(MVAinfos[i].MVAname+'_4',MVAbranches4[i],'nominal:JER_up:JER_down:JES_up:JES_down:beff_up:beff_down:bmis_up:bmis_down/F')
174 >            for i in range(0,len(theMVAs)):
175 >                if job.type == 'Data':
176 >                    MVAbranches.append(array('f',[0]))
177 >                    newtree.Branch(MVAinfos[i].MVAname,MVAbranches[i],'nominal/F')
178 >                else:
179 >                    MVAbranches.append(array('f',[0]*11))
180 >                    newtree.Branch(theMVAs[i].MVAname,MVAbranches[i],'nominal:JER_up:JER_down:JES_up:JES_down:beff_up:beff_down:bmis_up:bmis_down:beff1_up:beff1_down/F')
181 >                MVA_formulas_Nominal = []
182              print '\n--> ' + job.name +':'
183              #progbar setup
184              if nEntries >= longe:
# Line 168 | Line 194 | for job in Ainfo:
194                      bar.move()
195                  #load entry
196                  tree.GetEntry(entry)
197 <                for systematic in systematics:
198 <                    for j in range(len( MVA_Vars['Nominal'])):
199 <                        MVA_var_buffer[j][0] = MVA_formulas[systematic][j].EvalInstance()
174 <                        
175 <                    for j in range(0,len(readers)):
176 <                        MVAbranches[j][systematics.index(systematic)] = readers[j].EvaluateMVA(MVAinfos[j].MVAname)
177 <                        
178 <                    for j in range(len( MVA_Vars['Nominal'])):
179 <                        MVA_var_buffer[j][0] = MVA_formulas4[systematic][j].EvalInstance()
180 <                        
181 <                    for j in range(0,len(readers)):
182 <                        MVAbranches4[j][systematics.index(systematic)] = readers[j].EvaluateMVA(MVAinfos[j].MVAname)
197 >                            
198 >                for i in range(0,len(theMVAs)):
199 >                    theMVAs[i].evaluate(MVAbranches[i],job)
200                  #Fill:
201                  newtree.Fill()
202              newtree.AutoSave()
203              outfile.Close()
204 <            
188 <        #DATA:
189 <        if job.type == 'DATA':
190 <            #MVA Formulas
191 <            MVA_formulas_Nominal = []
192 <            #create TTreeFormulas
193 <            for j in range(len( MVA_Vars['Nominal'])):
194 <                MVA_formulas_Nominal.append(ROOT.TTreeFormula("MVA_formula%s_Nominal"%j, MVA_Vars['Nominal'][j],tree))
195 <            outfile.cd()
196 <            MVAbranches=[]
197 <            for i in range(0,len(readers)):
198 <                MVAbranches.append(array('f',[0]))
199 <                newtree.Branch(MVAinfos[i].MVAname,MVAbranches[i],'nominal/F')
200 <                newtree.Branch(MVAinfos[i].MVAname+'_4',MVAbranches[i],'nominal/F')
201 <            #progbar          
202 <            print '\n--> ' + job.name +':'
203 <            if nEntries >= longe:
204 <                step=int(nEntries/longe)
205 <                long=longe
206 <            else:
207 <                long=nEntries
208 <                step = 1
209 <            bar=progbar(long)
210 <            #Fill event by event:
211 <            for entry in range(0,nEntries):
212 <                if entry % step == 0:
213 <                    bar.move()
214 <                #load entry
215 <                tree.GetEntry(entry)
216 <                #nominal:
217 <                for j in range(len( MVA_Vars['Nominal'])):
218 <                        MVA_var_buffer[j][0] = MVA_formulas_Nominal[j].EvalInstance()
219 <                        
220 <                for j in range(0,len(readers)):
221 <                    MVAbranches[j][0]= readers[j].EvaluateMVA(MVAinfos[j].MVAname)
222 <                newtree.Fill()
223 <            newtree.AutoSave()
224 <            outfile.Close()
225 <
204 >                
205   print '\n'
206  
207   #Update Info:
# Line 231 | Line 210 | if doinfo:
210          for MVAinfo in MVAinfos:
211              job.addcomment('Added MVA %s'%MVAinfo.MVAname)
212          job.addpath(MVAdir)
213 <    infofile = open(Apath+MVAdir+'/samples.info','w')
213 >    infofile = open(samplesinfo,'w')
214      pickle.dump(Ainfo,infofile)
215      infofile.close()
216  

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines