ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/evaluateMVA.py
(Generate patch)

Comparing UserCode/VHbb/python/evaluateMVA.py (file contents):
Revision 1.1 by peller, Tue May 8 10:37:17 2012 UTC vs.
Revision 1.19 by bortigno, Fri Jan 11 13:47:24 2013 UTC

# Line 2 | Line 2
2   import sys
3   import os
4   import ROOT
5 from ROOT import TFile
5   from array import array
6   from math import sqrt
7   from copy import copy
8   #suppres the EvalInstace conversion warning bug
9   import warnings
10   warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
11 < from ConfigParser import SafeConfigParser
12 < from samplesinfo import sample
14 < from mvainfos import mvainfo
11 > from optparse import OptionParser
12 > from BetterConfigParser import BetterConfigParser
13   import pickle
16 from progbar import progbar
17 from printcolor import printc
14  
15   #CONFIGURE
16 <
16 > ROOT.gROOT.SetBatch(True)
17 > print 'hello'
18   #load config
19 < config = SafeConfigParser()
20 < config.read('./config')
19 > #os.mkdir(path+'/sys')
20 > argv = sys.argv
21 > parser = OptionParser()
22 > parser.add_option("-U", "--update", dest="update", default=0,
23 >                      help="update infofile")
24 > parser.add_option("-D", "--discr", dest="discr", default="",
25 >                      help="discriminators to be added")
26 > #parser.add_option("-I", "--inpath", dest="inpath", default="",
27 > #                      help="path to samples")
28 > #parser.add_option("-O", "--outpath", dest="outpath", default="",
29 > #                      help="path where to store output samples")
30 > parser.add_option("-S", "--samples", dest="names", default="",
31 >                      help="samples you want to run on")
32 > parser.add_option("-C", "--config", dest="config", default=[], action="append",
33 >                      help="configuration file")
34 > (opts, args) = parser.parse_args(argv)
35 >
36 > from samplesclass import sample
37 > from mvainfos import mvainfo
38 > from progbar import progbar
39 > from printcolor import printc
40 >
41 > if opts.config =="":
42 >        opts.config = "config"
43 > config = BetterConfigParser()
44 > #config.read('./config7TeV_ZZ')
45 > config.read(opts.config)
46 > anaTag = config.get("Analysis","tag")
47  
48   #get locations:
49   Wdir=config.get('Directories','Wdir')
50 <
50 > MVASubdir=config.get('Directories','MVAdir')
51 > samplesinfo=config.get('Directories','samplesinfo')
52  
53   #systematics
30 systematics=config.get('systematics','systematics')
31 systematics=systematics.split(' ')
54  
55 < #TreeVar Array
56 < MVA_Vars={}
57 < for systematic in systematics:
58 <    MVA_Vars[systematic]=config.get('treeVars',systematic)
59 <    MVA_Vars[systematic]=MVA_Vars[systematic].split(' ')
60 <
61 < ######################
62 < #Evaluate multi: Must Have same treeVars!!!
63 <
64 < Apath=sys.argv[1]
65 < arglist=sys.argv[2] #RTight_blavla,bsbsb
66 <
67 < #for axample
46 < #0 5 0
47 < #and
48 < #5 -1 1
49 <
50 < start=int(sys.argv[3])
51 < stop=int(sys.argv[4])
52 < doinfo=bool(int(sys.argv[5]))
55 >
56 > INpath = config.get('Directories','MVAin')
57 > OUTpath = config.get('Directories','MVAout')
58 >
59 > infofile = open(samplesinfo,'r')
60 > info = pickle.load(infofile)
61 > infofile.close()
62 > arglist=opts.discr #RTight_blavla,bsbsb
63 >
64 > namelistIN=opts.names
65 > namelist=namelistIN.split(',')
66 >
67 > doinfo=bool(int(opts.update))
68  
69   MVAlist=arglist.split(',')
70  
71 +
72   #CONFIG
73   #factory
74   factoryname=config.get('factory','factoryname')
75 +
76 + #load the namespace
77 + VHbbNameSpace=config.get('VHbbNameSpace','library')
78 + ROOT.gSystem.Load(VHbbNameSpace)
79 +
80   #MVA
60 #MVAnames=[]
61 #for MVA in MVAlist:
62 #    print MVA
63 #    MVAnames.append(config.get(MVA,'MVAname'))
64 #print Wdir+'/weights/'+factoryname+'_'+MVAname+'.info'
65 #MVAinfofiles=[]
81   MVAinfos=[]
82 + MVAdir=config.get('Directories','vhbbpath')
83   for MVAname in MVAlist:
84 <    MVAinfofile = open(Wdir+'/weights/'+factoryname+'_'+MVAname+'.info','r')
84 >    MVAinfofile = open(MVAdir+'/data/'+factoryname+'_'+MVAname+'.info','r')
85      MVAinfos.append(pickle.load(MVAinfofile))
86      MVAinfofile.close()
87      
72 treeVarSet=MVAinfos[0].varset
73 #variables
74 #TreeVar Array
75 MVA_Vars={}
76 for systematic in systematics:
77    MVA_Vars[systematic]=config.get(treeVarSet,systematic)
78    MVA_Vars[systematic]=MVA_Vars[systematic].split(' ')
79 #Spectators:
80 #spectators=config.get(treeVarSet,'spectators')
81 #spectators=spectators.split(' ')
82 #progbar quatsch
88   longe=40
89   #Workdir
90   workdir=ROOT.gDirectory.GetPath()
86 #os.mkdir(Apath+'/MVAout')
91  
88 #Book TMVA readers: MVAlist=["MMCC_bla","CC5050_bla"]
89 readers=[]
90 for MVA in MVAlist:
91    readers.append(ROOT.TMVA.Reader("!Color:!Silent"))
92
93 #define variables and specatators
94 MVA_var_buffer = []
95 for i in range(len( MVA_Vars['Nominal'])):
96    MVA_var_buffer.append(array( 'f', [ 0 ] ))
97    for reader in readers:
98        reader.AddVariable( MVA_Vars['Nominal'][i],MVA_var_buffer[i])
99 #MVA_spectator_buffer = []
100 #for i in range(len(spectators)):
101 #    MVA_spectator_buffer.append(array( 'f', [ 0 ] ))
102 #    for reader in readers:
103 #        reader.AddSpectator(spectators[i],MVA_spectator_buffer[i])
104 #Load raeder
105 for i in range(0,len(readers)):
106    readers[i].BookMVA(MVAinfos[i].MVAname,MVAinfos[i].getweightfile())
107 #--> Now the MVA is booked
92  
93   #Apply samples
94 < infofile = open(Apath+'/samples.info','r')
94 > infofile = open(samplesinfo,'r')
95   Ainfo = pickle.load(infofile)
96   infofile.close()
97  
114 #eval
115 for job in Ainfo[start:stop]:
116    #get trees:
117    input = TFile.Open(job.getpath(),'read')
118    outfile = TFile.Open(job.path+'/MVAout/'+job.prefix+job.identifier+'.root','recreate')
119    input.cd()
120    obj = ROOT.TObject
121    for key in ROOT.gDirectory.GetListOfKeys():
122        input.cd()
123        obj = key.ReadObj()
124        print obj.GetName()
125        if obj.GetName() == job.tree:
126            continue
127        outfile.cd()
128        print key.GetName()
129        obj.Write(key.GetName())
130    tree = input.Get(job.tree)
131    nEntries = tree.GetEntries()
132    outfile.cd()
133    newtree = tree.CloneTree(0)
134
135    #MCs:
136    if job.type != 'DATA':
137        MVA_formulas={}
138        for systematic in systematics:
139            #print '\t\t - ' + systematic
140            MVA_formulas[systematic]=[]
141            #create TTreeFormulas
142            for j in range(len( MVA_Vars['Nominal'])):
143                MVA_formulas[systematic].append(ROOT.TTreeFormula("MVA_formula%s_%s"%(j,systematic),MVA_Vars[systematic][j],tree))
144        outfile.cd()
145        #Setup Branches
146        MVAbranches=[]
147        for i in range(0,len(readers)):
148            MVAbranches.append(array('f',[0]*9))
149            newtree.Branch(MVAinfos[i].MVAname,MVAbranches[i],'nominal:JER_up:JER_down:JES_up:JES_down:beff_up:beff_down:bmis_up:bmis_down/F')
150        print '\n--> ' + job.name +':'
151        #progbar setup
152        if nEntries >= longe:
153            step=int(nEntries/longe)
154            long=longe
155        else:
156            long=nEntries
157            step = 1
158        bar=progbar(long)
159        #Fill event by event:
160        for entry in range(0,nEntries):
161            if entry % step == 0:
162                bar.move()
163            #load entry
164            tree.GetEntry(entry)
165            for systematic in systematics:
166                for j in range(len( MVA_Vars['Nominal'])):
167                    MVA_var_buffer[j][0] = MVA_formulas[systematic][j].EvalInstance()
168                    
169                for j in range(0,len(readers)):
170                    MVAbranches[j][systematics.index(systematic)] = readers[j].EvaluateMVA(MVAinfos[j].MVAname)
171            #Fill:
172            newtree.Fill()
173        newtree.AutoSave()
174        outfile.Close()
175        
176    #DATA:
177    if job.type == 'DATA':
178        #MVA Formulas
179        MVA_formulas_Nominal = []
180        #create TTreeFormulas
181        for j in range(len( MVA_Vars['Nominal'])):
182            MVA_formulas_Nominal.append(ROOT.TTreeFormula("MVA_formula%s_Nominal"%j, MVA_Vars['Nominal'][j],tree))
183        outfile.cd()
184        MVAbranches=[]
185        for i in range(0,len(readers)):
186            MVAbranches.append(array('f',[0]))
187            newtree.Branch(MVAinfos[i].MVAname,MVAbranches[i],'nominal/F')
188        #progbar          
189        print '\n--> ' + job.name +':'
190        if nEntries >= longe:
191            step=int(nEntries/longe)
192            long=longe
193        else:
194            long=nEntries
195            step = 1
196        bar=progbar(long)
197        #Fill event by event:
198        for entry in range(0,nEntries):
199            if entry % step == 0:
200                bar.move()
201            #load entry
202            tree.GetEntry(entry)
203            #nominal:
204            for j in range(len( MVA_Vars['Nominal'])):
205                    MVA_var_buffer[j][0] = MVA_formulas_Nominal[j].EvalInstance()
206                    
207            for j in range(0,len(readers)):
208                MVAbranches[j][0]= readers[j].EvaluateMVA(MVAinfos[j].MVAname)
209            newtree.Fill()
210        newtree.AutoSave()
211        outfile.Close()
212
98  
99 + class MvaEvaluater:
100 +    def __init__(self, config, MVAinfo):
101 +        self.varset = MVAinfo.varset
102 +        #Define reader
103 +        self.reader = ROOT.TMVA.Reader("!Color:!Silent")
104 +        MVAdir=config.get('Directories','vhbbpath')
105 +        self.systematics=config.get('systematics','systematics').split(' ')
106 +        self.MVA_Vars={}
107 +        self.MVAname = MVAinfo.MVAname
108 +        for systematic in self.systematics:
109 +            self.MVA_Vars[systematic]=config.get(self.varset,systematic)
110 +            self.MVA_Vars[systematic]=self.MVA_Vars[systematic].split(' ')
111 +        #define variables and specatators
112 +        self.MVA_var_buffer = []
113 +        for i in range(len( self.MVA_Vars['Nominal'])):
114 +            self.MVA_var_buffer.append(array( 'f', [ 0 ] ))
115 +            self.reader.AddVariable( self.MVA_Vars['Nominal'][i],self.MVA_var_buffer[i])
116 +        self.reader.BookMVA(MVAinfo.MVAname,MVAdir+'/data/'+MVAinfo.getweightfile())
117 +        #--> Now the MVA is booked
118 +
119 +    def setBranches(self,tree,job):
120 +        #Set formulas for all vars
121 +        self.MVA_formulas={}
122 +        for systematic in self.systematics:
123 +            if job.type == 'DATA' and not systematic == 'Nominal': continue
124 +            self.MVA_formulas[systematic]=[]
125 +            for j in range(len( self.MVA_Vars['Nominal'])):
126 +                self.MVA_formulas[systematic].append(ROOT.TTreeFormula("MVA_formula%s_%s"%(j,systematic),self.MVA_Vars[systematic][j],tree))
127 +
128 +    def evaluate(self,MVAbranches,job):
129 +        #Evaluate all vars and fill the branches
130 +        for systematic in self.systematics:
131 +            for j in range(len( self.MVA_Vars['Nominal'])):
132 +                if job.type == 'DATA' and not systematic == 'Nominal': continue
133 +                self.MVA_var_buffer[j][0] = self.MVA_formulas[systematic][j].EvalInstance()                
134 +            MVAbranches[self.systematics.index(systematic)] = self.reader.EvaluateMVA(self.MVAname)
135 +
136 +
137 + theMVAs = []
138 + for mva in MVAinfos:
139 +    theMVAs.append(MvaEvaluater(config,mva))
140  
141  
142 + #eval
143 + for job in Ainfo:
144 +    if eval(job.active):
145 +        if job.name in namelist:
146 +            #get trees:
147 +            print INpath+'/'+job.prefix+job.identifier+'.root'
148 +            input = ROOT.TFile.Open(INpath+'/'+job.prefix+job.identifier+'.root','read')
149 +            print OUTpath+'/'+job.prefix+job.identifier+'.root'
150 +            outfile = ROOT.TFile.Open(OUTpath+'/'+job.prefix+job.identifier+'.root','recreate')
151 +            input.cd()
152 +            obj = ROOT.TObject
153 +            for key in ROOT.gDirectory.GetListOfKeys():
154 +                input.cd()
155 +                obj = key.ReadObj()
156 +                #print obj.GetName()
157 +                if obj.GetName() == job.tree:
158 +                    continue
159 +                outfile.cd()
160 +                #print key.GetName()
161 +                obj.Write(key.GetName())
162 +            tree = input.Get(job.tree)
163 +            nEntries = tree.GetEntries()
164 +            outfile.cd()
165 +            newtree = tree.CloneTree(0)
166 +            
167 +
168 +            #Set branch adress for all vars
169 +            for i in range(0,len(theMVAs)):
170 +                theMVAs[i].setBranches(tree,job)
171 +            outfile.cd()
172 +            #Setup Branches
173 +            MVAbranches=[]
174 +            for i in range(0,len(theMVAs)):
175 +                if job.type == 'Data':
176 +                    MVAbranches.append(array('f',[0]))
177 +                    newtree.Branch(MVAinfos[i].MVAname,MVAbranches[i],'nominal/F')
178 +                else:
179 +                    MVAbranches.append(array('f',[0]*11))
180 +                    newtree.Branch(theMVAs[i].MVAname,MVAbranches[i],'nominal:JER_up:JER_down:JES_up:JES_down:beff_up:beff_down:bmis_up:bmis_down:beff1_up:beff1_down/F')
181 +                MVA_formulas_Nominal = []
182 +            print '\n--> ' + job.name +':'
183 +            #progbar setup
184 +            if nEntries >= longe:
185 +                step=int(nEntries/longe)
186 +                long=longe
187 +            else:
188 +                long=nEntries
189 +                step = 1
190 +            bar=progbar(long)
191 +            #Fill event by event:
192 +            for entry in range(0,nEntries):
193 +                if entry % step == 0:
194 +                    bar.move()
195 +                #load entry
196 +                tree.GetEntry(entry)
197 +                            
198 +                for i in range(0,len(theMVAs)):
199 +                    theMVAs[i].evaluate(MVAbranches[i],job)
200 +                #Fill:
201 +                newtree.Fill()
202 +            newtree.AutoSave()
203 +            outfile.Close()
204 +                
205 + print '\n'
206  
207   #Update Info:
208   if doinfo:
209      for job in Ainfo:        
210          for MVAinfo in MVAinfos:
211              job.addcomment('Added MVA %s'%MVAinfo.MVAname)
212 <        job.addpath('/MVAout')
213 <    infofile = open(Apath+'/MVAout/samples.info','w')
212 >        job.addpath(MVAdir)
213 >    infofile = open(samplesinfo,'w')
214      pickle.dump(Ainfo,infofile)
215      infofile.close()
216  

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines