VHbb/python/evaluateMVA.py

#!/usr/bin/env python
import sys
import os
import ROOT 
from array import array
from math import sqrt
from copy import copy
#suppres the EvalInstace conversion warning bug
import warnings
warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
from optparse import OptionParser
from BetterConfigParser import BetterConfigParser
import pickle

#CONFIGURE
ROOT.gROOT.SetBatch(True)
print 'hello'
#load config
#os.mkdir(path+'/sys')
argv = sys.argv
parser = OptionParser()
parser.add_option("-U", "--update", dest="update", default=0,
                      help="update infofile")
parser.add_option("-D", "--discr", dest="discr", default="",
                      help="discriminators to be added")
#parser.add_option("-I", "--inpath", dest="inpath", default="",
#                      help="path to samples")
#parser.add_option("-O", "--outpath", dest="outpath", default="",
#                      help="path where to store output samples")
parser.add_option("-S", "--samples", dest="names", default="",
                      help="samples you want to run on")
parser.add_option("-C", "--config", dest="config", default=[], action="append",
                      help="configuration file")
(opts, args) = parser.parse_args(argv)

from samplesclass import sample
from mvainfos import mvainfo
from progbar import progbar
from printcolor import printc

if opts.config =="":
        opts.config = "config"
config = BetterConfigParser()
#config.read('./config7TeV_ZZ')
config.read(opts.config)
anaTag = config.get("Analysis","tag")

#get locations:
Wdir=config.get('Directories','Wdir')
MVASubdir=config.get('Directories','MVAdir')
samplesinfo=config.get('Directories','samplesinfo')

#systematics


INpath = config.get('Directories','MVAin')
OUTpath = config.get('Directories','MVAout')

infofile = open(samplesinfo,'r')
info = pickle.load(infofile)
infofile.close()
arglist=opts.discr #RTight_blavla,bsbsb

namelistIN=opts.names
namelist=namelistIN.split(',')

doinfo=bool(int(opts.update))

MVAlist=arglist.split(',')


#CONFIG
#factory
factoryname=config.get('factory','factoryname')
#MVA
MVAinfos=[]
MVAdir=config.get('Directories','vhbbpath')
for MVAname in MVAlist:
    MVAinfofile = open(MVAdir+'/data/'+factoryname+'_'+MVAname+'.info','r')
    MVAinfos.append(pickle.load(MVAinfofile))
    MVAinfofile.close()
    
longe=40
#Workdir
workdir=ROOT.gDirectory.GetPath()

#Apply samples
infofile = open(samplesinfo,'r')
Ainfo = pickle.load(infofile)
infofile.close()


class MvaEvaluater:
    def __init__(self, config, MVAinfo):
        self.varset = MVAinfo.varset
        #Define reader
        self.reader = ROOT.TMVA.Reader("!Color:!Silent")
        MVAdir=config.get('Directories','vhbbpath')
        self.systematics=config.get('systematics','systematics').split(' ')
        self.MVA_Vars={}
        self.MVAname = MVAinfo.MVAname
        for systematic in self.systematics:
            self.MVA_Vars[systematic]=config.get(self.varset,systematic)
            self.MVA_Vars[systematic]=self.MVA_Vars[systematic].split(' ')
        #define variables and specatators
        self.MVA_var_buffer = []
        for i in range(len( self.MVA_Vars['Nominal'])):
            self.MVA_var_buffer.append(array( 'f', [ 0 ] ))
            self.reader.AddVariable( self.MVA_Vars['Nominal'][i],self.MVA_var_buffer[i])
        self.reader.BookMVA(MVAinfo.MVAname,MVAdir+'/data/'+MVAinfo.getweightfile())
        #--> Now the MVA is booked

    def setBranches(self,tree,job):
        #Set formulas for all vars
        self.MVA_formulas={}
        for systematic in self.systematics: 
            if job.type == 'DATA' and not systematic == 'Nominal': continue
            self.MVA_formulas[systematic]=[]
            for j in range(len( self.MVA_Vars['Nominal'])):
                self.MVA_formulas[systematic].append(ROOT.TTreeFormula("MVA_formula%s_%s"%(j,systematic),self.MVA_Vars[systematic][j],tree))

    def evaluate(self,MVAbranches,job):
        #Evaluate all vars and fill the branches
        for systematic in self.systematics:
            for j in range(len( self.MVA_Vars['Nominal'])):
                if job.type == 'DATA' and not systematic == 'Nominal': continue
                self.MVA_var_buffer[j][0] = self.MVA_formulas[systematic][j].EvalInstance()                
            MVAbranches[self.systematics.index(systematic)] = self.reader.EvaluateMVA(self.MVAname)


theMVAs = []
for mva in MVAinfos:
    theMVAs.append(MvaEvaluater(config,mva))


#eval
for job in Ainfo:
    if eval(job.active):
        if job.name in namelist:
            #get trees:
            print INpath+'/'+job.prefix+job.identifier+'.root'
            input = ROOT.TFile.Open(INpath+'/'+job.prefix+job.identifier+'.root','read')
            print OUTpath+'/'+job.prefix+job.identifier+'.root'
            outfile = ROOT.TFile.Open(OUTpath+'/'+job.prefix+job.identifier+'.root','recreate')
            input.cd()
            obj = ROOT.TObject
            for key in ROOT.gDirectory.GetListOfKeys():
                input.cd()
                obj = key.ReadObj()
                #print obj.GetName()
                if obj.GetName() == job.tree:
                    continue
                outfile.cd()
                #print key.GetName()
                obj.Write(key.GetName())
            tree = input.Get(job.tree)
            nEntries = tree.GetEntries()
            outfile.cd()
            newtree = tree.CloneTree(0)
            

            #Set branch adress for all vars
            for i in range(0,len(theMVAs)):
                theMVAs[i].setBranches(tree,job)
            outfile.cd()
            #Setup Branches
            MVAbranches=[]
            for i in range(0,len(theMVAs)):
                if job.type == 'Data':
                    MVAbranches.append(array('f',[0]))
                    newtree.Branch(MVAinfos[i].MVAname,MVAbranches[i],'nominal/F') 
                else:
                    MVAbranches.append(array('f',[0]*11))
                    newtree.Branch(theMVAs[i].MVAname,MVAbranches[i],'nominal:JER_up:JER_down:JES_up:JES_down:beff_up:beff_down:bmis_up:bmis_down:beff1_up:beff1_down/F')
                MVA_formulas_Nominal = []
            print '\n--> ' + job.name +':'
            #progbar setup
            if nEntries >= longe:
                step=int(nEntries/longe)
                long=longe
            else:
                long=nEntries
                step = 1
            bar=progbar(long)
            #Fill event by event:
            for entry in range(0,nEntries):
                if entry % step == 0:
                    bar.move()
                #load entry
                tree.GetEntry(entry)
                            
                for i in range(0,len(theMVAs)):
                    theMVAs[i].evaluate(MVAbranches[i],job)
                #Fill:
                newtree.Fill()
            newtree.AutoSave()
            outfile.Close()
                
print '\n'

#Update Info:
if doinfo:
    for job in Ainfo:        
        for MVAinfo in MVAinfos:
            job.addcomment('Added MVA %s'%MVAinfo.MVAname)
        job.addpath(MVAdir)
    infofile = open(samplesinfo,'w')
    pickle.dump(Ainfo,infofile)
    infofile.close()


Revision:	1.18
Committed:	Wed Nov 28 13:04:59 2012 UTC (12 years, 5 months ago) by nmohr
Content type:	text/x-python
Branch:	MAIN
Changes since 1.17:	+89 -148 lines
Log Message:	Different vars in MVA evaluation
#	Content
1	#!/usr/bin/env python
2	import sys
3	import os
4	import ROOT
5	from array import array
6	from math import sqrt
7	from copy import copy
8	#suppres the EvalInstace conversion warning bug
9	import warnings
10	warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
11	from optparse import OptionParser
12	from BetterConfigParser import BetterConfigParser
13	import pickle
14
15	#CONFIGURE
16	ROOT.gROOT.SetBatch(True)
17	print 'hello'
18	#load config
19	#os.mkdir(path+'/sys')
20	argv = sys.argv
21	parser = OptionParser()
22	parser.add_option("-U", "--update", dest="update", default=0,
23	help="update infofile")
24	parser.add_option("-D", "--discr", dest="discr", default="",
25	help="discriminators to be added")
26	#parser.add_option("-I", "--inpath", dest="inpath", default="",
27	# help="path to samples")
28	#parser.add_option("-O", "--outpath", dest="outpath", default="",
29	# help="path where to store output samples")
30	parser.add_option("-S", "--samples", dest="names", default="",
31	help="samples you want to run on")
32	parser.add_option("-C", "--config", dest="config", default=[], action="append",
33	help="configuration file")
34	(opts, args) = parser.parse_args(argv)
35
36	from samplesclass import sample
37	from mvainfos import mvainfo
38	from progbar import progbar
39	from printcolor import printc
40
41	if opts.config =="":
42	opts.config = "config"
43	config = BetterConfigParser()
44	#config.read('./config7TeV_ZZ')
45	config.read(opts.config)
46	anaTag = config.get("Analysis","tag")
47
48	#get locations:
49	Wdir=config.get('Directories','Wdir')
50	MVASubdir=config.get('Directories','MVAdir')
51	samplesinfo=config.get('Directories','samplesinfo')
52
53	#systematics
54
55
56	INpath = config.get('Directories','MVAin')
57	OUTpath = config.get('Directories','MVAout')
58
59	infofile = open(samplesinfo,'r')
60	info = pickle.load(infofile)
61	infofile.close()
62	arglist=opts.discr #RTight_blavla,bsbsb
63
64	namelistIN=opts.names
65	namelist=namelistIN.split(',')
66
67	doinfo=bool(int(opts.update))
68
69	MVAlist=arglist.split(',')
70
71
72	#CONFIG
73	#factory
74	factoryname=config.get('factory','factoryname')
75	#MVA
76	MVAinfos=[]
77	MVAdir=config.get('Directories','vhbbpath')
78	for MVAname in MVAlist:
79	MVAinfofile = open(MVAdir+'/data/'+factoryname+'_'+MVAname+'.info','r')
80	MVAinfos.append(pickle.load(MVAinfofile))
81	MVAinfofile.close()
82
83	longe=40
84	#Workdir
85	workdir=ROOT.gDirectory.GetPath()
86
87	#Apply samples
88	infofile = open(samplesinfo,'r')
89	Ainfo = pickle.load(infofile)
90	infofile.close()
91
92
93	class MvaEvaluater:
94	def __init__(self, config, MVAinfo):
95	self.varset = MVAinfo.varset
96	#Define reader
97	self.reader = ROOT.TMVA.Reader("!Color:!Silent")
98	MVAdir=config.get('Directories','vhbbpath')
99	self.systematics=config.get('systematics','systematics').split(' ')
100	self.MVA_Vars={}
101	self.MVAname = MVAinfo.MVAname
102	for systematic in self.systematics:
103	self.MVA_Vars[systematic]=config.get(self.varset,systematic)
104	self.MVA_Vars[systematic]=self.MVA_Vars[systematic].split(' ')
105	#define variables and specatators
106	self.MVA_var_buffer = []
107	for i in range(len( self.MVA_Vars['Nominal'])):
108	self.MVA_var_buffer.append(array( 'f', [ 0 ] ))
109	self.reader.AddVariable( self.MVA_Vars['Nominal'][i],self.MVA_var_buffer[i])
110	self.reader.BookMVA(MVAinfo.MVAname,MVAdir+'/data/'+MVAinfo.getweightfile())
111	#--> Now the MVA is booked
112
113	def setBranches(self,tree,job):
114	#Set formulas for all vars
115	self.MVA_formulas={}
116	for systematic in self.systematics:
117	if job.type == 'DATA' and not systematic == 'Nominal': continue
118	self.MVA_formulas[systematic]=[]
119	for j in range(len( self.MVA_Vars['Nominal'])):
120	self.MVA_formulas[systematic].append(ROOT.TTreeFormula("MVA_formula%s_%s"%(j,systematic),self.MVA_Vars[systematic][j],tree))
121
122	def evaluate(self,MVAbranches,job):
123	#Evaluate all vars and fill the branches
124	for systematic in self.systematics:
125	for j in range(len( self.MVA_Vars['Nominal'])):
126	if job.type == 'DATA' and not systematic == 'Nominal': continue
127	self.MVA_var_buffer[j][0] = self.MVA_formulas[systematic][j].EvalInstance()
128	MVAbranches[self.systematics.index(systematic)] = self.reader.EvaluateMVA(self.MVAname)
129
130
131	theMVAs = []
132	for mva in MVAinfos:
133	theMVAs.append(MvaEvaluater(config,mva))
134
135
136	#eval
137	for job in Ainfo:
138	if eval(job.active):
139	if job.name in namelist:
140	#get trees:
141	print INpath+'/'+job.prefix+job.identifier+'.root'
142	input = ROOT.TFile.Open(INpath+'/'+job.prefix+job.identifier+'.root','read')
143	print OUTpath+'/'+job.prefix+job.identifier+'.root'
144	outfile = ROOT.TFile.Open(OUTpath+'/'+job.prefix+job.identifier+'.root','recreate')
145	input.cd()
146	obj = ROOT.TObject
147	for key in ROOT.gDirectory.GetListOfKeys():
148	input.cd()
149	obj = key.ReadObj()
150	#print obj.GetName()
151	if obj.GetName() == job.tree:
152	continue
153	outfile.cd()
154	#print key.GetName()
155	obj.Write(key.GetName())
156	tree = input.Get(job.tree)
157	nEntries = tree.GetEntries()
158	outfile.cd()
159	newtree = tree.CloneTree(0)
160
161
162	#Set branch adress for all vars
163	for i in range(0,len(theMVAs)):
164	theMVAs[i].setBranches(tree,job)
165	outfile.cd()
166	#Setup Branches
167	MVAbranches=[]
168	for i in range(0,len(theMVAs)):
169	if job.type == 'Data':
170	MVAbranches.append(array('f',[0]))
171	newtree.Branch(MVAinfos[i].MVAname,MVAbranches[i],'nominal/F')
172	else:
173	MVAbranches.append(array('f',[0]*11))
174	newtree.Branch(theMVAs[i].MVAname,MVAbranches[i],'nominal:JER_up:JER_down:JES_up:JES_down:beff_up:beff_down:bmis_up:bmis_down:beff1_up:beff1_down/F')
175	MVA_formulas_Nominal = []
176	print '\n--> ' + job.name +':'
177	#progbar setup
178	if nEntries >= longe:
179	step=int(nEntries/longe)
180	long=longe
181	else:
182	long=nEntries
183	step = 1
184	bar=progbar(long)
185	#Fill event by event:
186	for entry in range(0,nEntries):
187	if entry % step == 0:
188	bar.move()
189	#load entry
190	tree.GetEntry(entry)
191
192	for i in range(0,len(theMVAs)):
193	theMVAs[i].evaluate(MVAbranches[i],job)
194	#Fill:
195	newtree.Fill()
196	newtree.AutoSave()
197	outfile.Close()
198
199	print '\n'
200
201	#Update Info:
202	if doinfo:
203	for job in Ainfo:
204	for MVAinfo in MVAinfos:
205	job.addcomment('Added MVA %s'%MVAinfo.MVAname)
206	job.addpath(MVAdir)
207	infofile = open(samplesinfo,'w')
208	pickle.dump(Ainfo,infofile)
209	infofile.close()
210
211