ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/workspace_datacard.py
Revision: 1.69
Committed: Tue Feb 5 10:02:03 2013 UTC (12 years, 3 months ago) by peller
Content type: text/x-python
Branch: MAIN
Changes since 1.68: +22 -8 lines
Log Message:
added rebinning

File Contents

# User Rev Content
1 peller 1.8 #!/usr/bin/env python
2 peller 1.53 import os, sys, ROOT, warnings, pickle
3 nmohr 1.68 ROOT.gROOT.SetBatch(True)
4 peller 1.1 from array import array
5     from math import sqrt
6 nmohr 1.68 from copy import copy, deepcopy
7 peller 1.1 #suppres the EvalInstace conversion warning bug
8     warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
9 nmohr 1.31 from optparse import OptionParser
10 nmohr 1.68 from myutils import BetterConfigParser, Sample, progbar, printc, ParseInfo, Rebinner, TreeCache, HistoMaker
11 peller 1.1
12 nmohr 1.68 #--CONFIGURE---------------------------------------------------------------------
13 nmohr 1.31 argv = sys.argv
14     parser = OptionParser()
15     parser.add_option("-V", "--var", dest="variable", default="",
16     help="variable for shape analysis")
17     parser.add_option("-C", "--config", dest="config", default=[], action="append",
18     help="configuration file")
19     (opts, args) = parser.parse_args(argv)
20 nmohr 1.20 config = BetterConfigParser()
21 nmohr 1.31 config.read(opts.config)
22 nmohr 1.68 var=opts.variable
23     #-------------------------------------------------------------------------------
24    
25     #--read variables from config---------------------------------------------------
26     # 7 or 8TeV Analysis
27 nmohr 1.31 anaTag = config.get("Analysis","tag")
28 nmohr 1.68 if not any([anaTag == '7TeV',anaTag == '8TeV']):
29     raise Exception("anaTag %s unknown. Specify 8TeV or 7TeV in the general config"%anaTag)
30     # Directories:
31 peller 1.1 Wdir=config.get('Directories','Wdir')
32 peller 1.42 vhbbpath=config.get('Directories','vhbbpath')
33 nmohr 1.46 samplesinfo=config.get('Directories','samplesinfo')
34 peller 1.67 path = config.get('Directories','dcSamples')
35 nmohr 1.68 outpath=config.get('Directories','limits')
36     try:
37     os.stat(outpath)
38     except:
39     os.mkdir(outpath)
40     # parse histogram config:
41     hist_conf=config.get('Limit',var)
42     options = hist_conf.split(',')
43 bortigno 1.7 if len(options) < 12:
44 nmohr 1.68 raise Exception("You have to choose option[11]: either Mjj or BDT")
45     treevar = options[0]
46     name = options[1]
47 peller 1.1 title = options[2]
48 nmohr 1.68 nBinsRB = int(options[3])
49     xMin = float(options[4])
50     xMax = float(options[5])
51     ROOToutname = options[6]
52     RCut = options[7]
53     SCut = options[8]
54     signals = options[9].split(' ')
55     datas = options[10].split(' ')
56     anType = options[11]
57 peller 1.54 nBins= int(config.get('LimitGeneral','BDTbinning'))
58 peller 1.29 setup=eval(config.get('LimitGeneral','setup'))
59 nmohr 1.68 #Systematics:
60     if config.has_option('LimitGeneral','addSample_sys'):
61     addSample_sys = eval(config.get('LimitGeneral','addSample_sys'))
62     additionals = [addSample_sys[key] for key in addSample_sys]
63     else:
64     addSample_sys = None
65     additionals = []
66     #find out if BDT or MJJ:
67     bdt = False
68     mjj = False
69     if str(anType) == 'BDT':
70     bdt = True
71     systematics = eval(config.get('LimitGeneral','sys_BDT'))
72     elif str(anType) == 'Mjj':
73     mjj = True
74     systematics = eval(config.get('LimitGeneral','sys_Mjj'))
75     sys_cut_suffix=eval(config.get('LimitGeneral','sys_cut_suffix'))
76     systematicsnaming = eval(config.get('LimitGeneral','systematicsnaming'))
77     sys_factor_dict = eval(config.get('LimitGeneral','sys_factor'))
78     sys_affecting = eval(config.get('LimitGeneral','sys_affecting'))
79     # weightF:
80     weightF = config.get('Weights','weightF')
81     weightF_sys = eval(config.get('LimitGeneral','weightF_sys'))
82 peller 1.58
83 nmohr 1.68 # get nominal cutstring:
84     treecut = config.get('Cuts',RCut)
85     # Train flag: splitting of samples
86     TrainFlag = eval(config.get('Analysis','TrainFlag'))
87     # scaling options
88     scaling=eval(config.get('LimitGeneral','scaling'))
89     rescaleSqrtN=eval(config.get('LimitGeneral','rescaleSqrtN'))
90     # blind data option:
91     blind=eval(config.get('LimitGeneral','blind'))
92     if blind:
93     printc('red','', 'I AM BLINDED!')
94     #get List of backgrounds in use:
95     backgrounds = eval(config.get('LimitGeneral','BKG'))
96     #Groups for adding samples together
97     Group = eval(config.get('LimitGeneral','Group'))
98     #naming for DC
99     Dict= eval(config.get('LimitGeneral','Dict'))
100     #treating statistics bin-by-bin:
101     binstat = eval(config.get('LimitGeneral','binstat'))
102     # Use the rebinning:
103     rebin_active=eval(config.get('LimitGeneral','rebin_active'))
104     max_rel = float(config.get('LimitGeneral','rebin_max_rel'))
105     signal_inject=config.get('LimitGeneral','signal_inject')
106     # add signal as background
107     add_signal_as_bkg=config.get('LimitGeneral','add_signal_as_bkg')
108     if not add_signal_as_bkg == 'None':
109     setup.append(add_signal_as_bkg)
110     #----------------------------------------------------------------------------
111 peller 1.65
112 nmohr 1.68 #--Setup--------------------------------------------------------------------
113     #Assign Pt region for sys factors
114 peller 1.58 if 'HighPtLooseBTag' in ROOToutname:
115     pt_region = 'HighPtLooseBTag'
116 peller 1.61 elif 'HighPt' in ROOToutname or 'highPt' in ROOToutname or 'medPt' in ROOToutname:
117 peller 1.58 pt_region = 'HighPt'
118 peller 1.60 elif 'LowPt' in ROOToutname or 'lowPt' in ROOToutname:
119 peller 1.58 pt_region = 'LowPt'
120 nmohr 1.68 elif 'ATLAS' in ROOToutname:
121     pt_region = 'HighPt'
122     elif 'Mjj' in ROOToutname:
123     pt_region = 'HighPt'
124     else:
125 peller 1.58 print "Unknown Pt region"
126     sys.exit("Unknown Pt region")
127 nmohr 1.68 # Set rescale factor of 2 in case of TrainFalg
128 peller 1.44 if TrainFlag:
129     MC_rescale_factor=2.
130     print 'I RESCALE BY 2.0'
131     else: MC_rescale_factor = 1.
132 nmohr 1.68 #systematics up/down
133     UD = ['Up','Down']
134     # rename Bins in DC (?)
135 peller 1.11 if 'RTight' in RCut:
136 peller 1.62 Datacardbin=options[10]
137 peller 1.11 elif 'RMed' in RCut:
138 peller 1.62 Datacardbin=options[10]
139 peller 1.11 else:
140 peller 1.62 Datacardbin=options[10]
141 nmohr 1.68 #Parse samples configuration
142     info = ParseInfo(samplesinfo,path)
143     # get all the treeCut sets
144     # create different sample Lists
145     all_samples = info.get_samples(signals+backgrounds+additionals)
146     signal_samples = info.get_samples(signals)
147     background_samples = info.get_samples(backgrounds)
148     data_samples = info.get_samples(datas)
149     #cache all samples
150     #t_cache = TreeCache(cuts,all_samples,path)
151     #cache datas
152     #d_cache = TreeCache(trecut,data_samples,path)
153     #-------------------------------------------------------------------------------------------------
154    
155     optionsList=[]
156    
157     def appendList(): optionsList.append({'cut':copy(_cut),'var':copy(_treevar),'name':copy(_name),'nBins':nBins,'xMin':xMin,'xMax':xMax,'weight':copy(_weight),'blind':copy(_blind)})
158    
159     #nominal
160     _cut = treecut
161     _treevar = treevar
162     _name = title
163     _weight = weightF
164     _blind = blind
165     appendList()
166    
167     #the 4 sys
168     for syst in systematics:
169     for Q in UD:
170     #default:
171     _cut = treecut
172     _name = title
173     _weight = weightF
174     #replace cut string
175     new_cut=sys_cut_suffix[syst]
176     if not new_cut == 'nominal':
177     old_str,new_str=new_cut.split('>')
178     _cut = treecut.replace(old_str,new_str.replace('?',Q))
179     _name = title
180     _weight = weightF
181     #replace tree variable
182     if bdt == True:
183     ff[1]='%s_%s'%(sys,Q.lower())
184     _treevar = nominalShape.replace('.nominal','.%s_%s'%(sys,Q.lower()))
185     elif mjj == True:
186     if sys == 'JER' or sys == 'JES':
187     _treevar = 'H_%s.mass_%s'%(sys,Q.lower())
188     else:
189     _treevar = treevar
190     #append
191     appendList()
192    
193     #UEPS
194     if weightF_sys:
195     for _weight in [config.get('Weights','weightF_sys_UP'),config.get('Weights','weightF_sys_DOWN')]:
196     _cut = treecut
197     _treevar = treevar
198     _name = title
199     appendList()
200    
201     #for option in optionsList:
202     # print option['cut']
203    
204    
205     mc_hMaker = HistoMaker(all_samples,path,config,optionsList)
206     data_hMaker = HistoMaker(data_samples,path,config,[optionsList[0]])
207 peller 1.36
208 nmohr 1.68 #Calculate lumi
209     lumi = 0.
210     nData = 0
211     for job in data_samples:
212     nData += 1
213     lumi += float(job.lumi)
214 peller 1.57
215 nmohr 1.68 if nData > 1:
216     lumi = lumi/float(nData)
217 nmohr 1.24
218 nmohr 1.68 mc_hMaker.lumi = lumi
219     data_hMaker.lumi = lumi
220 peller 1.62
221 peller 1.69 mc_hMaker.calc_rebin(background_samples)
222     #transfer rebinning info to data maker
223     data_hMaker.norebin_nBins = mc_hMaker.norebin_nBins
224     data_hMaker.rebin_nBins = mc_hMaker.rebin_nBins
225     data_hMaker.mybinning = mc_hMaker.mybinning
226    
227     data_hMaker.rebin = True
228    
229     #mc_hMaker.rebin = False
230     #data_hMaker.rebin = False
231    
232 nmohr 1.68 all_histos = {}
233     data_histos = {}
234 peller 1.53
235 nmohr 1.68 for job in all_samples:
236     all_histos[job.name] = mc_hMaker.get_histos_from_tree(job)
237 peller 1.53
238 nmohr 1.68 for job in data_samples:
239     data_histos[job.name] = data_hMaker.get_histos_from_tree(job)[0]['DATA']
240 peller 1.23
241 nmohr 1.68 nData = 0
242     for job in data_histos:
243     if nData == 0:
244     theData = data_histos[job]
245     else:
246     theData.Add(data_histos[i])
247 peller 1.11
248 nmohr 1.68 #-- Write Files-----------------------------------------------------------------------------------
249     # generate the TH outfile:
250     outfile = ROOT.TFile(outpath+'vhbb_TH_'+ROOToutname+'.root', 'RECREATE')
251     outfile.mkdir(Datacardbin,Datacardbin)
252     outfile.cd(Datacardbin)
253     # generate the Workspace:
254 peller 1.62 WS = ROOT.RooWorkspace('%s'%Datacardbin,'%s'%Datacardbin) #Zee
255 peller 1.1 print 'WS initialized'
256 bortigno 1.7 disc= ROOT.RooRealVar(name,name,xMin,xMax)
257 peller 1.1 obs = ROOT.RooArgList(disc)
258 nmohr 1.68 #
259 peller 1.1 ROOT.gROOT.SetStyle("Plain")
260 peller 1.53
261    
262 nmohr 1.68 # ToDo:
263 peller 1.53 #---- get the BKG for the rebinning calculation----
264 nmohr 1.68 #Rebinner.calculate_binning(hDummyRB,max_rel)
265     #myBinning=Rebinner(int(nBins),array('d',[-1.0]+[hDummyRB.GetBinLowEdge(i) for i in binlist]),rebin_active)
266 peller 1.53 #--------------------------------------------------
267    
268 nmohr 1.68 #order and add all together
269     final_histos = {}
270 peller 1.57
271 nmohr 1.68 print '\n\t--> Ordering and Adding Histos\n'
272 peller 1.53
273 nmohr 1.68 #NOMINAL:
274     final_histos['nominal'] = HistoMaker.orderandadd([all_histos['%s'%job][0] for job in all_samples],setup)
275 peller 1.53
276 nmohr 1.68 #SYSTEMATICS:
277     ind = 1
278     for syst in systematics:
279     for Q in UD:
280     final_histos['%s_%s'%(systematicsnaming[syst],Q)] = HistoMaker.orderandadd([all_histos['%s'%job][ind] for job in all_samples],setup)
281     ind+=1
282    
283     if weightF_sys:
284     for Q in UD:
285     final_histos['%s_%s'%(systematicsnaming['weightF_sys'],Q)]= HistoMaker.orderandadd([all_histos['%s'%job][ind] for job in all_samples],setup)
286     ind+=1
287    
288     #make statistical shapes:
289     for Q in UD:
290     final_histos['%s_%s'%(systematicsnaming['stats'],Q)] = {}
291     for job,hist in final_histos['nominal'].items():
292     for Q in UD:
293     final_histos['%s_%s'%(systematicsnaming['stats'],Q)][job] = hist.Clone()
294     for j in range(hist.GetNbinsX()+1):
295     if Q == 'Up':
296     final_histos['%s_%s'%(systematicsnaming['stats'],Q)][job].SetBinContent(j,max(0,hist.GetBinContent(j)+hist.GetBinError(j)))
297     if Q == 'Down':
298     final_histos['%s_%s'%(systematicsnaming['stats'],Q)][job].SetBinContent(j,max(0,hist.GetBinContent(j)-hist.GetBinError(j)))
299    
300     #write shapes in WS:
301     for key in final_histos:
302     for job, hist in final_histos[key].items():
303     if 'nominal' == key:
304     rooDataHist = ROOT.RooDataHist('%s' %(job),'%s'%(job),obs, hist)
305     getattr(WS,'import')(rooDataHist)
306     for Q in UD:
307     if Q in key:
308     theSyst = key.replace('_%s'%Q,'')
309     else:
310     continue
311     if 'stats' in key:
312     nameSyst = '%s_%s_%s' %(theSyst,job,Datacardbin)
313     else:
314     nameSyst = theSyst
315     rooDataHist = ROOT.RooDataHist('%s%s%s' %(job,nameSyst,Q),'%s%s%s'%(job,nameSyst,Q),obs, hist)
316     getattr(WS,'import')(rooDataHist)
317 peller 1.48
318 nmohr 1.68 rooDataHist = ROOT.RooDataHist('data_obs','data_obs',obs, theData)
319     getattr(WS,'import')(rooDataHist)
320 peller 1.13
321 nmohr 1.68 WS.writeToFile(outpath+'vhbb_WS_'+ROOToutname+'.root')
322 peller 1.47
323 nmohr 1.68 # now we have a Dict final_histos with sets of all grouped MCs for all systematics:
324     # nominal, ($SYS_Up/Down)*4, weightF_sys_Up/Down, stats_Up/Down
325 peller 1.36
326 peller 1.69 print '\n\t >>> PRINTOUT PRETTY TABLE <<<\n'
327     #header
328     printout = ''
329     printout += '%-25s'%'Process'
330     printout += ':'
331     for item, val in final_histos['nominal'].items():
332     printout += '%-10s'%item
333     print printout+'\n'
334 nmohr 1.68 for key in final_histos:
335     printout = ''
336 peller 1.69 printout += '%-25s'%key
337     printout += ':'
338 nmohr 1.68 for item, val in final_histos[key].items():
339 peller 1.69 printout += '%-10s'%'%0.5f'%val.Integral()
340 nmohr 1.68 print printout
341 peller 1.1
342 nmohr 1.68 #-----------------------------------------------------------------------------------------------------------
343 peller 1.36
344 peller 1.62 # -------------------- write DATAcard: ----------------------------------------------------------------------
345     DCprocessseparatordict = {'WS':':','TH':'/'}
346 nmohr 1.68 # create two datacards: for TH an WS
347 peller 1.62 for DCtype in ['WS','TH']:
348     columns=len(setup)
349     f = open(outpath+'vhbb_DC_%s_%s.txt'%(DCtype,ROOToutname),'w')
350     f.write('imax\t1\tnumber of channels\n')
351     f.write('jmax\t%s\tnumber of backgrounds (\'*\' = automatic)\n'%(columns-1))
352     f.write('kmax\t*\tnumber of nuisance parameters (sources of systematical uncertainties)\n\n')
353     f.write('shapes * * vhbb_%s_%s.root $CHANNEL%s$PROCESS $CHANNEL%s$PROCESS$SYSTEMATIC\n\n'%(DCtype,ROOToutname,DCprocessseparatordict[DCtype],DCprocessseparatordict[DCtype]))
354     f.write('bin\t%s\n\n'%Datacardbin)
355 nmohr 1.68 f.write('observation\t%s\n\n'%(int(theData.Integral())))
356     # datacard bin
357 peller 1.62 f.write('bin')
358     for c in range(0,columns): f.write('\t%s'%Datacardbin)
359     f.write('\n')
360 nmohr 1.68 # datacard process
361 peller 1.62 f.write('process')
362     for c in setup: f.write('\t%s'%Dict[c])
363     f.write('\n')
364     f.write('process')
365     for c in range(0,columns): f.write('\t%s'%c)
366     f.write('\n')
367 nmohr 1.68 # datacard yields
368 peller 1.62 f.write('rate')
369 nmohr 1.68 for c in setup:
370     f.write('\t%s'%final_histos['nominal'][c].Integral())
371 peller 1.29 f.write('\n')
372 nmohr 1.68 # get list of systematics in use
373 peller 1.62 InUse=eval(config.get('Datacard','InUse_%s'%pt_region))
374 nmohr 1.68 # write non-shape systematics
375 peller 1.62 for item in InUse:
376     f.write(item)
377     what=eval(config.get('Datacard',item))
378     f.write('\t%s'%what['type'])
379     for c in setup:
380     if c in what:
381     if item == 'CMS_eff_e' and 'Zmm' in options[10]: f.write('\t-')
382     elif item == 'CMS_eff_m' and 'Zee' in options[10]: f.write('\t-')
383     elif item == 'CMS_trigger_e' and 'Zmm' in options[10]: f.write('\t-')
384     elif item == 'CMS_trigger_m' and 'Zee' in options[10]: f.write('\t-')
385 peller 1.35 else:
386 peller 1.62 f.write('\t%s'%what[c])
387 peller 1.35 else:
388     f.write('\t-')
389     f.write('\n')
390 nmohr 1.68 # Write statistical shape variations
391 peller 1.62 if binstat:
392     for c in setup:
393     for bin in range(0,nBins):
394     f.write('CMS_vhbb_stats_%s_%s_%s\tshape'%(Dict[c], bin, options[10]))
395     for it in range(0,columns):
396     if it == setup.index(c):
397     f.write('\t1.0')
398     else:
399     f.write('\t-')
400     f.write('\n')
401     else:
402 nmohr 1.51 for c in setup:
403 peller 1.62 f.write('CMS_vhbb_stats_%s_%s\tshape'%(Dict[c], options[10]))
404 nmohr 1.37 for it in range(0,columns):
405     if it == setup.index(c):
406 peller 1.62 f.write('\t1.0')
407 nmohr 1.37 else:
408 peller 1.62 f.write('\t-')
409 nmohr 1.37 f.write('\n')
410 nmohr 1.68 # UEPS systematics
411 peller 1.62 if weightF_sys:
412     f.write('UEPS\tshape')
413     for it in range(0,columns): f.write('\t1.0')
414     f.write('\n')
415 nmohr 1.68 # additional sample systematics
416 peller 1.62 if addSample_sys:
417     alreadyAdded = []
418     for newSample in addSample_sys.iterkeys():
419     for c in setup:
420     if not c == Group[newSample]: continue
421     if Dict[c] in alreadyAdded: continue
422     f.write('CMS_vhbb_model_%s\tshape'%(Dict[c]))
423     for it in range(0,columns):
424     if it == setup.index(c):
425     f.write('\t1.0')
426     else:
427     f.write('\t-')
428     f.write('\n')
429     alreadyAdded.append(Dict[c])
430 nmohr 1.68 # regular systematics
431 peller 1.62 for sys in systematics:
432     sys_factor=sys_factor_dict[sys]
433     f.write('%s\tshape'%systematicsnaming[sys])
434 peller 1.63 for c in setup:
435     if c in sys_affecting[sys]:
436     f.write('\t%s'%sys_factor)
437     else:
438     f.write('\t-')
439 peller 1.62 f.write('\n')
440     f.close()
441 nmohr 1.68 # --------------------------------------------------------------------------
442    
443    
444    
445    
446 nmohr 1.31 outfile.Close()