ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/workspace_datacard.py
Revision: 1.74
Committed: Wed Feb 6 12:55:04 2013 UTC (12 years, 3 months ago) by peller
Content type: text/x-python
Branch: MAIN
Changes since 1.73: +13 -7 lines
Log Message:
bugfix

File Contents

# User Rev Content
1 peller 1.8 #!/usr/bin/env python
2 peller 1.53 import os, sys, ROOT, warnings, pickle
3 nmohr 1.68 ROOT.gROOT.SetBatch(True)
4 peller 1.1 from array import array
5     from math import sqrt
6 nmohr 1.68 from copy import copy, deepcopy
7 peller 1.1 #suppres the EvalInstace conversion warning bug
8     warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
9 nmohr 1.31 from optparse import OptionParser
10 nmohr 1.68 from myutils import BetterConfigParser, Sample, progbar, printc, ParseInfo, Rebinner, TreeCache, HistoMaker
11 peller 1.1
12 nmohr 1.68 #--CONFIGURE---------------------------------------------------------------------
13 nmohr 1.31 argv = sys.argv
14     parser = OptionParser()
15     parser.add_option("-V", "--var", dest="variable", default="",
16     help="variable for shape analysis")
17     parser.add_option("-C", "--config", dest="config", default=[], action="append",
18     help="configuration file")
19     (opts, args) = parser.parse_args(argv)
20 nmohr 1.20 config = BetterConfigParser()
21 nmohr 1.31 config.read(opts.config)
22 nmohr 1.68 var=opts.variable
23     #-------------------------------------------------------------------------------
24    
25     #--read variables from config---------------------------------------------------
26     # 7 or 8TeV Analysis
27 nmohr 1.31 anaTag = config.get("Analysis","tag")
28 nmohr 1.68 if not any([anaTag == '7TeV',anaTag == '8TeV']):
29     raise Exception("anaTag %s unknown. Specify 8TeV or 7TeV in the general config"%anaTag)
30     # Directories:
31 peller 1.1 Wdir=config.get('Directories','Wdir')
32 peller 1.42 vhbbpath=config.get('Directories','vhbbpath')
33 nmohr 1.46 samplesinfo=config.get('Directories','samplesinfo')
34 peller 1.67 path = config.get('Directories','dcSamples')
35 nmohr 1.68 outpath=config.get('Directories','limits')
36     try:
37     os.stat(outpath)
38     except:
39     os.mkdir(outpath)
40     # parse histogram config:
41     hist_conf=config.get('Limit',var)
42     options = hist_conf.split(',')
43 bortigno 1.7 if len(options) < 12:
44 nmohr 1.68 raise Exception("You have to choose option[11]: either Mjj or BDT")
45     treevar = options[0]
46     name = options[1]
47 peller 1.1 title = options[2]
48 peller 1.72 nBins = int(options[3])
49 nmohr 1.68 xMin = float(options[4])
50     xMax = float(options[5])
51     ROOToutname = options[6]
52     RCut = options[7]
53     SCut = options[8]
54     signals = options[9].split(' ')
55     datas = options[10].split(' ')
56     anType = options[11]
57 peller 1.29 setup=eval(config.get('LimitGeneral','setup'))
58 nmohr 1.68 #Systematics:
59     if config.has_option('LimitGeneral','addSample_sys'):
60     addSample_sys = eval(config.get('LimitGeneral','addSample_sys'))
61     additionals = [addSample_sys[key] for key in addSample_sys]
62     else:
63     addSample_sys = None
64     additionals = []
65     #find out if BDT or MJJ:
66     bdt = False
67     mjj = False
68     if str(anType) == 'BDT':
69     bdt = True
70     systematics = eval(config.get('LimitGeneral','sys_BDT'))
71     elif str(anType) == 'Mjj':
72     mjj = True
73     systematics = eval(config.get('LimitGeneral','sys_Mjj'))
74     sys_cut_suffix=eval(config.get('LimitGeneral','sys_cut_suffix'))
75     systematicsnaming = eval(config.get('LimitGeneral','systematicsnaming'))
76     sys_factor_dict = eval(config.get('LimitGeneral','sys_factor'))
77     sys_affecting = eval(config.get('LimitGeneral','sys_affecting'))
78     # weightF:
79     weightF = config.get('Weights','weightF')
80     weightF_sys = eval(config.get('LimitGeneral','weightF_sys'))
81 peller 1.58
82 nmohr 1.68 # get nominal cutstring:
83     treecut = config.get('Cuts',RCut)
84     # Train flag: splitting of samples
85     TrainFlag = eval(config.get('Analysis','TrainFlag'))
86     # blind data option:
87     blind=eval(config.get('LimitGeneral','blind'))
88     if blind:
89     printc('red','', 'I AM BLINDED!')
90     #get List of backgrounds in use:
91     backgrounds = eval(config.get('LimitGeneral','BKG'))
92     #Groups for adding samples together
93     Group = eval(config.get('LimitGeneral','Group'))
94     #naming for DC
95     Dict= eval(config.get('LimitGeneral','Dict'))
96     #treating statistics bin-by-bin:
97     binstat = eval(config.get('LimitGeneral','binstat'))
98     # Use the rebinning:
99     rebin_active=eval(config.get('LimitGeneral','rebin_active'))
100 peller 1.72 #max_rel = float(config.get('LimitGeneral','rebin_max_rel'))
101 nmohr 1.68 signal_inject=config.get('LimitGeneral','signal_inject')
102     # add signal as background
103     add_signal_as_bkg=config.get('LimitGeneral','add_signal_as_bkg')
104     if not add_signal_as_bkg == 'None':
105     setup.append(add_signal_as_bkg)
106     #----------------------------------------------------------------------------
107 peller 1.65
108 nmohr 1.68 #--Setup--------------------------------------------------------------------
109     #Assign Pt region for sys factors
110 peller 1.58 if 'HighPtLooseBTag' in ROOToutname:
111     pt_region = 'HighPtLooseBTag'
112 peller 1.61 elif 'HighPt' in ROOToutname or 'highPt' in ROOToutname or 'medPt' in ROOToutname:
113 peller 1.58 pt_region = 'HighPt'
114 peller 1.60 elif 'LowPt' in ROOToutname or 'lowPt' in ROOToutname:
115 peller 1.58 pt_region = 'LowPt'
116 nmohr 1.68 elif 'ATLAS' in ROOToutname:
117     pt_region = 'HighPt'
118     elif 'Mjj' in ROOToutname:
119     pt_region = 'HighPt'
120     else:
121 peller 1.58 print "Unknown Pt region"
122     sys.exit("Unknown Pt region")
123 nmohr 1.68 # Set rescale factor of 2 in case of TrainFalg
124 peller 1.44 if TrainFlag:
125     MC_rescale_factor=2.
126     print 'I RESCALE BY 2.0'
127     else: MC_rescale_factor = 1.
128 nmohr 1.68 #systematics up/down
129     UD = ['Up','Down']
130     # rename Bins in DC (?)
131 peller 1.11 if 'RTight' in RCut:
132 peller 1.62 Datacardbin=options[10]
133 peller 1.11 elif 'RMed' in RCut:
134 peller 1.62 Datacardbin=options[10]
135 peller 1.11 else:
136 peller 1.62 Datacardbin=options[10]
137 nmohr 1.68 #Parse samples configuration
138     info = ParseInfo(samplesinfo,path)
139     # get all the treeCut sets
140     # create different sample Lists
141     all_samples = info.get_samples(signals+backgrounds+additionals)
142     signal_samples = info.get_samples(signals)
143     background_samples = info.get_samples(backgrounds)
144     data_samples = info.get_samples(datas)
145     #cache all samples
146     #t_cache = TreeCache(cuts,all_samples,path)
147     #cache datas
148     #d_cache = TreeCache(trecut,data_samples,path)
149     #-------------------------------------------------------------------------------------------------
150    
151     optionsList=[]
152    
153     def appendList(): optionsList.append({'cut':copy(_cut),'var':copy(_treevar),'name':copy(_name),'nBins':nBins,'xMin':xMin,'xMax':xMax,'weight':copy(_weight),'blind':copy(_blind)})
154    
155     #nominal
156     _cut = treecut
157     _treevar = treevar
158     _name = title
159     _weight = weightF
160     _blind = blind
161     appendList()
162    
163     #the 4 sys
164     for syst in systematics:
165     for Q in UD:
166     #default:
167     _cut = treecut
168     _name = title
169     _weight = weightF
170     #replace cut string
171     new_cut=sys_cut_suffix[syst]
172     if not new_cut == 'nominal':
173     old_str,new_str=new_cut.split('>')
174     _cut = treecut.replace(old_str,new_str.replace('?',Q))
175     _name = title
176     _weight = weightF
177     #replace tree variable
178     if bdt == True:
179     ff[1]='%s_%s'%(sys,Q.lower())
180     _treevar = nominalShape.replace('.nominal','.%s_%s'%(sys,Q.lower()))
181     elif mjj == True:
182     if sys == 'JER' or sys == 'JES':
183     _treevar = 'H_%s.mass_%s'%(sys,Q.lower())
184     else:
185     _treevar = treevar
186     #append
187     appendList()
188    
189     #UEPS
190     if weightF_sys:
191     for _weight in [config.get('Weights','weightF_sys_UP'),config.get('Weights','weightF_sys_DOWN')]:
192     _cut = treecut
193     _treevar = treevar
194     _name = title
195     appendList()
196    
197     #for option in optionsList:
198     # print option['cut']
199    
200    
201     mc_hMaker = HistoMaker(all_samples,path,config,optionsList)
202     data_hMaker = HistoMaker(data_samples,path,config,[optionsList[0]])
203     #Calculate lumi
204     lumi = 0.
205     nData = 0
206     for job in data_samples:
207     nData += 1
208     lumi += float(job.lumi)
209 peller 1.57
210 nmohr 1.68 if nData > 1:
211     lumi = lumi/float(nData)
212 nmohr 1.24
213 nmohr 1.68 mc_hMaker.lumi = lumi
214     data_hMaker.lumi = lumi
215 peller 1.62
216 peller 1.69
217 peller 1.72 if rebin_active:
218     mc_hMaker.calc_rebin(background_samples)
219     #transfer rebinning info to data maker
220 peller 1.74 data_hMaker.norebin_nBins = copy(mc_hMaker.norebin_nBins)
221     data_hMaker.rebin_nBins = copy(mc_hMaker.rebin_nBins)
222     data_hMaker.mybinning = deepcopy(mc_hMaker.mybinning)
223 peller 1.72 data_hMaker.rebin = True
224 peller 1.69
225     #mc_hMaker.rebin = False
226     #data_hMaker.rebin = False
227    
228 nmohr 1.68 all_histos = {}
229     data_histos = {}
230 peller 1.53
231 nmohr 1.68 for job in all_samples:
232     all_histos[job.name] = mc_hMaker.get_histos_from_tree(job)
233 peller 1.53
234 nmohr 1.68 for job in data_samples:
235     data_histos[job.name] = data_hMaker.get_histos_from_tree(job)[0]['DATA']
236 peller 1.23
237 nmohr 1.68 nData = 0
238     for job in data_histos:
239     if nData == 0:
240     theData = data_histos[job]
241     else:
242     theData.Add(data_histos[i])
243 peller 1.11
244 nmohr 1.68 #-- Write Files-----------------------------------------------------------------------------------
245     # generate the TH outfile:
246     outfile = ROOT.TFile(outpath+'vhbb_TH_'+ROOToutname+'.root', 'RECREATE')
247     outfile.mkdir(Datacardbin,Datacardbin)
248     outfile.cd(Datacardbin)
249     # generate the Workspace:
250 peller 1.62 WS = ROOT.RooWorkspace('%s'%Datacardbin,'%s'%Datacardbin) #Zee
251 peller 1.1 print 'WS initialized'
252 bortigno 1.7 disc= ROOT.RooRealVar(name,name,xMin,xMax)
253 peller 1.1 obs = ROOT.RooArgList(disc)
254 nmohr 1.68 #
255 peller 1.1 ROOT.gROOT.SetStyle("Plain")
256 peller 1.53
257    
258 nmohr 1.68 # ToDo:
259 peller 1.53 #---- get the BKG for the rebinning calculation----
260 nmohr 1.68 #Rebinner.calculate_binning(hDummyRB,max_rel)
261     #myBinning=Rebinner(int(nBins),array('d',[-1.0]+[hDummyRB.GetBinLowEdge(i) for i in binlist]),rebin_active)
262 peller 1.53 #--------------------------------------------------
263    
264 nmohr 1.68 #order and add all together
265     final_histos = {}
266 peller 1.57
267 nmohr 1.68 print '\n\t--> Ordering and Adding Histos\n'
268 peller 1.53
269 nmohr 1.68 #NOMINAL:
270     final_histos['nominal'] = HistoMaker.orderandadd([all_histos['%s'%job][0] for job in all_samples],setup)
271 peller 1.53
272 nmohr 1.68 #SYSTEMATICS:
273     ind = 1
274     for syst in systematics:
275     for Q in UD:
276 nmohr 1.71 final_histos['%s_%s'%(systematicsnaming[syst],Q)] = HistoMaker.orderandadd([all_histos[job.name][ind] for job in all_samples],setup)
277 nmohr 1.68 ind+=1
278    
279     if weightF_sys:
280     for Q in UD:
281 nmohr 1.71 final_histos['%s_%s'%(systematicsnaming['weightF_sys'],Q)]= HistoMaker.orderandadd([all_histos[job.name][ind] for job in all_samples],setup)
282 nmohr 1.68 ind+=1
283    
284 nmohr 1.71 def get_alternate_shape(hNominal,hAlternate):
285     hVar = hAlternate.Clone()
286     hNom = hNominal.Clone()
287     hAlt = hNom.Clone()
288     hNom.Add(hVar,-1.)
289     hAlt.Add(hNom)
290     for bin in range(0,hNominal.GetNbinsX()+1):
291     if hAlt.GetBinContent(bin) < 0.: hAlt.SetBinContent(bin,0.)
292     return hVar,hAlt
293    
294     def get_alternate_shapes(all_histos,asample_dict,all_samples):
295     alternate_shapes_up = []
296     alternate_shapes_down = []
297     for job in all_samples:
298     nominal = all_histos[job.name][0]
299     if job.name in asample_dict:
300 peller 1.74 print 'calc add shape %s'%job
301 nmohr 1.71 alternate = copy(all_histos[asample_dict[job.name]][0])
302     hUp, hDown = get_alternate_shape(nominal[nominal.keys()[0]],alternate[alternate.keys()[0]])
303     alternate_shapes_up.append({nominal.keys()[0]:hUp})
304     alternate_shapes_down.append({nominal.keys()[0]:hDown})
305     else:
306 peller 1.74 print 'copy add shape %s'%job
307     #hUp, hDown = get_alternate_shape(nominal[nominal.keys()[0]],nominal[nominal.keys()[0]])
308     #alternate_shapes_up.append({nominal.keys()[0]:hUp})
309     #alternate_shapes_down.append({nominal.keys()[0]:hDown})
310     newh=nominal[nominal.keys()[0]].Clone('%s_%s_Up'%(nominal[nominal.keys()[0]].GetName(),'model'))
311     alternate_shapes_up.append({nominal.keys()[0]:nominal[nominal.keys()[0]].Clone()})
312     alternate_shapes_down.append({nominal.keys()[0]:nominal[nominal.keys()[0]].Clone()})
313 nmohr 1.71 return alternate_shapes_up, alternate_shapes_down
314    
315     if addSample_sys:
316     aUp, aDown = get_alternate_shapes(all_histos,addSample_sys,all_samples)
317     final_histos['%s_Up'%(systematicsnaming['model'])]= HistoMaker.orderandadd(aUp,setup)
318 peller 1.74 del aUp
319 nmohr 1.71 final_histos['%s_Down'%(systematicsnaming['model'])]= HistoMaker.orderandadd(aDown,setup)
320    
321 nmohr 1.68 #make statistical shapes:
322     for Q in UD:
323     final_histos['%s_%s'%(systematicsnaming['stats'],Q)] = {}
324     for job,hist in final_histos['nominal'].items():
325     for Q in UD:
326     final_histos['%s_%s'%(systematicsnaming['stats'],Q)][job] = hist.Clone()
327     for j in range(hist.GetNbinsX()+1):
328     if Q == 'Up':
329     final_histos['%s_%s'%(systematicsnaming['stats'],Q)][job].SetBinContent(j,max(0,hist.GetBinContent(j)+hist.GetBinError(j)))
330     if Q == 'Down':
331     final_histos['%s_%s'%(systematicsnaming['stats'],Q)][job].SetBinContent(j,max(0,hist.GetBinContent(j)-hist.GetBinError(j)))
332    
333     #write shapes in WS:
334     for key in final_histos:
335     for job, hist in final_histos[key].items():
336     if 'nominal' == key:
337 nmohr 1.71 hist.SetName('%s'%(Dict[job]))
338 peller 1.70 hist.Write()
339 nmohr 1.71 rooDataHist = ROOT.RooDataHist('%s' %(Dict[job]),'%s'%(Dict[job]),obs, hist)
340 nmohr 1.68 getattr(WS,'import')(rooDataHist)
341     for Q in UD:
342     if Q in key:
343     theSyst = key.replace('_%s'%Q,'')
344     else:
345     continue
346 nmohr 1.71 if systematicsnaming['stats'] in key:
347     nameSyst = '%s_%s_%s' %(theSyst,Dict[job],Datacardbin)
348     elif systematicsnaming['model'] in key:
349     nameSyst = '%s_%s' %(theSyst,Dict[job])
350 nmohr 1.68 else:
351     nameSyst = theSyst
352 nmohr 1.71 hist.SetName('%s%s%s' %(Dict[job],nameSyst,Q))
353 peller 1.70 hist.Write()
354 nmohr 1.71 rooDataHist = ROOT.RooDataHist('%s%s%s' %(Dict[job],nameSyst,Q),'%s%s%s'%(Dict[job],nameSyst,Q),obs, hist)
355 nmohr 1.68 getattr(WS,'import')(rooDataHist)
356 peller 1.48
357 peller 1.70 theData.SetName('data_obs')
358     theData.Write()
359 nmohr 1.68 rooDataHist = ROOT.RooDataHist('data_obs','data_obs',obs, theData)
360     getattr(WS,'import')(rooDataHist)
361 peller 1.13
362 nmohr 1.68 WS.writeToFile(outpath+'vhbb_WS_'+ROOToutname+'.root')
363 peller 1.47
364 nmohr 1.68 # now we have a Dict final_histos with sets of all grouped MCs for all systematics:
365     # nominal, ($SYS_Up/Down)*4, weightF_sys_Up/Down, stats_Up/Down
366 peller 1.36
367 peller 1.69 print '\n\t >>> PRINTOUT PRETTY TABLE <<<\n'
368     #header
369     printout = ''
370     printout += '%-25s'%'Process'
371     printout += ':'
372     for item, val in final_histos['nominal'].items():
373 peller 1.70 printout += '%-12s'%item
374 peller 1.69 print printout+'\n'
375 nmohr 1.68 for key in final_histos:
376     printout = ''
377 peller 1.69 printout += '%-25s'%key
378     printout += ':'
379 nmohr 1.68 for item, val in final_histos[key].items():
380 peller 1.74 printout += '%-12s'%str('%0.5f'%val.Integral())
381 nmohr 1.68 print printout
382 peller 1.1
383 nmohr 1.68 #-----------------------------------------------------------------------------------------------------------
384 peller 1.36
385 peller 1.62 # -------------------- write DATAcard: ----------------------------------------------------------------------
386     DCprocessseparatordict = {'WS':':','TH':'/'}
387 nmohr 1.68 # create two datacards: for TH an WS
388 peller 1.62 for DCtype in ['WS','TH']:
389     columns=len(setup)
390     f = open(outpath+'vhbb_DC_%s_%s.txt'%(DCtype,ROOToutname),'w')
391     f.write('imax\t1\tnumber of channels\n')
392     f.write('jmax\t%s\tnumber of backgrounds (\'*\' = automatic)\n'%(columns-1))
393     f.write('kmax\t*\tnumber of nuisance parameters (sources of systematical uncertainties)\n\n')
394     f.write('shapes * * vhbb_%s_%s.root $CHANNEL%s$PROCESS $CHANNEL%s$PROCESS$SYSTEMATIC\n\n'%(DCtype,ROOToutname,DCprocessseparatordict[DCtype],DCprocessseparatordict[DCtype]))
395     f.write('bin\t%s\n\n'%Datacardbin)
396 nmohr 1.68 f.write('observation\t%s\n\n'%(int(theData.Integral())))
397     # datacard bin
398 peller 1.62 f.write('bin')
399     for c in range(0,columns): f.write('\t%s'%Datacardbin)
400     f.write('\n')
401 nmohr 1.68 # datacard process
402 peller 1.62 f.write('process')
403     for c in setup: f.write('\t%s'%Dict[c])
404     f.write('\n')
405     f.write('process')
406     for c in range(0,columns): f.write('\t%s'%c)
407     f.write('\n')
408 nmohr 1.68 # datacard yields
409 peller 1.62 f.write('rate')
410 nmohr 1.68 for c in setup:
411     f.write('\t%s'%final_histos['nominal'][c].Integral())
412 peller 1.29 f.write('\n')
413 nmohr 1.68 # get list of systematics in use
414 peller 1.62 InUse=eval(config.get('Datacard','InUse_%s'%pt_region))
415 nmohr 1.68 # write non-shape systematics
416 peller 1.62 for item in InUse:
417     f.write(item)
418     what=eval(config.get('Datacard',item))
419     f.write('\t%s'%what['type'])
420     for c in setup:
421     if c in what:
422     if item == 'CMS_eff_e' and 'Zmm' in options[10]: f.write('\t-')
423     elif item == 'CMS_eff_m' and 'Zee' in options[10]: f.write('\t-')
424     elif item == 'CMS_trigger_e' and 'Zmm' in options[10]: f.write('\t-')
425     elif item == 'CMS_trigger_m' and 'Zee' in options[10]: f.write('\t-')
426 peller 1.35 else:
427 peller 1.62 f.write('\t%s'%what[c])
428 peller 1.35 else:
429     f.write('\t-')
430     f.write('\n')
431 nmohr 1.68 # Write statistical shape variations
432 peller 1.62 if binstat:
433     for c in setup:
434     for bin in range(0,nBins):
435 nmohr 1.71 f.write('%s_%s_%s_%s\tshape'%(systematicsnaming['stats'],Dict[c], bin, options[10]))
436 peller 1.62 for it in range(0,columns):
437     if it == setup.index(c):
438     f.write('\t1.0')
439     else:
440     f.write('\t-')
441     f.write('\n')
442     else:
443 nmohr 1.51 for c in setup:
444 nmohr 1.71 f.write('%s_%s_%s\tshape'%(systematicsnaming['stats'],Dict[c], options[10]))
445 nmohr 1.37 for it in range(0,columns):
446     if it == setup.index(c):
447 peller 1.62 f.write('\t1.0')
448 nmohr 1.37 else:
449 peller 1.62 f.write('\t-')
450 nmohr 1.37 f.write('\n')
451 nmohr 1.68 # UEPS systematics
452 peller 1.62 if weightF_sys:
453     f.write('UEPS\tshape')
454     for it in range(0,columns): f.write('\t1.0')
455     f.write('\n')
456 nmohr 1.68 # additional sample systematics
457 peller 1.62 if addSample_sys:
458     alreadyAdded = []
459     for newSample in addSample_sys.iterkeys():
460     for c in setup:
461     if not c == Group[newSample]: continue
462     if Dict[c] in alreadyAdded: continue
463 nmohr 1.71 f.write('%s_%s\tshape'%(systematicsnaming['model'],Dict[c]))
464 peller 1.62 for it in range(0,columns):
465     if it == setup.index(c):
466     f.write('\t1.0')
467     else:
468     f.write('\t-')
469     f.write('\n')
470     alreadyAdded.append(Dict[c])
471 nmohr 1.68 # regular systematics
472 peller 1.62 for sys in systematics:
473     sys_factor=sys_factor_dict[sys]
474     f.write('%s\tshape'%systematicsnaming[sys])
475 peller 1.63 for c in setup:
476     if c in sys_affecting[sys]:
477     f.write('\t%s'%sys_factor)
478     else:
479     f.write('\t-')
480 peller 1.62 f.write('\n')
481     f.close()
482 nmohr 1.68 # --------------------------------------------------------------------------
483    
484    
485    
486    
487 nmohr 1.31 outfile.Close()