ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/workspace_datacard.py
Revision: 1.71
Committed: Tue Feb 5 15:14:48 2013 UTC (12 years, 3 months ago) by nmohr
Content type: text/x-python
Branch: MAIN
Changes since 1.70: +43 -11 lines
Log Message:
Add additional sample syst

File Contents

# User Rev Content
1 peller 1.8 #!/usr/bin/env python
2 peller 1.53 import os, sys, ROOT, warnings, pickle
3 nmohr 1.68 ROOT.gROOT.SetBatch(True)
4 peller 1.1 from array import array
5     from math import sqrt
6 nmohr 1.68 from copy import copy, deepcopy
7 peller 1.1 #suppres the EvalInstace conversion warning bug
8     warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
9 nmohr 1.31 from optparse import OptionParser
10 nmohr 1.68 from myutils import BetterConfigParser, Sample, progbar, printc, ParseInfo, Rebinner, TreeCache, HistoMaker
11 peller 1.1
12 nmohr 1.68 #--CONFIGURE---------------------------------------------------------------------
13 nmohr 1.31 argv = sys.argv
14     parser = OptionParser()
15     parser.add_option("-V", "--var", dest="variable", default="",
16     help="variable for shape analysis")
17     parser.add_option("-C", "--config", dest="config", default=[], action="append",
18     help="configuration file")
19     (opts, args) = parser.parse_args(argv)
20 nmohr 1.20 config = BetterConfigParser()
21 nmohr 1.31 config.read(opts.config)
22 nmohr 1.68 var=opts.variable
23     #-------------------------------------------------------------------------------
24    
25     #--read variables from config---------------------------------------------------
26     # 7 or 8TeV Analysis
27 nmohr 1.31 anaTag = config.get("Analysis","tag")
28 nmohr 1.68 if not any([anaTag == '7TeV',anaTag == '8TeV']):
29     raise Exception("anaTag %s unknown. Specify 8TeV or 7TeV in the general config"%anaTag)
30     # Directories:
31 peller 1.1 Wdir=config.get('Directories','Wdir')
32 peller 1.42 vhbbpath=config.get('Directories','vhbbpath')
33 nmohr 1.46 samplesinfo=config.get('Directories','samplesinfo')
34 peller 1.67 path = config.get('Directories','dcSamples')
35 nmohr 1.68 outpath=config.get('Directories','limits')
36     try:
37     os.stat(outpath)
38     except:
39     os.mkdir(outpath)
40     # parse histogram config:
41     hist_conf=config.get('Limit',var)
42     options = hist_conf.split(',')
43 bortigno 1.7 if len(options) < 12:
44 nmohr 1.68 raise Exception("You have to choose option[11]: either Mjj or BDT")
45     treevar = options[0]
46     name = options[1]
47 peller 1.1 title = options[2]
48 nmohr 1.68 nBinsRB = int(options[3])
49     xMin = float(options[4])
50     xMax = float(options[5])
51     ROOToutname = options[6]
52     RCut = options[7]
53     SCut = options[8]
54     signals = options[9].split(' ')
55     datas = options[10].split(' ')
56     anType = options[11]
57 peller 1.54 nBins= int(config.get('LimitGeneral','BDTbinning'))
58 peller 1.29 setup=eval(config.get('LimitGeneral','setup'))
59 nmohr 1.68 #Systematics:
60     if config.has_option('LimitGeneral','addSample_sys'):
61     addSample_sys = eval(config.get('LimitGeneral','addSample_sys'))
62     additionals = [addSample_sys[key] for key in addSample_sys]
63     else:
64     addSample_sys = None
65     additionals = []
66     #find out if BDT or MJJ:
67     bdt = False
68     mjj = False
69     if str(anType) == 'BDT':
70     bdt = True
71     systematics = eval(config.get('LimitGeneral','sys_BDT'))
72     elif str(anType) == 'Mjj':
73     mjj = True
74     systematics = eval(config.get('LimitGeneral','sys_Mjj'))
75     sys_cut_suffix=eval(config.get('LimitGeneral','sys_cut_suffix'))
76     systematicsnaming = eval(config.get('LimitGeneral','systematicsnaming'))
77     sys_factor_dict = eval(config.get('LimitGeneral','sys_factor'))
78     sys_affecting = eval(config.get('LimitGeneral','sys_affecting'))
79     # weightF:
80     weightF = config.get('Weights','weightF')
81     weightF_sys = eval(config.get('LimitGeneral','weightF_sys'))
82 peller 1.58
83 nmohr 1.68 # get nominal cutstring:
84     treecut = config.get('Cuts',RCut)
85     # Train flag: splitting of samples
86     TrainFlag = eval(config.get('Analysis','TrainFlag'))
87     # scaling options
88     scaling=eval(config.get('LimitGeneral','scaling'))
89     rescaleSqrtN=eval(config.get('LimitGeneral','rescaleSqrtN'))
90     # blind data option:
91     blind=eval(config.get('LimitGeneral','blind'))
92     if blind:
93     printc('red','', 'I AM BLINDED!')
94     #get List of backgrounds in use:
95     backgrounds = eval(config.get('LimitGeneral','BKG'))
96     #Groups for adding samples together
97     Group = eval(config.get('LimitGeneral','Group'))
98     #naming for DC
99     Dict= eval(config.get('LimitGeneral','Dict'))
100     #treating statistics bin-by-bin:
101     binstat = eval(config.get('LimitGeneral','binstat'))
102     # Use the rebinning:
103     rebin_active=eval(config.get('LimitGeneral','rebin_active'))
104     max_rel = float(config.get('LimitGeneral','rebin_max_rel'))
105     signal_inject=config.get('LimitGeneral','signal_inject')
106     # add signal as background
107     add_signal_as_bkg=config.get('LimitGeneral','add_signal_as_bkg')
108     if not add_signal_as_bkg == 'None':
109     setup.append(add_signal_as_bkg)
110     #----------------------------------------------------------------------------
111 peller 1.65
112 nmohr 1.68 #--Setup--------------------------------------------------------------------
113     #Assign Pt region for sys factors
114 peller 1.58 if 'HighPtLooseBTag' in ROOToutname:
115     pt_region = 'HighPtLooseBTag'
116 peller 1.61 elif 'HighPt' in ROOToutname or 'highPt' in ROOToutname or 'medPt' in ROOToutname:
117 peller 1.58 pt_region = 'HighPt'
118 peller 1.60 elif 'LowPt' in ROOToutname or 'lowPt' in ROOToutname:
119 peller 1.58 pt_region = 'LowPt'
120 nmohr 1.68 elif 'ATLAS' in ROOToutname:
121     pt_region = 'HighPt'
122     elif 'Mjj' in ROOToutname:
123     pt_region = 'HighPt'
124     else:
125 peller 1.58 print "Unknown Pt region"
126     sys.exit("Unknown Pt region")
127 nmohr 1.68 # Set rescale factor of 2 in case of TrainFalg
128 peller 1.44 if TrainFlag:
129     MC_rescale_factor=2.
130     print 'I RESCALE BY 2.0'
131     else: MC_rescale_factor = 1.
132 nmohr 1.68 #systematics up/down
133     UD = ['Up','Down']
134     # rename Bins in DC (?)
135 peller 1.11 if 'RTight' in RCut:
136 peller 1.62 Datacardbin=options[10]
137 peller 1.11 elif 'RMed' in RCut:
138 peller 1.62 Datacardbin=options[10]
139 peller 1.11 else:
140 peller 1.62 Datacardbin=options[10]
141 nmohr 1.68 #Parse samples configuration
142     info = ParseInfo(samplesinfo,path)
143     # get all the treeCut sets
144     # create different sample Lists
145     all_samples = info.get_samples(signals+backgrounds+additionals)
146     signal_samples = info.get_samples(signals)
147     background_samples = info.get_samples(backgrounds)
148     data_samples = info.get_samples(datas)
149     #cache all samples
150     #t_cache = TreeCache(cuts,all_samples,path)
151     #cache datas
152     #d_cache = TreeCache(trecut,data_samples,path)
153     #-------------------------------------------------------------------------------------------------
154    
155     optionsList=[]
156    
157     def appendList(): optionsList.append({'cut':copy(_cut),'var':copy(_treevar),'name':copy(_name),'nBins':nBins,'xMin':xMin,'xMax':xMax,'weight':copy(_weight),'blind':copy(_blind)})
158    
159     #nominal
160     _cut = treecut
161     _treevar = treevar
162     _name = title
163     _weight = weightF
164     _blind = blind
165     appendList()
166    
167     #the 4 sys
168     for syst in systematics:
169     for Q in UD:
170     #default:
171     _cut = treecut
172     _name = title
173     _weight = weightF
174     #replace cut string
175     new_cut=sys_cut_suffix[syst]
176     if not new_cut == 'nominal':
177     old_str,new_str=new_cut.split('>')
178     _cut = treecut.replace(old_str,new_str.replace('?',Q))
179     _name = title
180     _weight = weightF
181     #replace tree variable
182     if bdt == True:
183     ff[1]='%s_%s'%(sys,Q.lower())
184     _treevar = nominalShape.replace('.nominal','.%s_%s'%(sys,Q.lower()))
185     elif mjj == True:
186     if sys == 'JER' or sys == 'JES':
187     _treevar = 'H_%s.mass_%s'%(sys,Q.lower())
188     else:
189     _treevar = treevar
190     #append
191     appendList()
192    
193     #UEPS
194     if weightF_sys:
195     for _weight in [config.get('Weights','weightF_sys_UP'),config.get('Weights','weightF_sys_DOWN')]:
196     _cut = treecut
197     _treevar = treevar
198     _name = title
199     appendList()
200    
201     #for option in optionsList:
202     # print option['cut']
203    
204    
205     mc_hMaker = HistoMaker(all_samples,path,config,optionsList)
206     data_hMaker = HistoMaker(data_samples,path,config,[optionsList[0]])
207 peller 1.36
208 nmohr 1.68 #Calculate lumi
209     lumi = 0.
210     nData = 0
211     for job in data_samples:
212     nData += 1
213     lumi += float(job.lumi)
214 peller 1.57
215 nmohr 1.68 if nData > 1:
216     lumi = lumi/float(nData)
217 nmohr 1.24
218 nmohr 1.68 mc_hMaker.lumi = lumi
219     data_hMaker.lumi = lumi
220 peller 1.62
221 peller 1.69 mc_hMaker.calc_rebin(background_samples)
222     #transfer rebinning info to data maker
223     data_hMaker.norebin_nBins = mc_hMaker.norebin_nBins
224     data_hMaker.rebin_nBins = mc_hMaker.rebin_nBins
225     data_hMaker.mybinning = mc_hMaker.mybinning
226    
227     data_hMaker.rebin = True
228    
229     #mc_hMaker.rebin = False
230     #data_hMaker.rebin = False
231    
232 nmohr 1.68 all_histos = {}
233     data_histos = {}
234 peller 1.53
235 nmohr 1.68 for job in all_samples:
236     all_histos[job.name] = mc_hMaker.get_histos_from_tree(job)
237 peller 1.53
238 nmohr 1.68 for job in data_samples:
239     data_histos[job.name] = data_hMaker.get_histos_from_tree(job)[0]['DATA']
240 peller 1.23
241 nmohr 1.68 nData = 0
242     for job in data_histos:
243     if nData == 0:
244     theData = data_histos[job]
245     else:
246     theData.Add(data_histos[i])
247 peller 1.11
248 nmohr 1.68 #-- Write Files-----------------------------------------------------------------------------------
249     # generate the TH outfile:
250     outfile = ROOT.TFile(outpath+'vhbb_TH_'+ROOToutname+'.root', 'RECREATE')
251     outfile.mkdir(Datacardbin,Datacardbin)
252     outfile.cd(Datacardbin)
253     # generate the Workspace:
254 peller 1.62 WS = ROOT.RooWorkspace('%s'%Datacardbin,'%s'%Datacardbin) #Zee
255 peller 1.1 print 'WS initialized'
256 bortigno 1.7 disc= ROOT.RooRealVar(name,name,xMin,xMax)
257 peller 1.1 obs = ROOT.RooArgList(disc)
258 nmohr 1.68 #
259 peller 1.1 ROOT.gROOT.SetStyle("Plain")
260 peller 1.53
261    
262 nmohr 1.68 # ToDo:
263 peller 1.53 #---- get the BKG for the rebinning calculation----
264 nmohr 1.68 #Rebinner.calculate_binning(hDummyRB,max_rel)
265     #myBinning=Rebinner(int(nBins),array('d',[-1.0]+[hDummyRB.GetBinLowEdge(i) for i in binlist]),rebin_active)
266 peller 1.53 #--------------------------------------------------
267    
268 nmohr 1.68 #order and add all together
269     final_histos = {}
270 peller 1.57
271 nmohr 1.68 print '\n\t--> Ordering and Adding Histos\n'
272 peller 1.53
273 nmohr 1.68 #NOMINAL:
274     final_histos['nominal'] = HistoMaker.orderandadd([all_histos['%s'%job][0] for job in all_samples],setup)
275 peller 1.53
276 nmohr 1.68 #SYSTEMATICS:
277     ind = 1
278     for syst in systematics:
279     for Q in UD:
280 nmohr 1.71 final_histos['%s_%s'%(systematicsnaming[syst],Q)] = HistoMaker.orderandadd([all_histos[job.name][ind] for job in all_samples],setup)
281 nmohr 1.68 ind+=1
282    
283     if weightF_sys:
284     for Q in UD:
285 nmohr 1.71 final_histos['%s_%s'%(systematicsnaming['weightF_sys'],Q)]= HistoMaker.orderandadd([all_histos[job.name][ind] for job in all_samples],setup)
286 nmohr 1.68 ind+=1
287    
288 nmohr 1.71 def get_alternate_shape(hNominal,hAlternate):
289     hVar = hAlternate.Clone()
290     hNom = hNominal.Clone()
291     hAlt = hNom.Clone()
292     hNom.Add(hVar,-1.)
293     hAlt.Add(hNom)
294     for bin in range(0,hNominal.GetNbinsX()+1):
295     if hAlt.GetBinContent(bin) < 0.: hAlt.SetBinContent(bin,0.)
296     return hVar,hAlt
297    
298     def get_alternate_shapes(all_histos,asample_dict,all_samples):
299     alternate_shapes_up = []
300     alternate_shapes_down = []
301     for job in all_samples:
302     nominal = all_histos[job.name][0]
303     if job.name in asample_dict:
304     alternate = copy(all_histos[asample_dict[job.name]][0])
305     hUp, hDown = get_alternate_shape(nominal[nominal.keys()[0]],alternate[alternate.keys()[0]])
306     alternate_shapes_up.append({nominal.keys()[0]:hUp})
307     alternate_shapes_down.append({nominal.keys()[0]:hDown})
308     else:
309     alternate_shapes_up.append(copy(nominal))
310     alternate_shapes_down.append(copy(nominal))
311     return alternate_shapes_up, alternate_shapes_down
312    
313     if addSample_sys:
314     aUp, aDown = get_alternate_shapes(all_histos,addSample_sys,all_samples)
315     final_histos['%s_Up'%(systematicsnaming['model'])]= HistoMaker.orderandadd(aUp,setup)
316     final_histos['%s_Down'%(systematicsnaming['model'])]= HistoMaker.orderandadd(aDown,setup)
317    
318 nmohr 1.68 #make statistical shapes:
319     for Q in UD:
320     final_histos['%s_%s'%(systematicsnaming['stats'],Q)] = {}
321     for job,hist in final_histos['nominal'].items():
322     for Q in UD:
323     final_histos['%s_%s'%(systematicsnaming['stats'],Q)][job] = hist.Clone()
324     for j in range(hist.GetNbinsX()+1):
325     if Q == 'Up':
326     final_histos['%s_%s'%(systematicsnaming['stats'],Q)][job].SetBinContent(j,max(0,hist.GetBinContent(j)+hist.GetBinError(j)))
327     if Q == 'Down':
328     final_histos['%s_%s'%(systematicsnaming['stats'],Q)][job].SetBinContent(j,max(0,hist.GetBinContent(j)-hist.GetBinError(j)))
329    
330     #write shapes in WS:
331     for key in final_histos:
332     for job, hist in final_histos[key].items():
333     if 'nominal' == key:
334 nmohr 1.71 hist.SetName('%s'%(Dict[job]))
335 peller 1.70 hist.Write()
336 nmohr 1.71 rooDataHist = ROOT.RooDataHist('%s' %(Dict[job]),'%s'%(Dict[job]),obs, hist)
337 nmohr 1.68 getattr(WS,'import')(rooDataHist)
338     for Q in UD:
339     if Q in key:
340     theSyst = key.replace('_%s'%Q,'')
341     else:
342     continue
343 nmohr 1.71 if systematicsnaming['stats'] in key:
344     nameSyst = '%s_%s_%s' %(theSyst,Dict[job],Datacardbin)
345     elif systematicsnaming['model'] in key:
346     nameSyst = '%s_%s' %(theSyst,Dict[job])
347 nmohr 1.68 else:
348     nameSyst = theSyst
349 nmohr 1.71 hist.SetName('%s%s%s' %(Dict[job],nameSyst,Q))
350 peller 1.70 hist.Write()
351 nmohr 1.71 rooDataHist = ROOT.RooDataHist('%s%s%s' %(Dict[job],nameSyst,Q),'%s%s%s'%(Dict[job],nameSyst,Q),obs, hist)
352 nmohr 1.68 getattr(WS,'import')(rooDataHist)
353 peller 1.48
354 peller 1.70 theData.SetName('data_obs')
355     theData.Write()
356 nmohr 1.68 rooDataHist = ROOT.RooDataHist('data_obs','data_obs',obs, theData)
357     getattr(WS,'import')(rooDataHist)
358 peller 1.13
359 nmohr 1.68 WS.writeToFile(outpath+'vhbb_WS_'+ROOToutname+'.root')
360 peller 1.47
361 nmohr 1.68 # now we have a Dict final_histos with sets of all grouped MCs for all systematics:
362     # nominal, ($SYS_Up/Down)*4, weightF_sys_Up/Down, stats_Up/Down
363 peller 1.36
364 peller 1.69 print '\n\t >>> PRINTOUT PRETTY TABLE <<<\n'
365     #header
366     printout = ''
367     printout += '%-25s'%'Process'
368     printout += ':'
369     for item, val in final_histos['nominal'].items():
370 peller 1.70 printout += '%-12s'%item
371 peller 1.69 print printout+'\n'
372 nmohr 1.68 for key in final_histos:
373     printout = ''
374 peller 1.69 printout += '%-25s'%key
375     printout += ':'
376 nmohr 1.68 for item, val in final_histos[key].items():
377 peller 1.69 printout += '%-10s'%'%0.5f'%val.Integral()
378 nmohr 1.68 print printout
379 peller 1.1
380 nmohr 1.68 #-----------------------------------------------------------------------------------------------------------
381 peller 1.36
382 peller 1.62 # -------------------- write DATAcard: ----------------------------------------------------------------------
383     DCprocessseparatordict = {'WS':':','TH':'/'}
384 nmohr 1.68 # create two datacards: for TH an WS
385 peller 1.62 for DCtype in ['WS','TH']:
386     columns=len(setup)
387     f = open(outpath+'vhbb_DC_%s_%s.txt'%(DCtype,ROOToutname),'w')
388     f.write('imax\t1\tnumber of channels\n')
389     f.write('jmax\t%s\tnumber of backgrounds (\'*\' = automatic)\n'%(columns-1))
390     f.write('kmax\t*\tnumber of nuisance parameters (sources of systematical uncertainties)\n\n')
391     f.write('shapes * * vhbb_%s_%s.root $CHANNEL%s$PROCESS $CHANNEL%s$PROCESS$SYSTEMATIC\n\n'%(DCtype,ROOToutname,DCprocessseparatordict[DCtype],DCprocessseparatordict[DCtype]))
392     f.write('bin\t%s\n\n'%Datacardbin)
393 nmohr 1.68 f.write('observation\t%s\n\n'%(int(theData.Integral())))
394     # datacard bin
395 peller 1.62 f.write('bin')
396     for c in range(0,columns): f.write('\t%s'%Datacardbin)
397     f.write('\n')
398 nmohr 1.68 # datacard process
399 peller 1.62 f.write('process')
400     for c in setup: f.write('\t%s'%Dict[c])
401     f.write('\n')
402     f.write('process')
403     for c in range(0,columns): f.write('\t%s'%c)
404     f.write('\n')
405 nmohr 1.68 # datacard yields
406 peller 1.62 f.write('rate')
407 nmohr 1.68 for c in setup:
408     f.write('\t%s'%final_histos['nominal'][c].Integral())
409 peller 1.29 f.write('\n')
410 nmohr 1.68 # get list of systematics in use
411 peller 1.62 InUse=eval(config.get('Datacard','InUse_%s'%pt_region))
412 nmohr 1.68 # write non-shape systematics
413 peller 1.62 for item in InUse:
414     f.write(item)
415     what=eval(config.get('Datacard',item))
416     f.write('\t%s'%what['type'])
417     for c in setup:
418     if c in what:
419     if item == 'CMS_eff_e' and 'Zmm' in options[10]: f.write('\t-')
420     elif item == 'CMS_eff_m' and 'Zee' in options[10]: f.write('\t-')
421     elif item == 'CMS_trigger_e' and 'Zmm' in options[10]: f.write('\t-')
422     elif item == 'CMS_trigger_m' and 'Zee' in options[10]: f.write('\t-')
423 peller 1.35 else:
424 peller 1.62 f.write('\t%s'%what[c])
425 peller 1.35 else:
426     f.write('\t-')
427     f.write('\n')
428 nmohr 1.68 # Write statistical shape variations
429 peller 1.62 if binstat:
430     for c in setup:
431     for bin in range(0,nBins):
432 nmohr 1.71 f.write('%s_%s_%s_%s\tshape'%(systematicsnaming['stats'],Dict[c], bin, options[10]))
433 peller 1.62 for it in range(0,columns):
434     if it == setup.index(c):
435     f.write('\t1.0')
436     else:
437     f.write('\t-')
438     f.write('\n')
439     else:
440 nmohr 1.51 for c in setup:
441 nmohr 1.71 f.write('%s_%s_%s\tshape'%(systematicsnaming['stats'],Dict[c], options[10]))
442 nmohr 1.37 for it in range(0,columns):
443     if it == setup.index(c):
444 peller 1.62 f.write('\t1.0')
445 nmohr 1.37 else:
446 peller 1.62 f.write('\t-')
447 nmohr 1.37 f.write('\n')
448 nmohr 1.68 # UEPS systematics
449 peller 1.62 if weightF_sys:
450     f.write('UEPS\tshape')
451     for it in range(0,columns): f.write('\t1.0')
452     f.write('\n')
453 nmohr 1.68 # additional sample systematics
454 peller 1.62 if addSample_sys:
455     alreadyAdded = []
456     for newSample in addSample_sys.iterkeys():
457     for c in setup:
458     if not c == Group[newSample]: continue
459     if Dict[c] in alreadyAdded: continue
460 nmohr 1.71 f.write('%s_%s\tshape'%(systematicsnaming['model'],Dict[c]))
461 peller 1.62 for it in range(0,columns):
462     if it == setup.index(c):
463     f.write('\t1.0')
464     else:
465     f.write('\t-')
466     f.write('\n')
467     alreadyAdded.append(Dict[c])
468 nmohr 1.68 # regular systematics
469 peller 1.62 for sys in systematics:
470     sys_factor=sys_factor_dict[sys]
471     f.write('%s\tshape'%systematicsnaming[sys])
472 peller 1.63 for c in setup:
473     if c in sys_affecting[sys]:
474     f.write('\t%s'%sys_factor)
475     else:
476     f.write('\t-')
477 peller 1.62 f.write('\n')
478     f.close()
479 nmohr 1.68 # --------------------------------------------------------------------------
480    
481    
482    
483    
484 nmohr 1.31 outfile.Close()