root/cvsroot/UserCode/VHbb/python/workspace_datacard.py
Revision: 1.72
Committed: Tue Feb 5 15:41:08 2013 UTC by peller
Content type: text/x-python
Branch: MAIN
Changes since 1.71: +10 -10 lines
Log Message:
removed obsolete options

File Contents

# User Rev Content
1 peller 1.8 #!/usr/bin/env python
2 peller 1.53 import os, sys, ROOT, warnings, pickle
3 nmohr 1.68 ROOT.gROOT.SetBatch(True)
4 peller 1.1 from array import array
5     from math import sqrt
6 nmohr 1.68 from copy import copy, deepcopy
7 peller 1.1 #suppress the EvalInstance conversion warning bug
8     warnings.filterwarnings( action='ignore', category=RuntimeWarning, message='creating converter.*' )
9 nmohr 1.31 from optparse import OptionParser
10 nmohr 1.68 from myutils import BetterConfigParser, Sample, progbar, printc, ParseInfo, Rebinner, TreeCache, HistoMaker
11 peller 1.1
12 nmohr 1.68 #--CONFIGURE---------------------------------------------------------------------
13 nmohr 1.31 argv = sys.argv
14     parser = OptionParser()
15     parser.add_option("-V", "--var", dest="variable", default="",
16     help="variable for shape analysis")
17     parser.add_option("-C", "--config", dest="config", default=[], action="append",
18     help="configuration file")
19     (opts, args) = parser.parse_args(argv)
20 nmohr 1.20 config = BetterConfigParser()
21 nmohr 1.31 config.read(opts.config)
22 nmohr 1.68 var=opts.variable
23     #-------------------------------------------------------------------------------
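# A minimal usage sketch, assuming two hypothetical config files; only the -V/-C
# options are defined above (and -C may be given several times, one per file):
#   ./workspace_datacard.py -V BDT_HighPt -C general.ini -C limits.ini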
24    
25     #--read variables from config---------------------------------------------------
26     # 7 or 8TeV Analysis
27 nmohr 1.31 anaTag = config.get("Analysis","tag")
28 nmohr 1.68 if not any([anaTag == '7TeV',anaTag == '8TeV']):
29     raise Exception("anaTag %s unknown. Specify 8TeV or 7TeV in the general config"%anaTag)
30     # Directories:
31 peller 1.1 Wdir=config.get('Directories','Wdir')
32 peller 1.42 vhbbpath=config.get('Directories','vhbbpath')
33 nmohr 1.46 samplesinfo=config.get('Directories','samplesinfo')
34 peller 1.67 path = config.get('Directories','dcSamples')
35 nmohr 1.68 outpath=config.get('Directories','limits')
36     try:
37     os.stat(outpath)
38     except:
39     os.mkdir(outpath)
40     # parse histogram config:
41     hist_conf=config.get('Limit',var)
42     options = hist_conf.split(',')
43 bortigno 1.7 if len(options) < 12:
44 nmohr 1.68 raise Exception("You have to choose option[11]: either Mjj or BDT")
45     treevar = options[0]
46     name = options[1]
47 peller 1.1 title = options[2]
48 peller 1.72 nBins = int(options[3])
49 nmohr 1.68 xMin = float(options[4])
50     xMax = float(options[5])
51     ROOToutname = options[6]
52     RCut = options[7]
53     SCut = options[8]
54     signals = options[9].split(' ')
55     datas = options[10].split(' ')
56     anType = options[11]
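# For orientation: the 'Limit' entry parsed above is one comma-separated line with
# 12 fields (the names below are placeholders, only the field layout is taken from
# this parsing code):
#   <var> = <treevar>,<name>,<title>,<nBins>,<xMin>,<xMax>,<ROOToutname>,<RCut>,<SCut>,<signals>,<datas>,<Mjj|BDT>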
57 peller 1.29 setup=eval(config.get('LimitGeneral','setup'))
58 nmohr 1.68 #Systematics:
59     if config.has_option('LimitGeneral','addSample_sys'):
60     addSample_sys = eval(config.get('LimitGeneral','addSample_sys'))
61     additionals = [addSample_sys[key] for key in addSample_sys]
62     else:
63     addSample_sys = None
64     additionals = []
65     #find out if BDT or MJJ:
66     bdt = False
67     mjj = False
68     if str(anType) == 'BDT':
69     bdt = True
70     systematics = eval(config.get('LimitGeneral','sys_BDT'))
71     elif str(anType) == 'Mjj':
72     mjj = True
73     systematics = eval(config.get('LimitGeneral','sys_Mjj'))
74     sys_cut_suffix=eval(config.get('LimitGeneral','sys_cut_suffix'))
75     systematicsnaming = eval(config.get('LimitGeneral','systematicsnaming'))
76     sys_factor_dict = eval(config.get('LimitGeneral','sys_factor'))
77     sys_affecting = eval(config.get('LimitGeneral','sys_affecting'))
78     # weightF:
79     weightF = config.get('Weights','weightF')
80     weightF_sys = eval(config.get('LimitGeneral','weightF_sys'))
81 peller 1.58
82 nmohr 1.68 # get nominal cutstring:
83     treecut = config.get('Cuts',RCut)
84     # Train flag: splitting of samples
85     TrainFlag = eval(config.get('Analysis','TrainFlag'))
86     # scaling options
87     scaling=eval(config.get('LimitGeneral','scaling'))
88     rescaleSqrtN=eval(config.get('LimitGeneral','rescaleSqrtN'))
89     # blind data option:
90     blind=eval(config.get('LimitGeneral','blind'))
91     if blind:
92     printc('red','', 'I AM BLINDED!')
93     #get List of backgrounds in use:
94     backgrounds = eval(config.get('LimitGeneral','BKG'))
95     #Groups for adding samples together
96     Group = eval(config.get('LimitGeneral','Group'))
97     #naming for DC
98     Dict= eval(config.get('LimitGeneral','Dict'))
99     #treating statistics bin-by-bin:
100     binstat = eval(config.get('LimitGeneral','binstat'))
101     # Use the rebinning:
102     rebin_active=eval(config.get('LimitGeneral','rebin_active'))
103 peller 1.72 #max_rel = float(config.get('LimitGeneral','rebin_max_rel'))
104 nmohr 1.68 signal_inject=config.get('LimitGeneral','signal_inject')
105     # add signal as background
106     add_signal_as_bkg=config.get('LimitGeneral','add_signal_as_bkg')
107     if not add_signal_as_bkg == 'None':
108     setup.append(add_signal_as_bkg)
109     #----------------------------------------------------------------------------
110 peller 1.65
111 nmohr 1.68 #--Setup--------------------------------------------------------------------
112     #Assign Pt region for sys factors
113 peller 1.58 if 'HighPtLooseBTag' in ROOToutname:
114     pt_region = 'HighPtLooseBTag'
115 peller 1.61 elif 'HighPt' in ROOToutname or 'highPt' in ROOToutname or 'medPt' in ROOToutname:
116 peller 1.58 pt_region = 'HighPt'
117 peller 1.60 elif 'LowPt' in ROOToutname or 'lowPt' in ROOToutname:
118 peller 1.58 pt_region = 'LowPt'
119 nmohr 1.68 elif 'ATLAS' in ROOToutname:
120     pt_region = 'HighPt'
121     elif 'Mjj' in ROOToutname:
122     pt_region = 'HighPt'
123     else:
124 peller 1.58 print "Unknown Pt region"
125     sys.exit("Unknown Pt region")
126 nmohr 1.68 # Set rescale factor of 2 in case of TrainFlag
127 peller 1.44 if TrainFlag:
128     MC_rescale_factor=2.
129     print 'I RESCALE BY 2.0'
130     else: MC_rescale_factor = 1.
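# (Presumably the factor 2 compensates for only half of the MC being usable here
# when the other half was used for BDT training -- an assumption, not stated in this code.)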
131 nmohr 1.68 #systematics up/down
132     UD = ['Up','Down']
133     # name of the bin used in the datacard (currently options[10], the data field, in every case)
134 peller 1.11 if 'RTight' in RCut:
135 peller 1.62 Datacardbin=options[10]
136 peller 1.11 elif 'RMed' in RCut:
137 peller 1.62 Datacardbin=options[10]
138 peller 1.11 else:
139 peller 1.62 Datacardbin=options[10]
140 nmohr 1.68 #Parse samples configuration
141     info = ParseInfo(samplesinfo,path)
142     # get all the treeCut sets
143     # create different sample Lists
144     all_samples = info.get_samples(signals+backgrounds+additionals)
145     signal_samples = info.get_samples(signals)
146     background_samples = info.get_samples(backgrounds)
147     data_samples = info.get_samples(datas)
148     #cache all samples
149     #t_cache = TreeCache(cuts,all_samples,path)
150     #cache datas
151     #d_cache = TreeCache(treecut,data_samples,path)
152     #-------------------------------------------------------------------------------------------------
153    
154     optionsList=[]
155    
156     def appendList(): optionsList.append({'cut':copy(_cut),'var':copy(_treevar),'name':copy(_name),'nBins':nBins,'xMin':xMin,'xMax':xMax,'weight':copy(_weight),'blind':copy(_blind)})
157    
158     #nominal
159     _cut = treecut
160     _treevar = treevar
161     _name = title
162     _weight = weightF
163     _blind = blind
164     appendList()
165    
166     #shape systematics: one Up/Down variation per entry in 'systematics'
167     for syst in systematics:
168     for Q in UD:
169     #default:
170     _cut = treecut
171     _name = title
172     _weight = weightF
173     #replace cut string
174     new_cut=sys_cut_suffix[syst]
175     if not new_cut == 'nominal':
176     old_str,new_str=new_cut.split('>')
177     _cut = treecut.replace(old_str,new_str.replace('?',Q))
178     _name = title
179     _weight = weightF
180     #replace tree variable
181     if bdt == True:
182     #ff[1]='%s_%s'%(syst,Q.lower())   # obsolete leftover: 'ff' is not defined in this revision
183     _treevar = treevar.replace('.nominal','.%s_%s'%(syst,Q.lower()))   # 'syst' is the loop variable ('sys' is the module); 'nominalShape' was undefined, the nominal variable is assumed to contain '.nominal'
184     elif mjj == True:
185     if syst == 'JER' or syst == 'JES':
186     _treevar = 'H_%s.mass_%s'%(syst,Q.lower())
187     else:
188     _treevar = treevar
189     #append
190     appendList()
191    
192     #UEPS
193     if weightF_sys:
194     for _weight in [config.get('Weights','weightF_sys_UP'),config.get('Weights','weightF_sys_DOWN')]:
195     _cut = treecut
196     _treevar = treevar
197     _name = title
198     appendList()
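# Resulting layout of optionsList (and therefore of the per-sample histogram lists
# returned by HistoMaker.get_histos_from_tree below): index 0 is the nominal shape,
# followed by one Up/Down pair per entry in 'systematics', and finally the two
# weightF_sys (UEPS) variations if enabled.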
199    
200     #for option in optionsList:
201     # print option['cut']
202    
203    
204     mc_hMaker = HistoMaker(all_samples,path,config,optionsList)
205     data_hMaker = HistoMaker(data_samples,path,config,[optionsList[0]])
206 peller 1.36
207 nmohr 1.68 #Calculate lumi
208     lumi = 0.
209     nData = 0
210     for job in data_samples:
211     nData += 1
212     lumi += float(job.lumi)
213 peller 1.57
214 nmohr 1.68 if nData > 1:
215     lumi = lumi/float(nData)
216 nmohr 1.24
217 nmohr 1.68 mc_hMaker.lumi = lumi
218     data_hMaker.lumi = lumi
219 peller 1.62
220 peller 1.69
221 peller 1.72 if rebin_active:
222     mc_hMaker.calc_rebin(background_samples)
223     #transfer rebinning info to data maker
224     data_hMaker.norebin_nBins = mc_hMaker.norebin_nBins
225     data_hMaker.rebin_nBins = mc_hMaker.rebin_nBins
226     data_hMaker.mybinning = mc_hMaker.mybinning
227     data_hMaker.rebin = True
228 peller 1.69
229     #mc_hMaker.rebin = False
230     #data_hMaker.rebin = False
231    
232 nmohr 1.68 all_histos = {}
233     data_histos = {}
234 peller 1.53
235 nmohr 1.68 for job in all_samples:
236     all_histos[job.name] = mc_hMaker.get_histos_from_tree(job)
237 peller 1.53
238 nmohr 1.68 for job in data_samples:
239     data_histos[job.name] = data_hMaker.get_histos_from_tree(job)[0]['DATA']
240 peller 1.23
241 nmohr 1.68 nData = 0
242     for nData,job in enumerate(data_histos):   # enumerate: the counter was never incremented before
243     if nData == 0:
244     theData = data_histos[job]
245     else:
246     theData.Add(data_histos[job])   # was 'data_histos[i]' with 'i' undefined
247 peller 1.11
248 nmohr 1.68 #-- Write Files-----------------------------------------------------------------------------------
249     # generate the TH outfile:
250     outfile = ROOT.TFile(outpath+'vhbb_TH_'+ROOToutname+'.root', 'RECREATE')
251     outfile.mkdir(Datacardbin,Datacardbin)
252     outfile.cd(Datacardbin)
253     # generate the Workspace:
254 peller 1.62 WS = ROOT.RooWorkspace('%s'%Datacardbin,'%s'%Datacardbin) #Zee
255 peller 1.1 print 'WS initialized'
256 bortigno 1.7 disc= ROOT.RooRealVar(name,name,xMin,xMax)
257 peller 1.1 obs = ROOT.RooArgList(disc)
258 nmohr 1.68 #
259 peller 1.1 ROOT.gROOT.SetStyle("Plain")
260 peller 1.53
261    
262 nmohr 1.68 # ToDo:
263 peller 1.53 #---- get the BKG for the rebinning calculation----
264 nmohr 1.68 #Rebinner.calculate_binning(hDummyRB,max_rel)
265     #myBinning=Rebinner(int(nBins),array('d',[-1.0]+[hDummyRB.GetBinLowEdge(i) for i in binlist]),rebin_active)
266 peller 1.53 #--------------------------------------------------
267    
268 nmohr 1.68 #order and add all together
269     final_histos = {}
270 peller 1.57
271 nmohr 1.68 print '\n\t--> Ordering and Adding Histos\n'
272 peller 1.53
273 nmohr 1.68 #NOMINAL:
274     final_histos['nominal'] = HistoMaker.orderandadd([all_histos[job.name][0] for job in all_samples],setup)
275 peller 1.53
276 nmohr 1.68 #SYSTEMATICS:
277     ind = 1
278     for syst in systematics:
279     for Q in UD:
280 nmohr 1.71 final_histos['%s_%s'%(systematicsnaming[syst],Q)] = HistoMaker.orderandadd([all_histos[job.name][ind] for job in all_samples],setup)
281 nmohr 1.68 ind+=1
282    
283     if weightF_sys:
284     for Q in UD:
285 nmohr 1.71 final_histos['%s_%s'%(systematicsnaming['weightF_sys'],Q)]= HistoMaker.orderandadd([all_histos[job.name][ind] for job in all_samples],setup)
286 nmohr 1.68 ind+=1
287    
288 nmohr 1.71 def get_alternate_shape(hNominal,hAlternate):
289     hVar = hAlternate.Clone()
290     hNom = hNominal.Clone()
291     hAlt = hNom.Clone()
292     hNom.Add(hVar,-1.)
293     hAlt.Add(hNom)
294     for bin in range(0,hNominal.GetNbinsX()+1):
295     if hAlt.GetBinContent(bin) < 0.: hAlt.SetBinContent(bin,0.)
296     return hVar,hAlt
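# In words: hVar is the alternate-sample shape itself and hAlt is the nominal shape
# mirrored about it, hAlt = hNom + (hNom - hVar) = 2*hNom - hVar, with negative bins
# clamped to zero; the caller below uses them as the Up/Down templates of the
# model-shape systematic.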
297    
298     def get_alternate_shapes(all_histos,asample_dict,all_samples):
299     alternate_shapes_up = []
300     alternate_shapes_down = []
301     for job in all_samples:
302     nominal = all_histos[job.name][0]
303     if job.name in asample_dict:
304     alternate = copy(all_histos[asample_dict[job.name]][0])
305     hUp, hDown = get_alternate_shape(nominal[nominal.keys()[0]],alternate[alternate.keys()[0]])
306     alternate_shapes_up.append({nominal.keys()[0]:hUp})
307     alternate_shapes_down.append({nominal.keys()[0]:hDown})
308     else:
309     alternate_shapes_up.append(copy(nominal))
310     alternate_shapes_down.append(copy(nominal))
311     return alternate_shapes_up, alternate_shapes_down
312    
313     if addSample_sys:
314     aUp, aDown = get_alternate_shapes(all_histos,addSample_sys,all_samples)
315     final_histos['%s_Up'%(systematicsnaming['model'])]= HistoMaker.orderandadd(aUp,setup)
316     final_histos['%s_Down'%(systematicsnaming['model'])]= HistoMaker.orderandadd(aDown,setup)
317    
318 nmohr 1.68 #make statistical shapes:
319     for Q in UD:
320     final_histos['%s_%s'%(systematicsnaming['stats'],Q)] = {}
321     for job,hist in final_histos['nominal'].items():
322     for Q in UD:
323     final_histos['%s_%s'%(systematicsnaming['stats'],Q)][job] = hist.Clone()
324     for j in range(hist.GetNbinsX()+1):
325     if Q == 'Up':
326     final_histos['%s_%s'%(systematicsnaming['stats'],Q)][job].SetBinContent(j,max(0,hist.GetBinContent(j)+hist.GetBinError(j)))
327     if Q == 'Down':
328     final_histos['%s_%s'%(systematicsnaming['stats'],Q)][job].SetBinContent(j,max(0,hist.GetBinContent(j)-hist.GetBinError(j)))
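# The statistical templates above shift every bin of the nominal shape up/down by
# its MC statistical uncertainty, clamped at zero.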
329    
330     #write shapes in WS:
331     for key in final_histos:
332     for job, hist in final_histos[key].items():
333     if 'nominal' == key:
334 nmohr 1.71 hist.SetName('%s'%(Dict[job]))
335 peller 1.70 hist.Write()
336 nmohr 1.71 rooDataHist = ROOT.RooDataHist('%s' %(Dict[job]),'%s'%(Dict[job]),obs, hist)
337 nmohr 1.68 getattr(WS,'import')(rooDataHist)
338     for Q in UD:
339     if Q in key:
340     theSyst = key.replace('_%s'%Q,'')
341     else:
342     continue
343 nmohr 1.71 if systematicsnaming['stats'] in key:
344     nameSyst = '%s_%s_%s' %(theSyst,Dict[job],Datacardbin)
345     elif systematicsnaming['model'] in key:
346     nameSyst = '%s_%s' %(theSyst,Dict[job])
347 nmohr 1.68 else:
348     nameSyst = theSyst
349 nmohr 1.71 hist.SetName('%s%s%s' %(Dict[job],nameSyst,Q))
350 peller 1.70 hist.Write()
351 nmohr 1.71 rooDataHist = ROOT.RooDataHist('%s%s%s' %(Dict[job],nameSyst,Q),'%s%s%s'%(Dict[job],nameSyst,Q),obs, hist)
352 nmohr 1.68 getattr(WS,'import')(rooDataHist)
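# The names written above follow <process> for the nominal shapes and
# <process><systematic><Up|Down> for the variations, which is what the 'shapes'
# line of the datacard written below refers to via $PROCESS and $PROCESS$SYSTEMATIC.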
353 peller 1.48
354 peller 1.70 theData.SetName('data_obs')
355     theData.Write()
356 nmohr 1.68 rooDataHist = ROOT.RooDataHist('data_obs','data_obs',obs, theData)
357     getattr(WS,'import')(rooDataHist)
358 peller 1.13
359 nmohr 1.68 WS.writeToFile(outpath+'vhbb_WS_'+ROOToutname+'.root')
360 peller 1.47
361 nmohr 1.68 # now we have a Dict final_histos with sets of all grouped MCs for all systematics:
362     # nominal, ($SYS_Up/Down)*4, weightF_sys_Up/Down, stats_Up/Down
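# Sketch of the resulting structure (key names are illustrative, the real ones come
# from 'systematicsnaming' in the config):
#   final_histos = {
#       'nominal'      : {process: TH1, ...},
#       '<syst>_Up'    : {process: TH1, ...},  '<syst>_Down'   : {...},   # per shape systematic
#       '<weightF>_Up' : {...},                '<weightF>_Down': {...},
#       '<stats>_Up'   : {...},                '<stats>_Down'  : {...},
#   }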
363 peller 1.36
364 peller 1.69 print '\n\t >>> PRINTOUT PRETTY TABLE <<<\n'
365     #header
366     printout = ''
367     printout += '%-25s'%'Process'
368     printout += ':'
369     for item, val in final_histos['nominal'].items():
370 peller 1.70 printout += '%-12s'%item
371 peller 1.69 print printout+'\n'
372 nmohr 1.68 for key in final_histos:
373     printout = ''
374 peller 1.69 printout += '%-25s'%key
375     printout += ':'
376 nmohr 1.68 for item, val in final_histos[key].items():
377 peller 1.72 printout += '%-12s'%str('%0.5f'%val.Integral())
378 nmohr 1.68 print printout
379 peller 1.1
380 nmohr 1.68 #-----------------------------------------------------------------------------------------------------------
381 peller 1.36
382 peller 1.62 # -------------------- write DATAcard: ----------------------------------------------------------------------
383     DCprocessseparatordict = {'WS':':','TH':'/'}
384 nmohr 1.68 # create two datacards: one for TH and one for WS
385 peller 1.62 for DCtype in ['WS','TH']:
386     columns=len(setup)
387     f = open(outpath+'vhbb_DC_%s_%s.txt'%(DCtype,ROOToutname),'w')
388     f.write('imax\t1\tnumber of channels\n')
389     f.write('jmax\t%s\tnumber of backgrounds (\'*\' = automatic)\n'%(columns-1))
390     f.write('kmax\t*\tnumber of nuisance parameters (sources of systematical uncertainties)\n\n')
391     f.write('shapes * * vhbb_%s_%s.root $CHANNEL%s$PROCESS $CHANNEL%s$PROCESS$SYSTEMATIC\n\n'%(DCtype,ROOToutname,DCprocessseparatordict[DCtype],DCprocessseparatordict[DCtype]))
392     f.write('bin\t%s\n\n'%Datacardbin)
393 nmohr 1.68 f.write('observation\t%s\n\n'%(int(theData.Integral())))
394     # datacard bin
395 peller 1.62 f.write('bin')
396     for c in range(0,columns): f.write('\t%s'%Datacardbin)
397     f.write('\n')
398 nmohr 1.68 # datacard process
399 peller 1.62 f.write('process')
400     for c in setup: f.write('\t%s'%Dict[c])
401     f.write('\n')
402     f.write('process')
403     for c in range(0,columns): f.write('\t%s'%c)
404     f.write('\n')
405 nmohr 1.68 # datacard yields
406 peller 1.62 f.write('rate')
407 nmohr 1.68 for c in setup:
408     f.write('\t%s'%final_histos['nominal'][c].Integral())
409 peller 1.29 f.write('\n')
410 nmohr 1.68 # get list of systematics in use
411 peller 1.62 InUse=eval(config.get('Datacard','InUse_%s'%pt_region))
412 nmohr 1.68 # write non-shape systematics
413 peller 1.62 for item in InUse:
414     f.write(item)
415     what=eval(config.get('Datacard',item))
416     f.write('\t%s'%what['type'])
417     for c in setup:
418     if c in what:
419     if item == 'CMS_eff_e' and 'Zmm' in options[10]: f.write('\t-')
420     elif item == 'CMS_eff_m' and 'Zee' in options[10]: f.write('\t-')
421     elif item == 'CMS_trigger_e' and 'Zmm' in options[10]: f.write('\t-')
422     elif item == 'CMS_trigger_m' and 'Zee' in options[10]: f.write('\t-')
423 peller 1.35 else:
424 peller 1.62 f.write('\t%s'%what[c])
425 peller 1.35 else:
426     f.write('\t-')
427     f.write('\n')
428 nmohr 1.68 # Write statistical shape variations
429 peller 1.62 if binstat:
430     for c in setup:
431     for bin in range(0,nBins):
432 nmohr 1.71 f.write('%s_%s_%s_%s\tshape'%(systematicsnaming['stats'],Dict[c], bin, options[10]))
433 peller 1.62 for it in range(0,columns):
434     if it == setup.index(c):
435     f.write('\t1.0')
436     else:
437     f.write('\t-')
438     f.write('\n')
439     else:
440 nmohr 1.51 for c in setup:
441 nmohr 1.71 f.write('%s_%s_%s\tshape'%(systematicsnaming['stats'],Dict[c], options[10]))
442 nmohr 1.37 for it in range(0,columns):
443     if it == setup.index(c):
444 peller 1.62 f.write('\t1.0')
445 nmohr 1.37 else:
446 peller 1.62 f.write('\t-')
447 nmohr 1.37 f.write('\n')
448 nmohr 1.68 # UEPS systematics
449 peller 1.62 if weightF_sys:
450     f.write('UEPS\tshape')
451     for it in range(0,columns): f.write('\t1.0')
452     f.write('\n')
453 nmohr 1.68 # additional sample systematics
454 peller 1.62 if addSample_sys:
455     alreadyAdded = []
456     for newSample in addSample_sys.iterkeys():
457     for c in setup:
458     if not c == Group[newSample]: continue
459     if Dict[c] in alreadyAdded: continue
460 nmohr 1.71 f.write('%s_%s\tshape'%(systematicsnaming['model'],Dict[c]))
461 peller 1.62 for it in range(0,columns):
462     if it == setup.index(c):
463     f.write('\t1.0')
464     else:
465     f.write('\t-')
466     f.write('\n')
467     alreadyAdded.append(Dict[c])
468 nmohr 1.68 # regular systematics
469 peller 1.62 for sys in systematics:
470     sys_factor=sys_factor_dict[sys]
471     f.write('%s\tshape'%systematicsnaming[sys])
472 peller 1.63 for c in setup:
473     if c in sys_affecting[sys]:
474     f.write('\t%s'%sys_factor)
475     else:
476     f.write('\t-')
477 peller 1.62 f.write('\n')
478     f.close()
479 nmohr 1.68 # --------------------------------------------------------------------------
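# For orientation, the header of each datacard written above ends up looking
# roughly like this (values are placeholders filled in from the code above):
#   imax  1    number of channels
#   jmax  N-1  number of backgrounds ('*' = automatic)
#   kmax  *    number of nuisance parameters (sources of systematical uncertainties)
#   shapes * * vhbb_<TH|WS>_<ROOToutname>.root $CHANNEL</|:>$PROCESS $CHANNEL</|:>$PROCESS$SYSTEMATIC
#   bin          <Datacardbin>
#   observation  <integral of data_obs>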
480    
481    
482    
483    
484 nmohr 1.31 outfile.Close()