TreeAnalyzer/test/analysis.py

points = [['fourtop1100'], ['fourtop900'], ['fourtop700'], ['fourtop500']]
#points = [['fourtop1100']]
    
# for model building:
def get_model():
    # Read in and build the model automatically from the histograms in the root file. 
    # This model will contain all shape uncertainties given according to the templates
    # which also includes rate changes according to the alternate shapes.
    # For more info about this model and naming conventuion, see documentation
    # of build_model_from_rootfile.
    model = build_model_from_rootfile('/uscms/home/algomez/work/CMSSW_4_2_4/src/Yumiceva/TreeAnalyzer/test/mu_disc_templates.root')
    #model = build_model_from_rootfile('/uscms/home/algomez/work/CMSSW_4_2_4/src/Yumiceva/TreeAnalyzer/test/mu_stjet_templates.root')
            
    # If the prediction histogram is zero, but data is non-zero, teh negative log-likelihood
    # is infinity which causes problems for some methods. Therefore, we set all histogram
    # bin entries to a small, but positive value:
    model.fill_histogram_zerobins() # default is 1e-4

    # define what the signal processes are. All other processes are assumed to make up the 
    # 'background-only' model.
    model.set_signal_processes('fourtop*00')
            
    # Add some lognormal rate uncertainties. The first parameter is the name of the
    # uncertainty (which will also be the name of the nuisance parameter), the second
    # is the 'effect' as a fraction, the third one is the process name. The fourth parameter
    # is optional and denotes the channl. The default '*' means that the uncertainty applies
    # to all channels in the same way.
    # Note that you can use the same name for a systematic here as for a shape
    # systematic. In this case, the same parameter will be used; shape and rate changes 
    # will be 100% correlated.
    model.add_lognormal_uncertainty('lumi', math.log(1.045), '*')
    model.add_lognormal_uncertainty('lepeff', math.log(1.03), '*')
    model.add_lognormal_uncertainty('hlteff', math.log(1.03), '*')
    model.add_lognormal_uncertainty('wjets_rate', math.log(1.5), 'wjets')
    model.add_lognormal_uncertainty('zjets_rate', math.log(1.3), 'zjets')
    model.add_lognormal_uncertainty('ttbar_rate', math.log(1.3), 'ttbar')
    model.add_lognormal_uncertainty('singletop_rate', math.log(1.15), 'singletop')
                            
    return model

model = get_model()

# first, it is a good idea to generate a summary report to make sure everything has worked
# as expected. The summary will generate quite some information which should it make easy to spot
# errors like typos in the name of uncertainties, missing shape uncertaintie, etc.
model_summary(model)

# 2. apply the methods

# 2.a. Bayesian limits
# Calculate expected and observed Bayesian limits. For faster run time of this example,
# only make a few mass points. (Omitting the 'signal_procsses' parameter completely would
# process all signals defined as signal processes before; see Section "Common Parameters"
# on the theta auto intro doxygen page for details)
#res = ml_fit_coefficients(model, signal_processes = [''], nuisance_constraint="shape:fix")
#for p in res['']['mu_disc*']:
#    print('%s: %f' % (p, res['']['mu_disc*'][p]))
plot_exp, plot_obs = bayesian_limits(model, signal_processes = points)

# plot_exp and plot_obs are instances of plotutil.plotdata. they contain x/y values and
# bands. You can do many things with these objects such as inspect the x/y/ban
# data, pass then to plotutil.plot routine to make pdf plots, ...
# Here, we will just create text files of the plot data. This is useful if you want
# to apply your own plotting routines or present the result in a text Table.
#plot_exp.write_txt('bayesian_limits_expected_'+points[0][0]+'.txt')
plot_exp.write_txt('bayesian_limits_expected_fourtop_test.txt')
#plot_exp.write_txt('bayesian_limits_expected_stjet_fourtop.txt')
#plot_obs.write_txt('bayesian_limits_observed_'+points[0][0]+'.txt')
plot_obs.write_txt('bayesian_limits_observed_fourtop_test.txt')
#plot_obs.write_txt('bayesian_limits_observed_stjet_fourtop.txt')

    #plot_exp, plot_obs = cls_limits(model,ts="lhclike", signal_processes = points)
    ##plot_exp, plot_obs = cls_limits(model, signal_processes = points)
    #plot_exp.write_txt('cls_limits_expected_'+points[0][0]+'.txt')
    #plot_obs.write_txt('cls_limits_observed_'+points[0][0]+'.txt')

# model_summary, bayesian_limits, and cls_limits also write their results to the 'report' object
# which we can ask to write its results as html page to a certain directory. Use an existing, empty
# directory and point your web browser to it.
#report.write_html('htmlout_'+points[0][0])
report.write_html('htmlout_fourtopBDT_test')
#report.write_html('htmlout_fourtop_stjet')
#report.write_html('htmlout_stjet_'+points[0][0])

# After running theta-auto, you probably want to delete the 'analysis' directory which
# contains intermediate results. Keeping it avoids re-running theta unnecessarily for unchanged configurations
# (e.g., because you just want to change the plot). However, this directory can grow very large over time.
Revision:	1.2
Committed:	Thu Mar 1 17:51:53 2012 UTC (13 years, 2 months ago) by algomez
Content type:	text/x-python
Branch:	MAIN
CVS Tags:	HEAD
Changes since 1.1:	+17 -11 lines
Log Message:	new version of theta file to calculate limits
#	User	Rev	Content
1	algomez	1.2	points = [['fourtop1100'], ['fourtop900'], ['fourtop700'], ['fourtop500']]
2			#points = [['fourtop1100']]
3	algomez	1.1
4			# for model building:
5			def get_model():
6			# Read in and build the model automatically from the histograms in the root file.
7			# This model will contain all shape uncertainties given according to the templates
8			# which also includes rate changes according to the alternate shapes.
9			# For more info about this model and naming conventuion, see documentation
10			# of build_model_from_rootfile.
11	algomez	1.2	model = build_model_from_rootfile('/uscms/home/algomez/work/CMSSW_4_2_4/src/Yumiceva/TreeAnalyzer/test/mu_disc_templates.root')
12			#model = build_model_from_rootfile('/uscms/home/algomez/work/CMSSW_4_2_4/src/Yumiceva/TreeAnalyzer/test/mu_stjet_templates.root')
13	algomez	1.1
14			# If the prediction histogram is zero, but data is non-zero, teh negative log-likelihood
15			# is infinity which causes problems for some methods. Therefore, we set all histogram
16			# bin entries to a small, but positive value:
17			model.fill_histogram_zerobins() # default is 1e-4
18
19			# define what the signal processes are. All other processes are assumed to make up the
20			# 'background-only' model.
21	algomez	1.2	model.set_signal_processes('fourtop*00')
22	algomez	1.1
23			# Add some lognormal rate uncertainties. The first parameter is the name of the
24			# uncertainty (which will also be the name of the nuisance parameter), the second
25			# is the 'effect' as a fraction, the third one is the process name. The fourth parameter
26			# is optional and denotes the channl. The default '*' means that the uncertainty applies
27			# to all channels in the same way.
28			# Note that you can use the same name for a systematic here as for a shape
29			# systematic. In this case, the same parameter will be used; shape and rate changes
30			# will be 100% correlated.
31			model.add_lognormal_uncertainty('lumi', math.log(1.045), '*')
32			model.add_lognormal_uncertainty('lepeff', math.log(1.03), '*')
33			model.add_lognormal_uncertainty('hlteff', math.log(1.03), '*')
34			model.add_lognormal_uncertainty('wjets_rate', math.log(1.5), 'wjets')
35			model.add_lognormal_uncertainty('zjets_rate', math.log(1.3), 'zjets')
36			model.add_lognormal_uncertainty('ttbar_rate', math.log(1.3), 'ttbar')
37			model.add_lognormal_uncertainty('singletop_rate', math.log(1.15), 'singletop')
38
39			return model
40
41			model = get_model()
42
43			# first, it is a good idea to generate a summary report to make sure everything has worked
44			# as expected. The summary will generate quite some information which should it make easy to spot
45			# errors like typos in the name of uncertainties, missing shape uncertaintie, etc.
46			model_summary(model)
47
48			# 2. apply the methods
49
50			# 2.a. Bayesian limits
51			# Calculate expected and observed Bayesian limits. For faster run time of this example,
52			# only make a few mass points. (Omitting the 'signal_procsses' parameter completely would
53			# process all signals defined as signal processes before; see Section "Common Parameters"
54			# on the theta auto intro doxygen page for details)
55			#res = ml_fit_coefficients(model, signal_processes = [''], nuisance_constraint="shape:fix")
56	algomez	1.2	#for p in res['']['mu_disc*']:
57			# print('%s: %f' % (p, res['']['mu_disc*'][p]))
58	algomez	1.1	plot_exp, plot_obs = bayesian_limits(model, signal_processes = points)
59
60			# plot_exp and plot_obs are instances of plotutil.plotdata. they contain x/y values and
61			# bands. You can do many things with these objects such as inspect the x/y/ban
62			# data, pass then to plotutil.plot routine to make pdf plots, ...
63			# Here, we will just create text files of the plot data. This is useful if you want
64			# to apply your own plotting routines or present the result in a text Table.
65	algomez	1.2	#plot_exp.write_txt('bayesian_limits_expected_'+points[0][0]+'.txt')
66			plot_exp.write_txt('bayesian_limits_expected_fourtop_test.txt')
67			#plot_exp.write_txt('bayesian_limits_expected_stjet_fourtop.txt')
68			#plot_obs.write_txt('bayesian_limits_observed_'+points[0][0]+'.txt')
69			plot_obs.write_txt('bayesian_limits_observed_fourtop_test.txt')
70			#plot_obs.write_txt('bayesian_limits_observed_stjet_fourtop.txt')
71	algomez	1.1
72			#plot_exp, plot_obs = cls_limits(model,ts="lhclike", signal_processes = points)
73			##plot_exp, plot_obs = cls_limits(model, signal_processes = points)
74			#plot_exp.write_txt('cls_limits_expected_'+points[0][0]+'.txt')
75			#plot_obs.write_txt('cls_limits_observed_'+points[0][0]+'.txt')
76
77			# model_summary, bayesian_limits, and cls_limits also write their results to the 'report' object
78			# which we can ask to write its results as html page to a certain directory. Use an existing, empty
79			# directory and point your web browser to it.
80	algomez	1.2	#report.write_html('htmlout_'+points[0][0])
81			report.write_html('htmlout_fourtopBDT_test')
82			#report.write_html('htmlout_fourtop_stjet')
83			#report.write_html('htmlout_stjet_'+points[0][0])
84	algomez	1.1
85			# After running theta-auto, you probably want to delete the 'analysis' directory which
86			# contains intermediate results. Keeping it avoids re-running theta unnecessarily for unchanged configurations
87			# (e.g., because you just want to change the plot). However, this directory can grow very large over time.