ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/Jeng/scripts/ikstest.cc
Revision: 1.2
Committed: Thu Sep 2 04:50:39 2010 UTC (14 years, 8 months ago) by jengbou
Content type: text/plain
Branch: MAIN
Changes since 1.1: +226 -55 lines
Log Message:
update

File Contents

# Content
1 #include <vector>
2 #include <TFile.h>
3 #include <TH1.h>
4 #include <TH2.h>
5 #include <TLatex.h>
6 #include <TCanvas.h>
7 #include <TMath.h>
8 #include <TLegend.h>
9 #include <TObject.h>
10 #include <iostream>
11 #include <sstream>
12 #include <fstream>
13 #include <iomanip>
14 #include <map>
15
16 using namespace std;
17
18 // User defined parameters
19 bool useInv = false; // whether to use n-1 QCD template
20 bool realData = false;
21 const double stepsize = 0.001;
22
23 //define the constants: 2.78/pb
24 const double weight_[3] = {0.0506107, //QCD
25 0.0088222, //WJets
26 0.000295 //TTbar
27 };
28 const Double_t procQCD = 1.46;
29 const Double_t procWjets = 1.03;
30 const Double_t procttjets = 1.0;
31
32 struct testMC {
33 testMC(Double_t p = 0., Double_t sb = 0., Double_t ss = 0.){prob=p; scaleF_backg = sb; scaleF_sample = ss;}
34 Double_t prob;
35 Double_t scaleF_backg;
36 Double_t scaleF_sample;
37 };
38
39 //function for doing KS test
40 vector<testMC> doKStest(Double_t NsS, Double_t Ns1, Double_t Ns2, TH1F* mixS, TH1F* s1, TH1F* s2) {
41 vector<testMC> output;
42 //define the scale factors
43 Double_t sf1 = 0.0; // QCD
44 Double_t sf2 = 0.0; // Wjets
45 //KS test
46 do {
47 sf1 = (NsS - Ns2*sf2)/Ns1;
48 if (sf1 < 0) break;
49 //cout << "..........sf1 = " << sf1 << endl;
50 int nbins = mixS->GetNbinsX();
51 double xmin = mixS->GetXaxis()->GetBinLowEdge(1);
52 double xmax = mixS->GetXaxis()->GetBinUpEdge(nbins);
53 TH1F *test = new TH1F("test", "", nbins, xmin, xmax);
54 test -> Sumw2();
55 test -> Add(s1,s2,sf1,sf2);
56 //test->Scale(1./(1.*test->Integral()));
57 TString option = (!useInv && !realData) ? "N" : "";
58 Double_t probability = mixS -> KolmogorovTest(test,option);
59 testMC temp = testMC(probability,sf1,sf2);
60 output.push_back(temp);
61 // cout << "probability = " << setw(15) << temp.prob
62 // << "; sfQCD = " << setw(10) << temp.scaleF_backg
63 // << "; sfWjets = " << setw(6) << temp.scaleF_sample << endl;
64 delete test;
65 sf2 = sf2 + stepsize;
66 } while(sf1 > 0 && sf2 <= 2.0);
67 return output;
68 }
69
70 //get the maximum KS test result
71 testMC getMax(vector<testMC> vec) {
72 testMC maxKSRes;
73 Double_t maximum = 0.0;
74 for (size_t i = 0; i < vec.size(); i++) {
75 if (maximum < vec.at(i).prob) {
76 maximum = vec.at(i).prob;
77 maxKSRes = vec.at(i);
78 }
79 }
80 cout << "for maximum: " << setw(12) << maxKSRes.prob
81 << "; sb = " << setw(10) << maxKSRes.scaleF_backg
82 << "; ss = " << setw(5) << maxKSRes.scaleF_sample << endl;
83 return maxKSRes;
84 }
85
86
87 //=================================
88 // Main program
89 //=================================
90 void ikstest() {
91 //Style();
92 TLatex *latex = new TLatex();
93 latex->SetNDC();
94
95 ofstream outprint( "ikstest_results_20100901.txt" );
96 //open the files with histograms
97 map<string,TFile*> mfile;
98 mfile["Data"] = TFile::Open("skimmed_Data_20100901/Data_RefSel_v3.root");
99 // n-1 cuts
100 if (useInv) {
101 if (realData)
102 // mfile["InvSel"] = TFile::Open("skimmed_Data_20100825/Data_D0ge0p03.root");
103 mfile["InvSel"] = TFile::Open("skimmed_Data_20100901/Data_RelIsoge0p1_v3.root");
104 else
105 // mfile["InvSel"] = TFile::Open("skimmed_MC/QCD_D0ge0p03.root");
106 mfile["InvSel"] = TFile::Open("skimmed_MC/QCD_RelIsoge0p1_v3.root");
107 }
108
109 // RefSel MC
110 mfile["0"] = TFile::Open("skimmed_MC/QCD_RefSel_v3.root");
111 mfile["1"] = TFile::Open("skimmed_MC/WJets_RefSel_v3.root");
112 mfile["2"] = TFile::Open("skimmed_MC/TTbar_RefSel_v3.root");
113
114 //define histograms and related parameters
115 string histoName[3] = {"h_mu_pt_calo","h_met_calo","h_mt_calo"};
116 string histoLabelX[3] = {"p_{T}^{good Muons}", "E_{T}^{#nu}", "m_{T}^{W}"};
117 Int_t xbins[3] = {20,20,40};
118 Double_t xlow[3] = {0.,0.,0.};
119 Double_t xhigh[3] = {100.,100.,200.};
120 string sample[3] = {"QCD","Wjets","ttjets"};
121
122 TH1F* h_[9];
123 TH1F* mixh_[3];
124 TH1F* hQCD_NEW[3];
125 TH1F* hKSres_[3];
126 TH1F* hKSvalues_[3];
127
128 //load the histograms from the root files
129 for (int i = 0; i < 3; i++) {// 3 variables
130 //cout << "file[" << i << "] : " << endl;
131 string nameNewHisto = "mix_"+histoName[i];
132 string nameNewHistoSFKS = "finalSF_"+histoName[i];
133 string nameNewHistoKSvalues = "KSvalues_"+histoLabelX[i];
134
135 mixh_[i] = new TH1F(nameNewHisto.c_str(),"",xbins[i],xlow[i],xhigh[i]);
136 hKSres_[i] = new TH1F(nameNewHistoSFKS.c_str(),"",xbins[i],xlow[i],xhigh[i]);
137 hKSvalues_[i] = new TH1F(nameNewHistoKSvalues.c_str(),"",2./stepsize, stepsize, 2.+stepsize);
138
139 if (!useInv) {//use QCD MC sample
140 hQCD_NEW[i] = (TH1F*) mfile["0"]->Get(TString(histoName[i]))->Clone();
141 hQCD_NEW[i] -> Scale(weight_[0]);
142 hQCD_NEW[i] -> SetName((histoName[i]).c_str());
143 }
144 else {
145 hQCD_NEW[i] = (TH1F*) mfile["InvSel"]->Get(TString(histoName[i]));
146 if (!realData) hQCD_NEW[i] -> Scale(weight_[0]);
147 hQCD_NEW[i] -> SetName((histoName[i]).c_str());
148 }
149
150 mixh_[i] -> Sumw2();
151 hKSres_[i] -> Sumw2();
152 hKSvalues_[i] -> Sumw2();
153 }
154
155 for (int n = 0; n < 3; ++n) {// 3 MC samples
156 for (int ihisto = 0; ihisto < 3; ihisto++) {// 3 variables
157 //cout << "Variable[" << ihisto << "]" << endl;
158 string histo_name = histoName[ihisto]+sample[n];
159 ostringstream ss;
160 ss << n;
161 h_[n*3+ihisto] = (TH1F*) mfile[ss.str()]->Get(TString(histoName[ihisto]))->Clone();
162 h_[n*3+ihisto] -> Scale(weight_[n]);
163 h_[n*3+ihisto] -> SetName(histo_name.c_str());
164 }
165 }
166
167 //create the mixed samples = "data"
168 TCanvas *canvas0 = new TCanvas("Data","Data distributions");
169 canvas0->Divide(3,1);
170 for (int i = 0; i < 3; i++) {
171 canvas0->cd(i+1);
172 if (!realData) {
173 mixh_[i] -> Add(h_[i],h_[i+3], procQCD,procWjets);
174 //mixh_[i] -> Add(mixh_[i],h_[i+6], 1,procttjets);
175 //cout << "histo_name: " << mixh_[0]->GetNbinsX() << endl;
176 }
177 else {
178 TH1F *htmp = (TH1F*) mfile["Data"]->Get(TString(histoName[i]));
179 mixh_[i] -> Add(htmp,1.);
180 }
181 mixh_[i]->DrawClone();
182 }
183 canvas0->SaveAs("Data_distributions.pdf");
184
185 //define the weight corrections for each sample
186 double NevData = mixh_[2]->Integral();
187 double corr_NevQCD = h_[2]->Integral();
188 double corr_NevQCD_NEW = hQCD_NEW[2]->Integral();
189 double corr_NevWjets = h_[5]->Integral();
190 double corr_Nevttjets = h_[8]->Integral();
191 double corr_Nevmix = procQCD*corr_NevQCD+procWjets*corr_NevWjets;
192 //double corr_Nevmix = procQCD*corr_NevQCD+procWjets*corr_NevWjets+procttjets*corr_Nevttjets;
193 if (!realData)
194 outprint << "Events mix sample = " << corr_Nevmix << endl;
195 else
196 outprint << "Events in Data = " << NevData << endl;
197 outprint << "Events QCD sample = " << corr_NevQCD << endl;
198 outprint << "Events Wjets sample = " << corr_NevWjets << endl;
199 outprint << "Events InvSel QCD sample = " << corr_NevQCD_NEW << endl;
200
201 //define the containers for chosen numbers (coressponding to the max KStest result)
202 testMC maxProb[3];
203
204 //define the scale factors calculated using information obtained from all parameters
205 Double_t SFbackg = 0.0;
206 Double_t sumSFbackg = 0.0;
207 Double_t SFsample = 0.0;
208 Double_t sumSFsample = 0.0;
209 Double_t allKS = 0.0;
210
211 //do the KS test by varying the scale factors
212 for (int i = 0; i < 3; i++) { // 3 variables
213 TH1F *data = (TH1F*)mixh_[i]->Clone();
214 data -> SetName("dataClone");
215 //data -> Scale(1./data->Integral());
216 vector<testMC> resultsKS = doKStest((realData ? NevData : corr_Nevmix),
217 (useInv ? corr_NevQCD_NEW : corr_NevQCD),
218 corr_NevWjets,
219 data, hQCD_NEW[i], h_[i+3]);
220 testMC tksmax = getMax(resultsKS);
221 maxProb[i] = tksmax;
222 outprint << "\nFor the plot " << histoLabelX[i] << " the results are:"<< endl;
223 outprint << "\tmax Probability = " << maxProb[i].prob << endl;
224 outprint << "\tproc_background = " << maxProb[i].scaleF_backg << endl;
225 outprint << "\tproc_sample = " << maxProb[i].scaleF_sample << endl;
226
227 outprint << "\n\tpercent_B of Data = "
228 << maxProb[i].scaleF_backg*corr_NevQCD_NEW*100/(realData ? NevData : corr_Nevmix) << endl;
229 outprint << "\tpercent_S of Data = "
230 << maxProb[i].scaleF_sample*corr_NevWjets*100/(realData ? NevData : corr_Nevmix) << endl;
231 outprint << "---------------------------" << endl;
232
233 //create the mixed samples with KS test results
234 sumSFbackg += maxProb[i].prob*maxProb[i].scaleF_backg;
235 sumSFsample += maxProb[i].prob*maxProb[i].scaleF_sample;
236 allKS += maxProb[i].prob;
237
238 //fill a histogram with the results from the KS test for each variable
239 for (int jiter = 0; jiter < resultsKS.size(); jiter++) {
240 if (resultsKS.at(jiter).prob == 1.)
241 cout << "variable [" << i << "]: prob[" << jiter << "]= " << resultsKS.at(jiter).prob << endl;
242 hKSvalues_[i]->SetBinContent(jiter,resultsKS.at(jiter).prob);
243 }
244 delete data;
245 }
246
247 //calculate the final scale factors
248 SFbackg = sumSFbackg/allKS;
249 SFsample = sumSFsample/allKS;
250 outprint << "allKS = " << allKS << "\tbackground = " << SFbackg << "\tsample = " << SFsample << endl;
251 outprint << "\tcombined percent_B of Data = "
252 << SFbackg*corr_NevQCD_NEW*100/(realData ? NevData : corr_Nevmix) << endl;
253 outprint << "\tcombined percent_S of Data = "
254 << SFsample*corr_NevWjets*100/(realData ? NevData : corr_Nevmix) << endl;
255 outprint << "\n" << endl;
256 outprint << "=================================" << endl;
257 outprint << "\n" << endl;
258
259
260 //=================================
261 // Plots
262 //=================================
263 for (int i = 0; i < 3; i++) {// 3 variables
264 hKSres_[i] -> Add(hQCD_NEW[i],h_[i+3],SFbackg,SFsample);
265 outprint << "hKSres->Integral() = " << hKSres_[i]->Integral() << endl;
266 outprint << "Data->Integral() = " << mixh_[i]->Integral() << endl;
267
268 mixh_[i]->Rebin(2);
269 hQCD_NEW[i]->Rebin(2);
270 h_[i]->Rebin(2);
271 h_[i+3]->Rebin(2);
272 hKSres_[i]->Rebin(2);
273 //hKSvalues_[i]->Rebin(2);
274
275 mixh_[i] ->SetLineColor(1);
276 hQCD_NEW[i] ->SetLineColor(3);
277 h_[i] ->SetLineColor(6);
278 h_[i+3] ->SetLineColor(4);
279 hKSres_[i] ->SetLineColor(2);
280 hKSvalues_[i]->SetLineColor(i+1);
281
282 mixh_[i] ->SetMarkerColor(1);
283 hQCD_NEW[i] ->SetMarkerColor(3);
284 h_[i] ->SetMarkerColor(6);
285 h_[i+3] ->SetMarkerColor(4);
286 hKSres_[i] ->SetMarkerColor(2);
287 hKSvalues_[i]->SetMarkerColor(i+1);
288
289 mixh_[i] ->SetMarkerStyle(24);
290 hQCD_NEW[i] ->SetMarkerStyle(20);
291 h_[i] ->SetMarkerStyle(20);
292 h_[i+3] ->SetMarkerStyle(20);
293 hKSres_[i] ->SetMarkerStyle(20);
294 hKSvalues_[i]->SetMarkerStyle(20);
295
296 mixh_[i] ->SetMarkerSize(1.4);
297 hQCD_NEW[i] ->SetMarkerSize(1.1);
298 h_[i] ->SetMarkerSize(1.1);
299 h_[i+3] ->SetMarkerSize(1.1);
300 hKSres_[i] ->SetMarkerSize(0.9);
301 hKSvalues_[i]->SetMarkerSize(1.1);
302
303 mixh_[i] ->SetStats(0);
304 hQCD_NEW[i] ->SetStats(0);
305 h_[i] ->SetStats(0);
306 h_[i+3] ->SetStats(0);
307 hKSres_[i] ->SetStats(0);
308 hKSvalues_[i]->SetStats(0);
309
310 mixh_[i]->GetXaxis()->SetTitle(histoLabelX[i].c_str());
311 mixh_[i]->GetYaxis()->SetTitle("Entries");
312 hKSres_[i]->GetXaxis()->SetTitle(histoLabelX[i].c_str());
313 hKSres_[i]->GetYaxis()->SetTitle("Entries");
314 hKSvalues_[i]->GetXaxis()->SetTitle("iteration #");
315 hKSvalues_[i]->GetYaxis()->SetTitle("KS test values");
316
317 string nameCanvas1 = histoName[i]+"_QCD.pdf";
318 TCanvas *canvas1 = new TCanvas(nameCanvas1.c_str(), "");
319 hQCD_NEW[i] -> Scale(1./hQCD_NEW[i]->Integral());
320 h_[i] -> Scale(1./h_[i]->Integral());
321 h_[i+3] -> Scale(1./h_[i+3]->Integral());
322 outprint << "For " << histoName[i] << " , the KStest result btw MC_QCD/InvSel is = "
323 << h_[i] -> KolmogorovTest(hQCD_NEW[i],"") << endl;
324 hQCD_NEW[i]->Draw("P");
325 h_[i]->Draw("sameP");
326 h_[i+3]->Draw("sameP");
327 TLegend *legend1 = new TLegend(0.7, 0.70, 0.9, 0.85);
328 legend1->AddEntry(h_[i], "default");
329 legend1->AddEntry(h_[i+3], "W+jets");
330 legend1->AddEntry(hQCD_NEW[i], "new");
331 legend1->Draw();
332 legend1->SetFillColor(kWhite);
333 latex->DrawLatex(0.22,0.91,histoName[i].c_str());
334 canvas1->SetLogy();
335 canvas1->SaveAs(nameCanvas1.c_str());
336
337 string nameCanvas2 = histoName[i]+"_dataKS.pdf";
338 TCanvas *canvas2 = new TCanvas(nameCanvas2.c_str(), "");
339 hKSres_[i]->Draw("P");
340 mixh_[i]->Draw("sameP");
341 TLegend *legend2 = new TLegend(0.7, 0.70, 0.9, 0.85);
342 legend2->AddEntry(mixh_[i], "Data");
343 legend2->AddEntry(hKSres_[i], "KS result");
344 legend2->Draw();
345 legend2->SetFillColor(kWhite);
346 latex->DrawLatex(0.22,0.91,histoName[i].c_str());
347 canvas2->SetLogy();
348 canvas2->SaveAs(nameCanvas2.c_str());
349
350 }
351
352
353 TCanvas *canvas3 = new TCanvas("KStestValues", "");
354 //hKSvalues_[0]->GetXaxis()->SetRangeUser(0.9,1.1);
355 //hKSvalues_[0]->GetYaxis()->SetRangeUser(1e-3,1.1);
356 hKSvalues_[0]->Draw();
357 hKSvalues_[1]->SetLineColor(2);
358 hKSvalues_[1]->Draw("same");
359 hKSvalues_[2]->SetLineColor(4);
360 hKSvalues_[2]->Draw("same");
361 TLegend *legend3 = new TLegend(0.7, 0.70, 0.9, 0.85);
362 legend3->AddEntry(hKSvalues_[0], "muon_pT");
363 legend3->AddEntry(hKSvalues_[1], "MET");
364 legend3->AddEntry(hKSvalues_[2], "W_mT");
365 legend3->Draw();
366 legend3->SetFillColor(kWhite);
367 latex->DrawLatex(0.22,0.91,"KS test values");
368 canvas3->SetLogy();
369 string nameCanvas3 = "KStestValues_newQCD.pdf";
370 canvas3->SaveAs(nameCanvas3.c_str());
371
372 }