ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/Jeng/scripts/ikstest.cc
Revision: 1.5
Committed: Tue Sep 7 03:59:59 2010 UTC (14 years, 8 months ago) by jengbou
Content type: text/plain
Branch: MAIN
Changes since 1.4: +326 -295 lines
Log Message:
update

File Contents

# Content
1 #include "ikstest.h"
2
3 //=================================
4 // Program to run IKS test
5 // * Input directories:
6 // * Data: skimmed_Data_x.xxpb-1
7 // => default selection : Data_<suffix>.root
8 // => n-1 selection : Data_<suffix>_<invName>.root
9 // * MC : skimmed_MC {QCD,WJets,TTbar...}
10 // => default selection : QCD_<suffix>.root
11 // => n-1 selection : Data_<suffix>_<invName>.root
12 // * Upmost output dir specified by <desDir>
13 // subdirectory created according to <useInv> and <realData>
14 // => <desDir'><xyz><suffix>.pdf (.txt)
15 // * e.g. to do KS test on data with MC QCD shape in signal region:
16 // useInv=false; realData=true
17 //=================================
18
19 using namespace std;
20
21 //define the constants: 2.88/pb
22 const double weight_[3] = {0.0524313, //QCD
23 0.0091395, //WJets
24 0.000306 //TTbar
25 };
26 const Double_t procQCD = 1.46;
27 const Double_t procWjets = 1.03;
28 const Double_t procttjets = 1.0;
29
30 // Output directory
31 TString desDir = "Results_2.88pb-1/";
32 // User defined parameters
33 bool useInv = false; // whether to use n-1 QCD template
34 bool realData = false;
35 // Ntuples to use
36 TString suffix = "Sel0"; // Suffix of selection
37 TString invNames[2] = {"RelIsogt0p1","D0gt0p02"};
38 map<TString,TCanvas*> cvs; // map of usual histogram
39
40 //=================================
41 // Main program
42 //=================================
43 void ikstest() {
44 gROOT->SetStyle("CMS");
45 TLatex *latex = new TLatex();
46 latex->SetNDC();
47 TString invName;
48 int size_ninv = (useInv ? 2 : 1);
49 for (int ninv = 0;ninv < size_ninv; ++ninv) {
50 invName = invNames[ninv];
51 if (!useInv) {
52 if (!realData) desDir += "MC/";
53 else desDir += "Data_MC/";
54 } else {
55 if (!realData) desDir += "MC_"+invName+"/";
56 else desDir += "Data_"+invName+"/";
57 }
58 struct stat stDir;
59 if (!stat(desDir,&stDir)){
60 cout << "Output folder exists! Continues? (enter to continue; 'q' for quit)" << endl;
61 char incmd;
62 cin.get(incmd);
63 if (incmd == 'q') return;
64 } else {
65 cout << "Creating folder : " << desDir << endl;
66 if (mkdir(desDir,0755) == -1){
67 std::cerr << "Error creating folder" << endl;
68 return;
69 }
70 }
71
72 ofstream outprint(TString(desDir+"Results_"+suffix+".txt"));
73 //open the files with histograms
74 map<string,TFile*> mfile;
75 mfile["Data"] = TFile::Open(TString("skimmed_Data_2.88pb-1/Data_"+suffix+".root"));
76 // n-1 cuts
77 if (useInv) {
78 if (realData)
79 mfile["InvSel"] = TFile::Open(TString("skimmed_Data_2.88pb-1/Data_"+suffix+"_"+invName+".root"));
80 else
81 mfile["InvSel"] = TFile::Open(TString("skimmed_MC/v5/QCD_"+suffix+"_"+invName+".root"));
82 }
83 // RefSel MC
84 mfile["0"] = TFile::Open(TString("skimmed_MC/v5/QCD_"+suffix+".root"));
85 mfile["1"] = TFile::Open(TString("skimmed_MC/v5/WJets_"+suffix+".root"));
86 mfile["2"] = TFile::Open(TString("skimmed_MC/v5/TTbar_"+suffix+".root"));
87
88 //define histograms and related parameters
89 string histoName[3] = {"h_mu_pt_calo","h_met_calo","h_mt_calo"};
90 string histoLabelX[3] = {"p_{T}^{#mu} [GeV/c]", "#slash{E}_{T} [GeV/c]", "M_{T}^{W} [GeV/c^{2}]"};
91 Int_t xbins[3] = {20,20,40};
92 Double_t xlow[3] = {0.,0.,0.};
93 Double_t xhigh[3] = {100.,100.,200.};
94 string sample[3] = {"QCD","Wjets","ttjets"};
95
96 TH1F* h_[9];
97 TH1F* mixh_[3];
98 TH1F* hQCD_NEW[3];
99 TH1F* hKSres_[3];
100 TH1F* hKSvalues_[3];
101
102 //load the histograms from the root files
103 for (int i = 0; i < 3; i++) {// 3 variables
104 //cout << "file[" << i << "] : " << endl;
105 string nameNewHisto = "mix_"+histoName[i];
106 string nameNewHistoSFKS = "finalSF_"+histoName[i];
107 string nameNewHistoKSvalues = "KSvalues_"+histoLabelX[i];
108
109 mixh_[i] = new TH1F(nameNewHisto.c_str(),"",xbins[i],xlow[i],xhigh[i]);
110 hKSres_[i] = new TH1F(nameNewHistoSFKS.c_str(),"",xbins[i],xlow[i],xhigh[i]);
111 hKSvalues_[i] = new TH1F(nameNewHistoKSvalues.c_str(),"",2./stepsize, stepsize, 2.+stepsize);
112
113 if (!useInv) {//use QCD MC sample
114 hQCD_NEW[i] = (TH1F*) mfile["0"]->Get(TString(histoName[i]))->Clone();
115 hQCD_NEW[i] -> Scale(weight_[0]);
116 hQCD_NEW[i] -> SetName((histoName[i]).c_str());
117 }
118 else {
119 hQCD_NEW[i] = (TH1F*) mfile["InvSel"]->Get(TString(histoName[i]));
120 if (!realData) hQCD_NEW[i] -> Scale(weight_[0]);
121 hQCD_NEW[i] -> SetName((histoName[i]).c_str());
122 }
123
124 mixh_[i] -> Sumw2();
125 hKSres_[i] -> Sumw2();
126 hKSvalues_[i] -> Sumw2();
127 }
128
129 for (int n = 0; n < 3; ++n) {// 3 MC samples
130 for (int ihisto = 0; ihisto < 3; ihisto++) {// 3 variables
131 //cout << "Variable[" << ihisto << "]" << endl;
132 string histo_name = histoName[ihisto]+sample[n];
133 ostringstream ss;
134 ss << n;
135 h_[n*3+ihisto] = (TH1F*) mfile[ss.str()]->Get(TString(histoName[ihisto]))->Clone();
136 h_[n*3+ihisto] -> Scale(weight_[n]);
137 h_[n*3+ihisto] -> SetName(histo_name.c_str());
138 }
139 }
140
141 //create the mixed samples = "data"
142 TString cvsName0 = "Data";
143 if (useInv) {
144 cvsName0 += "_";
145 cvsName0 += invName;
146 }
147 cvs[cvsName0] = new TCanvas(cvsName0,"Data distributions",600,700);
148 cvs[cvsName0]->Divide(3,1);
149 for (int i = 0; i < 3; i++) {
150 cvs[cvsName0]->cd(i+1);
151 if (!realData) {
152 mixh_[i] -> Add(h_[i],h_[i+3], procQCD,procWjets);
153 //mixh_[i] -> Add(mixh_[i],h_[i+6], 1,procttjets);
154 //cout << "histo_name: " << mixh_[0]->GetNbinsX() << endl;
155 }
156 else {
157 TH1F *htmp = (TH1F*) mfile["Data"]->Get(TString(histoName[i]));
158 mixh_[i] -> Add(htmp,1.);
159 }
160 mixh_[i]->GetXaxis()->SetTitle(histoLabelX[i].c_str());
161 mixh_[i]->GetYaxis()->SetTitle("Entries");
162 mixh_[i]->DrawClone();
163 }
164 cvs[cvsName0]->SaveAs(TString(desDir+"Data_distributions.pdf"));
165
166 //define the weight corrections for each sample
167 double NevData = mixh_[2]->Integral();
168 double corr_NevQCD = h_[2]->Integral();
169 double corr_NevQCD_NEW = hQCD_NEW[2]->Integral();
170 double corr_NevWjets = h_[5]->Integral();
171 double corr_Nevttjets = h_[8]->Integral();
172 double corr_Nevmix = procQCD*corr_NevQCD+procWjets*corr_NevWjets;
173 //double corr_Nevmix = procQCD*corr_NevQCD+procWjets*corr_NevWjets+procttjets*corr_Nevttjets;
174 if (!realData)
175 outprint << "Events mix sample = " << corr_Nevmix << endl;
176 else
177 outprint << "Events in Data = " << NevData << endl;
178 outprint << "Events QCD sample = " << corr_NevQCD << endl;
179 outprint << "Events Wjets sample = " << corr_NevWjets << endl;
180 outprint << "Events InvSel sample = " << corr_NevQCD_NEW << endl;
181
182 //define the containers for chosen numbers (coressponding to the max KStest result)
183 testMC maxProb[3];
184
185 //define the scale factors calculated using information obtained from all parameters
186 Double_t SFbackg = 0.0;
187 Double_t sumSFbackg = 0.0;
188 Double_t SFsample = 0.0;
189 Double_t sumSFsample = 0.0;
190 Double_t allKS = 0.0;
191
192 //do the KS test by varying the scale factors
193 for (int i = 0; i < 3; i++) { // 3 variables
194 TH1F *data = (TH1F*)mixh_[i]->Clone();
195 data -> SetName("dataClone");
196 //data -> Scale(1./data->Integral());
197 vector<testMC> resultsKS = doKStest((realData ? NevData : corr_Nevmix),
198 (useInv ? corr_NevQCD_NEW : corr_NevQCD),
199 corr_NevWjets,
200 data, hQCD_NEW[i], h_[i+3]);
201 testMC tksmax = getMax(resultsKS);
202 maxProb[i] = tksmax;
203 outprint << "\nFor the plot " << histoLabelX[i] << " the results are:"<< endl;
204 outprint << "\tmax Probability = " << maxProb[i].prob << endl;
205 outprint << "\tproc_background = " << maxProb[i].scaleF_backg << endl;
206 outprint << "\tproc_sample = " << maxProb[i].scaleF_sample << endl;
207
208 outprint << "\n\tpercent_B of Data = "
209 << maxProb[i].scaleF_backg*corr_NevQCD_NEW*100/(realData ? NevData : corr_Nevmix) << endl;
210 outprint << "\tpercent_S of Data = "
211 << maxProb[i].scaleF_sample*corr_NevWjets*100/(realData ? NevData : corr_Nevmix) << endl;
212 outprint << "---------------------------" << endl;
213
214 //create the mixed samples with KS test results
215 sumSFbackg += maxProb[i].prob*maxProb[i].scaleF_backg;
216 sumSFsample += maxProb[i].prob*maxProb[i].scaleF_sample;
217 allKS += maxProb[i].prob;
218
219 //fill a histogram with the results from the KS test for each variable
220 for (int jiter = 0; jiter < resultsKS.size(); jiter++) {
221 if (resultsKS.at(jiter).prob == 1.)
222 cout << "variable [" << i << "]: prob[" << jiter << "]= " << resultsKS.at(jiter).prob << endl;
223 hKSvalues_[i]->SetBinContent(jiter,resultsKS.at(jiter).prob);
224 }
225 delete data;
226 }
227
228 //calculate the final scale factors
229 SFbackg = sumSFbackg/allKS;
230 SFsample = sumSFsample/allKS;
231 outprint << "allKS = " << allKS << "\tbackground = " << SFbackg << "\tsample = " << SFsample << endl;
232 outprint << "==> Scale Factor for QCD MC = " << SFbackg*corr_NevQCD_NEW/corr_NevQCD << endl;
233 outprint << "\tcombined percent_B of Data = "
234 << SFbackg*corr_NevQCD_NEW*100/(realData ? NevData : corr_Nevmix) << endl;
235 outprint << "\tcombined percent_S of Data = "
236 << SFsample*corr_NevWjets*100/(realData ? NevData : corr_Nevmix) << endl;
237 outprint << "\n" << endl;
238 outprint << "=================================" << endl;
239 outprint << "\n" << endl;
240
241
242 //=================================
243 // Plots
244 //=================================
245 for (int i = 0; i < 3; i++) {// 3 variables
246 hKSres_[i] -> Add(hQCD_NEW[i],h_[i+3],SFbackg,SFsample);
247 outprint << "hKSres->Integral() = " << hKSres_[i]->Integral() << endl;
248 outprint << "Data->Integral() = " << mixh_[i]->Integral() << endl;
249
250 mixh_[i]->Rebin(2);
251 hQCD_NEW[i]->Rebin(2);
252 h_[i]->Rebin(2);
253 h_[i+3]->Rebin(2);
254 hKSres_[i]->Rebin(2);
255 //hKSvalues_[i]->Rebin(2);
256
257 mixh_[i] ->SetLineColor(1);
258 hQCD_NEW[i] ->SetLineColor(2);
259 h_[i] ->SetLineColor(4);
260 h_[i+3] ->SetLineColor(3);
261 hKSres_[i] ->SetLineColor(2);
262 hKSvalues_[i]->SetLineColor(i+1);
263
264 mixh_[i] ->SetMarkerColor(1);
265 hQCD_NEW[i] ->SetMarkerColor(2);
266 h_[i] ->SetMarkerColor(4);
267 h_[i+3] ->SetMarkerColor(3);
268 hKSres_[i] ->SetMarkerColor(2);
269 hKSvalues_[i]->SetMarkerColor(i+1);
270
271 mixh_[i] ->SetMarkerStyle(24);
272 hQCD_NEW[i] ->SetMarkerStyle(20);
273 h_[i] ->SetMarkerStyle(20);
274 h_[i+3] ->SetMarkerStyle(20);
275 hKSres_[i] ->SetMarkerStyle(20);
276 hKSvalues_[i]->SetMarkerStyle(20);
277
278 mixh_[i] ->SetMarkerSize(1.4);
279 hQCD_NEW[i] ->SetMarkerSize(1.1);
280 h_[i] ->SetMarkerSize(1.1);
281 h_[i+3] ->SetMarkerSize(1.1);
282 hKSres_[i] ->SetMarkerSize(0.9);
283 hKSvalues_[i]->SetMarkerSize(1.1);
284
285 mixh_[i] ->SetStats(0);
286 hQCD_NEW[i] ->SetStats(0);
287 h_[i] ->SetStats(0);
288 h_[i+3] ->SetStats(0);
289 hKSres_[i] ->SetStats(0);
290 hKSvalues_[i]->SetStats(0);
291
292 hKSres_[i]->GetXaxis()->SetTitle(histoLabelX[i].c_str());
293 hKSres_[i]->GetYaxis()->SetTitle("Entries");
294 hKSvalues_[i]->GetXaxis()->SetTitle("iteration #");
295 hKSvalues_[i]->GetYaxis()->SetTitle("KS test values");
296 h_[i]->GetXaxis()->SetTitle(histoLabelX[i].c_str());
297 h_[i]->GetYaxis()->SetTitle("A.U.");
298
299 TString nameCanvas1 = desDir+histoName[i]+"_QCD_"+suffix+".pdf";
300 TString cvsName1 = histoName[i]+"_QCD";
301 if(useInv) cvsName1 = cvsName1 + "_" + invName;
302 cvs[cvsName1] = new TCanvas(cvsName1,"",600,700);
303 hQCD_NEW[i] -> Scale(1./hQCD_NEW[i]->Integral());
304 h_[i] -> Scale(1./h_[i]->Integral());
305 h_[i+3] -> Scale(1./h_[i+3]->Integral());
306 outprint << "For " << histoName[i] << " , the KStest result btw MC_QCD/InvSel is = "
307 << h_[i] -> KolmogorovTest(hQCD_NEW[i],"") << endl;
308 h_[i]->Draw("P");
309 if (useInv)
310 hQCD_NEW[i]->Draw("sameP");
311 h_[i+3]->Draw("sameP");
312 TLegend *legend1 = new TLegend(0.7, 0.70, 0.9, 0.85);
313 legend1->AddEntry(h_[i], "QCD");
314 if (useInv)
315 legend1->AddEntry(hQCD_NEW[i], "QCD - InvSel");
316 legend1->AddEntry(h_[i+3], "W+jets");
317 legend1->Draw();
318 legend1->SetFillColor(kWhite);
319 //latex->DrawLatex(0.22,0.91,histoName[i].c_str());
320 //cvs[cvsName1]->SetLogy();
321 cvs[cvsName1]->SaveAs(nameCanvas1);
322
323 TString nameCanvas2 = desDir+histoName[i]+"_dataKS_"+suffix+".pdf";
324 TString cvsName2 = histoName[i]+"_dataKS";
325 if(useInv) cvsName2 = cvsName2 + "_" + invName;
326 cvs[cvsName2] = new TCanvas(cvsName2,"",600,700);
327 hKSres_[i]->Draw("P");
328 mixh_[i]->Draw("sameP");
329 TLegend *legend2 = new TLegend(0.7, 0.70, 0.9, 0.85);
330 legend2->AddEntry(mixh_[i], "Data");
331 legend2->AddEntry(hKSres_[i], "KS result");
332 legend2->Draw();
333 legend2->SetFillColor(kWhite);
334 //latex->DrawLatex(0.22,0.91,histoName[i].c_str());
335 //cvs[cvsName2]->SetLogy();
336 cvs[cvsName2]->SaveAs(nameCanvas2);
337
338 }
339
340
341 TString cvsName3 = "KStestValues";
342 if(useInv) cvsName3 = cvsName3 + "_" + invName;
343 cvs[cvsName3] = new TCanvas(cvsName3,"",600,700);
344 //hKSvalues_[0]->GetXaxis()->SetRangeUser(0.9,1.2);
345 hKSvalues_[0]->GetYaxis()->SetRangeUser(1e-36,1.2);
346 hKSvalues_[0]->Draw();
347 hKSvalues_[1]->Draw("same");
348 hKSvalues_[2]->Draw("same");
349 TLegend *legend3 = new TLegend(0.7, 0.70, 0.9, 0.85);
350 legend3->AddEntry(hKSvalues_[0], "muon_pT");
351 legend3->AddEntry(hKSvalues_[1], "MET");
352 legend3->AddEntry(hKSvalues_[2], "W_mT");
353 legend3->Draw();
354 legend3->SetFillColor(kWhite);
355 //latex->DrawLatex(0.22,0.91,"KS test values");
356 cvs[cvsName3]->SetLogy();
357 TString nameCanvas3 = desDir+"KStestValues_newQCD"+suffix+".pdf";
358 cvs[cvsName3]->SaveAs(nameCanvas3);
359 }
360 }