1 |
#include "ikstest.h"
|
2 |
#include "TopStyle/CMSTopStyle.cc"
|
3 |
|
4 |
//=================================
|
5 |
// Program to run IKS test
|
6 |
// * Input directories:
|
7 |
// * Data: skimmed_Data_x.xxpb-1
|
8 |
// => default selection : Data_<suffix>.root
|
9 |
// => n-1 selection : Data_<suffix>_<invName>.root
|
10 |
// * MC : skimmed_MC {QCD,WJets,TTbar...}
|
11 |
// => default selection : QCD_<suffix>.root
|
12 |
// => n-1 selection : Data_<suffix>_<invName>.root
|
13 |
// * Upmost output dir specified by <desDir>
|
14 |
// subdirectory created according to <useInv> and <realData>
|
15 |
// => <desDir'><xyz><suffix>.pdf (.txt)
|
16 |
// * e.g. to do KS test on data with MC QCD shape in signal region:
|
17 |
// useInv=false; realData=true
|
18 |
//=================================
|
19 |
|
20 |
using namespace std;
|
21 |
|
22 |
//define the constants: 2.88/pb
|
23 |
const double weight_[3] = {0.0524313, //QCD
|
24 |
0.0091395, //WJets
|
25 |
0.000306 //TTbar
|
26 |
};
|
27 |
const Double_t procQCD = 1.46;
|
28 |
const Double_t procWjets = 1.03;
|
29 |
const Double_t procttjets = 1.0;
|
30 |
|
31 |
// Output directory
|
32 |
TString baseDir = "Results_2.88pb-1/";
|
33 |
// User defined parameters
|
34 |
bool useInv = true; // whether to use n-1 QCD template
|
35 |
bool realData = false;
|
36 |
// Ntuples to use
|
37 |
TString suffix = "Sel4"; // Suffix of selection
|
38 |
TString invNames[2] = {"RelIsogt0p1","D0gt0p02"};
|
39 |
map<TString,TCanvas*> cvs; // map of usual histogram
|
40 |
|
41 |
//=================================
|
42 |
// Main program
|
43 |
//=================================
|
44 |
void ikstest() {
|
45 |
CMSTopStyle style;
|
46 |
gROOT->SetStyle("CMS");
|
47 |
TLatex *latex = new TLatex();
|
48 |
latex->SetNDC();
|
49 |
|
50 |
int size_ninv = (useInv ? 2 : 1);
|
51 |
for (int ninv = 0;ninv < size_ninv; ++ninv) {
|
52 |
TString invName = invNames[ninv];
|
53 |
TString desDir;
|
54 |
if (!useInv) {
|
55 |
if (!realData) desDir = baseDir + "MC/";
|
56 |
else desDir = baseDir + "Data_MC/";
|
57 |
} else {
|
58 |
if (!realData) desDir = baseDir + "MC_"+invName+"/";
|
59 |
else desDir = baseDir + "Data_"+invName+"/";
|
60 |
}
|
61 |
struct stat stDir;
|
62 |
if (!stat(desDir,&stDir)){
|
63 |
cout << "Output folder exists! Continues? (enter to continue; 'q' for quit)" << endl;
|
64 |
char incmd;
|
65 |
cin.get(incmd);
|
66 |
if (incmd == 'q') return;
|
67 |
} else {
|
68 |
cout << "Creating folder : " << desDir << endl;
|
69 |
if (mkdir(desDir,0755) == -1){
|
70 |
std::cerr << "Error creating folder" << endl;
|
71 |
return;
|
72 |
}
|
73 |
}
|
74 |
|
75 |
ofstream outprint(TString(desDir+"Results_"+suffix+".txt"));
|
76 |
//open the files with histograms
|
77 |
map<string,TFile*> mfile;
|
78 |
mfile["Data"] = TFile::Open(TString("skimmed_Data_2.88pb-1/Data_"+suffix+".root"));
|
79 |
// n-1 cuts
|
80 |
if (useInv) {
|
81 |
if (realData)
|
82 |
mfile["InvSel"] = TFile::Open(TString("skimmed_Data_2.88pb-1/Data_"+suffix+"_"+invName+".root"));
|
83 |
else
|
84 |
mfile["InvSel"] = TFile::Open(TString("skimmed_MC/v5/QCD_"+suffix+"_"+invName+".root"));
|
85 |
}
|
86 |
// RefSel MC
|
87 |
mfile["0"] = TFile::Open(TString("skimmed_MC/v5/QCD_"+suffix+".root"));
|
88 |
mfile["1"] = TFile::Open(TString("skimmed_MC/v5/WJets_"+suffix+".root"));
|
89 |
mfile["2"] = TFile::Open(TString("skimmed_MC/v5/TTbar_"+suffix+".root"));
|
90 |
|
91 |
//define histograms and related parameters
|
92 |
string histoName[3] = {"h_mu_pt_calo","h_met_calo","h_mt_calo"};
|
93 |
string histoLabelX[3] = {"p_{T}^{#mu} [GeV/c]", "#slash{E}_{T} [GeV/c]", "M_{T}^{W} [GeV/c^{2}]"};
|
94 |
Int_t xbins[3] = {20,20,40};
|
95 |
Double_t xlow[3] = {0.,0.,0.};
|
96 |
Double_t xhigh[3] = {100.,100.,200.};
|
97 |
string sample[3] = {"QCD","Wjets","ttjets"};
|
98 |
|
99 |
TH1F* h_[9];
|
100 |
TH1F* mixh_[3];
|
101 |
TH1F* hQCD_NEW[3];
|
102 |
TH1F* hKSres_[3];
|
103 |
TH1F* hKSvalues_[3];
|
104 |
TH1F* hQCD_KS[3];
|
105 |
TH1F* hWJets_KS[3];
|
106 |
|
107 |
//load the histograms from the root files
|
108 |
for (int i = 0; i < 3; i++) {// 3 variables
|
109 |
//cout << "file[" << i << "] : " << endl;
|
110 |
string nameNewHisto = "mix_"+histoName[i];
|
111 |
string nameNewHistoSFKS = "finalSF_"+histoName[i];
|
112 |
string nameNewHistoKSvalues = "KSvalues_"+histoLabelX[i];
|
113 |
|
114 |
mixh_[i] = new TH1F(nameNewHisto.c_str(),"",xbins[i],xlow[i],xhigh[i]);
|
115 |
hKSres_[i] = new TH1F(nameNewHistoSFKS.c_str(),"",xbins[i],xlow[i],xhigh[i]);
|
116 |
hKSvalues_[i] = new TH1F(nameNewHistoKSvalues.c_str(),"",2./stepsize, stepsize, 2.+stepsize);
|
117 |
|
118 |
if (!useInv) {//use QCD MC sample
|
119 |
hQCD_NEW[i] = (TH1F*) mfile["0"]->Get(TString(histoName[i]))->Clone();
|
120 |
hQCD_NEW[i] -> Scale(weight_[0]);
|
121 |
hQCD_NEW[i] -> SetName((histoName[i]).c_str());
|
122 |
}
|
123 |
else {
|
124 |
hQCD_NEW[i] = (TH1F*) mfile["InvSel"]->Get(TString(histoName[i]));
|
125 |
if (!realData) hQCD_NEW[i] -> Scale(weight_[0]);
|
126 |
hQCD_NEW[i] -> SetName((histoName[i]).c_str());
|
127 |
}
|
128 |
|
129 |
mixh_[i] -> Sumw2();
|
130 |
hKSres_[i] -> Sumw2();
|
131 |
hKSvalues_[i] -> Sumw2();
|
132 |
}
|
133 |
|
134 |
for (int n = 0; n < 3; ++n) {// 3 MC samples
|
135 |
for (int ihisto = 0; ihisto < 3; ihisto++) {// 3 variables
|
136 |
//cout << "Variable[" << ihisto << "]" << endl;
|
137 |
string histo_name = histoName[ihisto]+sample[n];
|
138 |
ostringstream ss;
|
139 |
ss << n;
|
140 |
h_[n*3+ihisto] = (TH1F*) mfile[ss.str()]->Get(TString(histoName[ihisto]))->Clone();
|
141 |
h_[n*3+ihisto] -> Scale(weight_[n]);
|
142 |
h_[n*3+ihisto] -> SetName(histo_name.c_str());
|
143 |
}
|
144 |
}
|
145 |
|
146 |
//create the mixed samples = "data"
|
147 |
TString cvsName0 = "Data";
|
148 |
if (useInv) {
|
149 |
cvsName0 += "_";
|
150 |
cvsName0 += invName;
|
151 |
}
|
152 |
cvs[cvsName0] = new TCanvas(cvsName0,"Data distributions",600,700);
|
153 |
cvs[cvsName0]->Divide(3,1);
|
154 |
for (int i = 0; i < 3; i++) {
|
155 |
cvs[cvsName0]->cd(i+1);
|
156 |
if (!realData) {
|
157 |
mixh_[i] -> Add(h_[i],h_[i+3], procQCD,procWjets);
|
158 |
//mixh_[i] -> Add(mixh_[i],h_[i+6], 1,procttjets);
|
159 |
//cout << "histo_name: " << mixh_[0]->GetNbinsX() << endl;
|
160 |
}
|
161 |
else {
|
162 |
TH1F *htmp = (TH1F*) mfile["Data"]->Get(TString(histoName[i]));
|
163 |
mixh_[i] -> Add(htmp,1.);
|
164 |
}
|
165 |
mixh_[i]->GetXaxis()->SetTitle(histoLabelX[i].c_str());
|
166 |
mixh_[i]->GetYaxis()->SetTitle("Entries");
|
167 |
mixh_[i]->DrawClone();
|
168 |
}
|
169 |
cvs[cvsName0]->SaveAs(TString(desDir+"Data_distributions.pdf"));
|
170 |
|
171 |
//define the weight corrections for each sample
|
172 |
double NevData = mixh_[2]->Integral();
|
173 |
double corr_NevQCD = h_[2]->Integral();
|
174 |
double corr_NevQCD_NEW = hQCD_NEW[2]->Integral();
|
175 |
double corr_NevWjets = h_[5]->Integral();
|
176 |
double corr_Nevttjets = h_[8]->Integral();
|
177 |
double corr_Nevmix = procQCD*corr_NevQCD+procWjets*corr_NevWjets;
|
178 |
//double corr_Nevmix = procQCD*corr_NevQCD+procWjets*corr_NevWjets+procttjets*corr_Nevttjets;
|
179 |
if (!realData)
|
180 |
outprint << "Events mix sample = " << corr_Nevmix << endl;
|
181 |
else
|
182 |
outprint << "Events in Data = " << NevData << endl;
|
183 |
outprint << "Events QCD sample = " << corr_NevQCD << endl;
|
184 |
outprint << "Events Wjets sample = " << corr_NevWjets << endl;
|
185 |
outprint << "Events InvSel sample = " << corr_NevQCD_NEW << endl;
|
186 |
|
187 |
//define the containers for chosen numbers (coressponding to the max KStest result)
|
188 |
testMC maxProb[3];
|
189 |
|
190 |
//define the scale factors calculated using information obtained from all parameters
|
191 |
Double_t SFbackg = 0.0;
|
192 |
Double_t sumSFbackg = 0.0;
|
193 |
Double_t SFsample = 0.0;
|
194 |
Double_t sumSFsample = 0.0;
|
195 |
Double_t allKS = 0.0;
|
196 |
|
197 |
//do the KS test by varying the scale factors
|
198 |
for (int i = 0; i < 3; i++) { // 3 variables
|
199 |
TH1F *data = (TH1F*)mixh_[i]->Clone();
|
200 |
data -> SetName("dataClone");
|
201 |
//data -> Scale(1./data->Integral());
|
202 |
vector<testMC> resultsKS = doKStest((realData ? NevData : corr_Nevmix),
|
203 |
(useInv ? corr_NevQCD_NEW : corr_NevQCD),
|
204 |
corr_NevWjets,
|
205 |
data, hQCD_NEW[i], h_[i+3]);
|
206 |
testMC tksmax = getMax(resultsKS);
|
207 |
maxProb[i] = tksmax;
|
208 |
outprint << "\nFor the plot " << histoLabelX[i] << " the results are:"<< endl;
|
209 |
outprint << "\tmax Probability = " << maxProb[i].prob << endl;
|
210 |
outprint << "\tproc_background = " << maxProb[i].scaleF_backg << endl;
|
211 |
outprint << "\tproc_sample = " << maxProb[i].scaleF_sample << endl;
|
212 |
|
213 |
outprint << "\n\tpercent_B of Data = "
|
214 |
<< maxProb[i].scaleF_backg*corr_NevQCD_NEW*100/(realData ? NevData : corr_Nevmix) << endl;
|
215 |
outprint << "\tpercent_S of Data = "
|
216 |
<< maxProb[i].scaleF_sample*corr_NevWjets*100/(realData ? NevData : corr_Nevmix) << endl;
|
217 |
outprint << "---------------------------" << endl;
|
218 |
|
219 |
//create the mixed samples with KS test results
|
220 |
sumSFbackg += maxProb[i].prob*maxProb[i].scaleF_backg;
|
221 |
sumSFsample += maxProb[i].prob*maxProb[i].scaleF_sample;
|
222 |
allKS += maxProb[i].prob;
|
223 |
|
224 |
//fill a histogram with the results from the KS test for each variable
|
225 |
for (int jiter = 0; jiter < resultsKS.size(); jiter++) {
|
226 |
if (resultsKS.at(jiter).prob == 1.)
|
227 |
cout << "variable [" << i << "]: prob[" << jiter << "]= " << resultsKS.at(jiter).prob << endl;
|
228 |
hKSvalues_[i]->SetBinContent(jiter,resultsKS.at(jiter).prob);
|
229 |
}
|
230 |
delete data;
|
231 |
}
|
232 |
|
233 |
//calculate the final scale factors
|
234 |
SFbackg = sumSFbackg/allKS;
|
235 |
SFsample = sumSFsample/allKS;
|
236 |
outprint << "allKS = " << allKS << "\tbackground = " << SFbackg << "\tsample = " << SFsample << endl;
|
237 |
outprint << "==> Scale Factor for QCD MC = " << SFbackg*corr_NevQCD_NEW/corr_NevQCD << endl;
|
238 |
outprint << "\tcombined percent_B of Data = "
|
239 |
<< SFbackg*corr_NevQCD_NEW*100/(realData ? NevData : corr_Nevmix) << endl;
|
240 |
outprint << "\tcombined percent_S of Data = "
|
241 |
<< SFsample*corr_NevWjets*100/(realData ? NevData : corr_Nevmix) << endl;
|
242 |
outprint << "\n" << endl;
|
243 |
outprint << "=================================" << endl;
|
244 |
outprint << "\n" << endl;
|
245 |
|
246 |
|
247 |
//=================================
|
248 |
// Plots
|
249 |
//=================================
|
250 |
for (int i = 0; i < 3; i++) {// 3 variables
|
251 |
hKSres_[i] -> Add(hQCD_NEW[i],h_[i+3],SFbackg,SFsample);
|
252 |
outprint << "hKSres->Integral() = " << hKSres_[i]->Integral() << endl;
|
253 |
outprint << "Data->Integral() = " << mixh_[i]->Integral() << endl;
|
254 |
|
255 |
mixh_[i]->Rebin(2);
|
256 |
hQCD_NEW[i]->Rebin(2);
|
257 |
h_[i]->Rebin(2);
|
258 |
h_[i+3]->Rebin(2);
|
259 |
hKSres_[i]->Rebin(2);
|
260 |
//hKSvalues_[i]->Rebin(2);
|
261 |
|
262 |
// Stack Wjets and QCD after scaled by KS factors
|
263 |
hQCD_KS[i] = (TH1F*) hQCD_NEW[i]->Clone();
|
264 |
hQCD_KS[i]->Scale(SFbackg);
|
265 |
hQCD_KS[i]->SetLineColor(style.QCDColor);
|
266 |
hQCD_KS[i]->SetFillColor(style.QCDColor);
|
267 |
hQCD_KS[i]->SetFillStyle(style.QCDFill);
|
268 |
|
269 |
hWJets_KS[i] = (TH1F*) h_[i+3]->Clone();
|
270 |
hWJets_KS[i]->Scale(SFsample);
|
271 |
hWJets_KS[i]->SetLineColor(style.WJetsColor);
|
272 |
hWJets_KS[i]->SetFillColor(style.WJetsColor);
|
273 |
hWJets_KS[i]->SetFillStyle(style.WJetsFill);
|
274 |
|
275 |
THStack *hst = new THStack(invName,invName);
|
276 |
hst->Add(hQCD_KS[i]);
|
277 |
hst->Add(hWJets_KS[i]);
|
278 |
|
279 |
// Set plotting parameters
|
280 |
mixh_[i] ->SetLineColor(1);
|
281 |
hQCD_NEW[i] ->SetLineColor(2);
|
282 |
h_[i] ->SetLineColor(4);
|
283 |
h_[i+3] ->SetLineColor(3);
|
284 |
hKSres_[i] ->SetLineColor(2);
|
285 |
hKSvalues_[i]->SetLineColor(i+1);
|
286 |
|
287 |
mixh_[i] ->SetMarkerColor(1);
|
288 |
hQCD_NEW[i] ->SetMarkerColor(2);
|
289 |
h_[i] ->SetMarkerColor(4);
|
290 |
h_[i+3] ->SetMarkerColor(3);
|
291 |
hKSres_[i] ->SetMarkerColor(2);
|
292 |
hKSvalues_[i]->SetMarkerColor(i+1);
|
293 |
|
294 |
mixh_[i] ->SetMarkerStyle(24);
|
295 |
hQCD_NEW[i] ->SetMarkerStyle(20);
|
296 |
h_[i] ->SetMarkerStyle(20);
|
297 |
h_[i+3] ->SetMarkerStyle(20);
|
298 |
hKSres_[i] ->SetMarkerStyle(20);
|
299 |
hKSvalues_[i]->SetMarkerStyle(20);
|
300 |
|
301 |
mixh_[i] ->SetMarkerSize(1.4);
|
302 |
hQCD_NEW[i] ->SetMarkerSize(1.1);
|
303 |
h_[i] ->SetMarkerSize(1.1);
|
304 |
h_[i+3] ->SetMarkerSize(1.1);
|
305 |
hKSres_[i] ->SetMarkerSize(0.9);
|
306 |
hKSvalues_[i]->SetMarkerSize(1.1);
|
307 |
|
308 |
mixh_[i] ->SetStats(0);
|
309 |
hQCD_NEW[i] ->SetStats(0);
|
310 |
h_[i] ->SetStats(0);
|
311 |
h_[i+3] ->SetStats(0);
|
312 |
hKSres_[i] ->SetStats(0);
|
313 |
hKSvalues_[i]->SetStats(0);
|
314 |
hQCD_KS[i] ->SetStats(0);
|
315 |
hWJets_KS[i] ->SetStats(0);
|
316 |
|
317 |
hKSres_[i]->GetXaxis()->SetTitle(histoLabelX[i].c_str());
|
318 |
hKSres_[i]->GetYaxis()->SetTitle("Entries");
|
319 |
hKSvalues_[i]->GetXaxis()->SetTitle("iteration #");
|
320 |
hKSvalues_[i]->GetYaxis()->SetTitle("KS test values");
|
321 |
h_[i]->GetXaxis()->SetTitle(histoLabelX[i].c_str());
|
322 |
h_[i]->GetYaxis()->SetTitle("A.U.");
|
323 |
|
324 |
TString nameCanvas1 = desDir+histoName[i]+"_QCD_"+suffix+".pdf";
|
325 |
TString cvsName1 = histoName[i]+"_QCD";
|
326 |
if(useInv) cvsName1 = cvsName1 + "_" + invName;
|
327 |
cvs[cvsName1] = new TCanvas(cvsName1,"",600,700);
|
328 |
hQCD_NEW[i] -> Scale(1./hQCD_NEW[i]->Integral());
|
329 |
h_[i] -> Scale(1./h_[i]->Integral());
|
330 |
h_[i+3] -> Scale(1./h_[i+3]->Integral());
|
331 |
outprint << "For " << histoName[i] << " , the KStest result btw MC_QCD/InvSel is = "
|
332 |
<< h_[i] -> KolmogorovTest(hQCD_NEW[i],"") << endl;
|
333 |
h_[i]->Draw("P");
|
334 |
if (useInv)
|
335 |
hQCD_NEW[i]->Draw("sameP");
|
336 |
h_[i+3]->Draw("sameP");
|
337 |
TLegend *legend1 = new TLegend(0.7, 0.70, 0.9, 0.85);
|
338 |
legend1->AddEntry(h_[i], "QCD");
|
339 |
if (useInv)
|
340 |
legend1->AddEntry(hQCD_NEW[i], "QCD - InvSel");
|
341 |
legend1->AddEntry(h_[i+3], "W+jets");
|
342 |
legend1->Draw();
|
343 |
legend1->SetFillColor(kWhite);
|
344 |
//latex->DrawLatex(0.22,0.91,histoName[i].c_str());
|
345 |
//cvs[cvsName1]->SetLogy();
|
346 |
cvs[cvsName1]->SaveAs(nameCanvas1);
|
347 |
|
348 |
TString nameCanvas2 = desDir+histoName[i]+"_dataKS_"+suffix+".pdf";
|
349 |
TString cvsName2 = histoName[i]+"_dataKS";
|
350 |
if(useInv) cvsName2 = cvsName2 + "_" + invName;
|
351 |
cvs[cvsName2] = new TCanvas(cvsName2,"",600,700);
|
352 |
hst->Draw("hist");
|
353 |
hKSres_[i]->Draw("sameP");
|
354 |
mixh_[i]->Draw("sameP");
|
355 |
|
356 |
TLegend *legend2 = new TLegend(0.7, 0.70, 0.9, 0.85);
|
357 |
legend2->AddEntry(mixh_[i], "Data");
|
358 |
legend2->AddEntry(hKSres_[i], "KS result");
|
359 |
legend2->AddEntry(hWJets_KS[i], style.WJetsText);
|
360 |
legend2->AddEntry(hQCD_KS[i], style.QCDText);
|
361 |
legend2->Draw();
|
362 |
legend2->SetFillColor(kWhite);
|
363 |
//latex->DrawLatex(0.22,0.91,histoName[i].c_str());
|
364 |
//cvs[cvsName2]->SetLogy();
|
365 |
cvs[cvsName2]->SaveAs(nameCanvas2);
|
366 |
}
|
367 |
|
368 |
TString cvsName3 = "KStestValues";
|
369 |
if(useInv) cvsName3 = cvsName3 + "_" + invName;
|
370 |
cvs[cvsName3] = new TCanvas(cvsName3,"",600,700);
|
371 |
//hKSvalues_[0]->GetXaxis()->SetRangeUser(0.9,1.2);
|
372 |
hKSvalues_[0]->GetYaxis()->SetRangeUser(1e-36,1.2);
|
373 |
hKSvalues_[0]->Draw();
|
374 |
hKSvalues_[1]->Draw("same");
|
375 |
hKSvalues_[2]->Draw("same");
|
376 |
TLegend *legend3 = new TLegend(0.7, 0.70, 0.9, 0.85);
|
377 |
legend3->AddEntry(hKSvalues_[0], "muon_pT");
|
378 |
legend3->AddEntry(hKSvalues_[1], "MET");
|
379 |
legend3->AddEntry(hKSvalues_[2], "W_mT");
|
380 |
legend3->Draw();
|
381 |
legend3->SetFillColor(kWhite);
|
382 |
//latex->DrawLatex(0.22,0.91,"KS test values");
|
383 |
cvs[cvsName3]->SetLogy();
|
384 |
TString nameCanvas3 = desDir+"KStestValues_newQCD"+suffix+".pdf";
|
385 |
cvs[cvsName3]->SaveAs(nameCanvas3);
|
386 |
}
|
387 |
}
|