ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/Jeng/scripts/ikstest.cc
Revision: 1.2
Committed: Thu Sep 2 04:50:39 2010 UTC (14 years, 8 months ago) by jengbou
Content type: text/plain
Branch: MAIN
Changes since 1.1: +226 -55 lines
Log Message:
update

File Contents

# User Rev Content
1 jengbou 1.1 #include <vector>
2     #include <TFile.h>
3     #include <TH1.h>
4     #include <TH2.h>
5     #include <TLatex.h>
6     #include <TCanvas.h>
7     #include <TMath.h>
8     #include <TLegend.h>
9     #include <TObject.h>
10     #include <iostream>
11 jengbou 1.2 #include <sstream>
12 jengbou 1.1 #include <fstream>
13     #include <iomanip>
14 jengbou 1.2 #include <map>
15 jengbou 1.1
16     using namespace std;
17    
18 jengbou 1.2 // User defined parameters
19     bool useInv = false; // whether to use n-1 QCD template
20     bool realData = false;
21     const double stepsize = 0.001;
22    
23     //define the constants: 2.78/pb
24     const double weight_[3] = {0.0506107, //QCD
25     0.0088222, //WJets
26     0.000295 //TTbar
27     };
28     const Double_t procQCD = 1.46;
29     const Double_t procWjets = 1.03;
30     const Double_t procttjets = 1.0;
31 jengbou 1.1
32     struct testMC {
33     testMC(Double_t p = 0., Double_t sb = 0., Double_t ss = 0.){prob=p; scaleF_backg = sb; scaleF_sample = ss;}
34     Double_t prob;
35     Double_t scaleF_backg;
36     Double_t scaleF_sample;
37     };
38    
39     //function for doing KS test
40 jengbou 1.2 vector<testMC> doKStest(Double_t NsS, Double_t Ns1, Double_t Ns2, TH1F* mixS, TH1F* s1, TH1F* s2) {
41 jengbou 1.1 vector<testMC> output;
42     //define the scale factors
43     Double_t sf1 = 0.0; // QCD
44     Double_t sf2 = 0.0; // Wjets
45     //KS test
46     do {
47 jengbou 1.2 sf1 = (NsS - Ns2*sf2)/Ns1;
48 jengbou 1.1 if (sf1 < 0) break;
49     //cout << "..........sf1 = " << sf1 << endl;
50     int nbins = mixS->GetNbinsX();
51     double xmin = mixS->GetXaxis()->GetBinLowEdge(1);
52     double xmax = mixS->GetXaxis()->GetBinUpEdge(nbins);
53     TH1F *test = new TH1F("test", "", nbins, xmin, xmax);
54     test -> Sumw2();
55 jengbou 1.2 test -> Add(s1,s2,sf1,sf2);
56 jengbou 1.1 //test->Scale(1./(1.*test->Integral()));
57 jengbou 1.2 TString option = (!useInv && !realData) ? "N" : "";
58     Double_t probability = mixS -> KolmogorovTest(test,option);
59 jengbou 1.1 testMC temp = testMC(probability,sf1,sf2);
60     output.push_back(temp);
61     // cout << "probability = " << setw(15) << temp.prob
62     // << "; sfQCD = " << setw(10) << temp.scaleF_backg
63 jengbou 1.2 // << "; sfWjets = " << setw(6) << temp.scaleF_sample << endl;
64 jengbou 1.1 delete test;
65 jengbou 1.2 sf2 = sf2 + stepsize;
66     } while(sf1 > 0 && sf2 <= 2.0);
67 jengbou 1.1 return output;
68     }
69    
70     //get the maximum KS test result
71     testMC getMax(vector<testMC> vec) {
72     testMC maxKSRes;
73     Double_t maximum = 0.0;
74     for (size_t i = 0; i < vec.size(); i++) {
75     if (maximum < vec.at(i).prob) {
76     maximum = vec.at(i).prob;
77     maxKSRes = vec.at(i);
78     }
79     }
80 jengbou 1.2 cout << "for maximum: " << setw(12) << maxKSRes.prob
81     << "; sb = " << setw(10) << maxKSRes.scaleF_backg
82     << "; ss = " << setw(5) << maxKSRes.scaleF_sample << endl;
83 jengbou 1.1 return maxKSRes;
84     }
85    
86 jengbou 1.2
87     //=================================
88     // Main program
89     //=================================
90 jengbou 1.1 void ikstest() {
91     //Style();
92     TLatex *latex = new TLatex();
93     latex->SetNDC();
94    
95 jengbou 1.2 ofstream outprint( "ikstest_results_20100901.txt" );
96     //open the files with histograms
97     map<string,TFile*> mfile;
98     mfile["Data"] = TFile::Open("skimmed_Data_20100901/Data_RefSel_v3.root");
99     // n-1 cuts
100     if (useInv) {
101     if (realData)
102     // mfile["InvSel"] = TFile::Open("skimmed_Data_20100825/Data_D0ge0p03.root");
103     mfile["InvSel"] = TFile::Open("skimmed_Data_20100901/Data_RelIsoge0p1_v3.root");
104     else
105     // mfile["InvSel"] = TFile::Open("skimmed_MC/QCD_D0ge0p03.root");
106     mfile["InvSel"] = TFile::Open("skimmed_MC/QCD_RelIsoge0p1_v3.root");
107     }
108 jengbou 1.1
109 jengbou 1.2 // RefSel MC
110     mfile["0"] = TFile::Open("skimmed_MC/QCD_RefSel_v3.root");
111     mfile["1"] = TFile::Open("skimmed_MC/WJets_RefSel_v3.root");
112     mfile["2"] = TFile::Open("skimmed_MC/TTbar_RefSel_v3.root");
113 jengbou 1.1
114     //define histograms and related parameters
115 jengbou 1.2 string histoName[3] = {"h_mu_pt_calo","h_met_calo","h_mt_calo"};
116 jengbou 1.1 string histoLabelX[3] = {"p_{T}^{good Muons}", "E_{T}^{#nu}", "m_{T}^{W}"};
117 jengbou 1.2 Int_t xbins[3] = {20,20,40};
118 jengbou 1.1 Double_t xlow[3] = {0.,0.,0.};
119 jengbou 1.2 Double_t xhigh[3] = {100.,100.,200.};
120 jengbou 1.1 string sample[3] = {"QCD","Wjets","ttjets"};
121    
122     TH1F* h_[9];
123     TH1F* mixh_[3];
124     TH1F* hQCD_NEW[3];
125     TH1F* hKSres_[3];
126     TH1F* hKSvalues_[3];
127    
128     //load the histograms from the root files
129     for (int i = 0; i < 3; i++) {// 3 variables
130     //cout << "file[" << i << "] : " << endl;
131 jengbou 1.2 string nameNewHisto = "mix_"+histoName[i];
132     string nameNewHistoSFKS = "finalSF_"+histoName[i];
133 jengbou 1.1 string nameNewHistoKSvalues = "KSvalues_"+histoLabelX[i];
134    
135     mixh_[i] = new TH1F(nameNewHisto.c_str(),"",xbins[i],xlow[i],xhigh[i]);
136     hKSres_[i] = new TH1F(nameNewHistoSFKS.c_str(),"",xbins[i],xlow[i],xhigh[i]);
137 jengbou 1.2 hKSvalues_[i] = new TH1F(nameNewHistoKSvalues.c_str(),"",2./stepsize, stepsize, 2.+stepsize);
138    
139     if (!useInv) {//use QCD MC sample
140     hQCD_NEW[i] = (TH1F*) mfile["0"]->Get(TString(histoName[i]))->Clone();
141     hQCD_NEW[i] -> Scale(weight_[0]);
142     hQCD_NEW[i] -> SetName((histoName[i]).c_str());
143     }
144     else {
145     hQCD_NEW[i] = (TH1F*) mfile["InvSel"]->Get(TString(histoName[i]));
146     if (!realData) hQCD_NEW[i] -> Scale(weight_[0]);
147     hQCD_NEW[i] -> SetName((histoName[i]).c_str());
148     }
149 jengbou 1.1
150     mixh_[i] -> Sumw2();
151     hKSres_[i] -> Sumw2();
152     hKSvalues_[i] -> Sumw2();
153 jengbou 1.2 }
154 jengbou 1.1
155 jengbou 1.2 for (int n = 0; n < 3; ++n) {// 3 MC samples
156     for (int ihisto = 0; ihisto < 3; ihisto++) {// 3 variables
157 jengbou 1.1 //cout << "Variable[" << ihisto << "]" << endl;
158 jengbou 1.2 string histo_name = histoName[ihisto]+sample[n];
159     ostringstream ss;
160     ss << n;
161     h_[n*3+ihisto] = (TH1F*) mfile[ss.str()]->Get(TString(histoName[ihisto]))->Clone();
162     h_[n*3+ihisto] -> Scale(weight_[n]);
163     h_[n*3+ihisto] -> SetName(histo_name.c_str());
164 jengbou 1.1 }
165     }
166    
167     //create the mixed samples = "data"
168 jengbou 1.2 TCanvas *canvas0 = new TCanvas("Data","Data distributions");
169     canvas0->Divide(3,1);
170 jengbou 1.1 for (int i = 0; i < 3; i++) {
171 jengbou 1.2 canvas0->cd(i+1);
172     if (!realData) {
173     mixh_[i] -> Add(h_[i],h_[i+3], procQCD,procWjets);
174     //mixh_[i] -> Add(mixh_[i],h_[i+6], 1,procttjets);
175     //cout << "histo_name: " << mixh_[0]->GetNbinsX() << endl;
176     }
177     else {
178     TH1F *htmp = (TH1F*) mfile["Data"]->Get(TString(histoName[i]));
179     mixh_[i] -> Add(htmp,1.);
180     }
181     mixh_[i]->DrawClone();
182 jengbou 1.1 }
183 jengbou 1.2 canvas0->SaveAs("Data_distributions.pdf");
184 jengbou 1.1
185     //define the weight corrections for each sample
186 jengbou 1.2 double NevData = mixh_[2]->Integral();
187     double corr_NevQCD = h_[2]->Integral();
188     double corr_NevQCD_NEW = hQCD_NEW[2]->Integral();
189     double corr_NevWjets = h_[5]->Integral();
190     double corr_Nevttjets = h_[8]->Integral();
191 jengbou 1.1 double corr_Nevmix = procQCD*corr_NevQCD+procWjets*corr_NevWjets;
192 jengbou 1.2 //double corr_Nevmix = procQCD*corr_NevQCD+procWjets*corr_NevWjets+procttjets*corr_Nevttjets;
193     if (!realData)
194     outprint << "Events mix sample = " << corr_Nevmix << endl;
195     else
196     outprint << "Events in Data = " << NevData << endl;
197 jengbou 1.1 outprint << "Events QCD sample = " << corr_NevQCD << endl;
198     outprint << "Events Wjets sample = " << corr_NevWjets << endl;
199 jengbou 1.2 outprint << "Events InvSel QCD sample = " << corr_NevQCD_NEW << endl;
200 jengbou 1.1
201     //define the containers for chosen numbers (coressponding to the max KStest result)
202     testMC maxProb[3];
203    
204     //define the scale factors calculated using information obtained from all parameters
205     Double_t SFbackg = 0.0;
206     Double_t sumSFbackg = 0.0;
207     Double_t SFsample = 0.0;
208     Double_t sumSFsample = 0.0;
209     Double_t allKS = 0.0;
210    
211     //do the KS test by varying the scale factors
212     for (int i = 0; i < 3; i++) { // 3 variables
213     TH1F *data = (TH1F*)mixh_[i]->Clone();
214 jengbou 1.2 data -> SetName("dataClone");
215 jengbou 1.1 //data -> Scale(1./data->Integral());
216 jengbou 1.2 vector<testMC> resultsKS = doKStest((realData ? NevData : corr_Nevmix),
217     (useInv ? corr_NevQCD_NEW : corr_NevQCD),
218     corr_NevWjets,
219     data, hQCD_NEW[i], h_[i+3]);
220 jengbou 1.1 testMC tksmax = getMax(resultsKS);
221     maxProb[i] = tksmax;
222     outprint << "\nFor the plot " << histoLabelX[i] << " the results are:"<< endl;
223     outprint << "\tmax Probability = " << maxProb[i].prob << endl;
224     outprint << "\tproc_background = " << maxProb[i].scaleF_backg << endl;
225     outprint << "\tproc_sample = " << maxProb[i].scaleF_sample << endl;
226    
227 jengbou 1.2 outprint << "\n\tpercent_B of Data = "
228     << maxProb[i].scaleF_backg*corr_NevQCD_NEW*100/(realData ? NevData : corr_Nevmix) << endl;
229     outprint << "\tpercent_S of Data = "
230     << maxProb[i].scaleF_sample*corr_NevWjets*100/(realData ? NevData : corr_Nevmix) << endl;
231 jengbou 1.1 outprint << "---------------------------" << endl;
232    
233     //create the mixed samples with KS test results
234     sumSFbackg += maxProb[i].prob*maxProb[i].scaleF_backg;
235     sumSFsample += maxProb[i].prob*maxProb[i].scaleF_sample;
236     allKS += maxProb[i].prob;
237    
238     //fill a histogram with the results from the KS test for each variable
239     for (int jiter = 0; jiter < resultsKS.size(); jiter++) {
240 jengbou 1.2 if (resultsKS.at(jiter).prob == 1.)
241     cout << "variable [" << i << "]: prob[" << jiter << "]= " << resultsKS.at(jiter).prob << endl;
242 jengbou 1.1 hKSvalues_[i]->SetBinContent(jiter,resultsKS.at(jiter).prob);
243     }
244     delete data;
245     }
246    
247     //calculate the final scale factors
248     SFbackg = sumSFbackg/allKS;
249     SFsample = sumSFsample/allKS;
250     outprint << "allKS = " << allKS << "\tbackground = " << SFbackg << "\tsample = " << SFsample << endl;
251 jengbou 1.2 outprint << "\tcombined percent_B of Data = "
252     << SFbackg*corr_NevQCD_NEW*100/(realData ? NevData : corr_Nevmix) << endl;
253     outprint << "\tcombined percent_S of Data = "
254     << SFsample*corr_NevWjets*100/(realData ? NevData : corr_Nevmix) << endl;
255 jengbou 1.1 outprint << "\n" << endl;
256     outprint << "=================================" << endl;
257     outprint << "\n" << endl;
258    
259 jengbou 1.2
260     //=================================
261     // Plots
262     //=================================
263     for (int i = 0; i < 3; i++) {// 3 variables
264     hKSres_[i] -> Add(hQCD_NEW[i],h_[i+3],SFbackg,SFsample);
265     outprint << "hKSres->Integral() = " << hKSres_[i]->Integral() << endl;
266     outprint << "Data->Integral() = " << mixh_[i]->Integral() << endl;
267    
268     mixh_[i]->Rebin(2);
269     hQCD_NEW[i]->Rebin(2);
270     h_[i]->Rebin(2);
271     h_[i+3]->Rebin(2);
272     hKSres_[i]->Rebin(2);
273     //hKSvalues_[i]->Rebin(2);
274    
275     mixh_[i] ->SetLineColor(1);
276     hQCD_NEW[i] ->SetLineColor(3);
277     h_[i] ->SetLineColor(6);
278     h_[i+3] ->SetLineColor(4);
279     hKSres_[i] ->SetLineColor(2);
280     hKSvalues_[i]->SetLineColor(i+1);
281    
282     mixh_[i] ->SetMarkerColor(1);
283     hQCD_NEW[i] ->SetMarkerColor(3);
284     h_[i] ->SetMarkerColor(6);
285     h_[i+3] ->SetMarkerColor(4);
286     hKSres_[i] ->SetMarkerColor(2);
287     hKSvalues_[i]->SetMarkerColor(i+1);
288    
289     mixh_[i] ->SetMarkerStyle(24);
290     hQCD_NEW[i] ->SetMarkerStyle(20);
291     h_[i] ->SetMarkerStyle(20);
292     h_[i+3] ->SetMarkerStyle(20);
293     hKSres_[i] ->SetMarkerStyle(20);
294     hKSvalues_[i]->SetMarkerStyle(20);
295    
296     mixh_[i] ->SetMarkerSize(1.4);
297     hQCD_NEW[i] ->SetMarkerSize(1.1);
298     h_[i] ->SetMarkerSize(1.1);
299     h_[i+3] ->SetMarkerSize(1.1);
300     hKSres_[i] ->SetMarkerSize(0.9);
301     hKSvalues_[i]->SetMarkerSize(1.1);
302    
303     mixh_[i] ->SetStats(0);
304     hQCD_NEW[i] ->SetStats(0);
305     h_[i] ->SetStats(0);
306     h_[i+3] ->SetStats(0);
307     hKSres_[i] ->SetStats(0);
308     hKSvalues_[i]->SetStats(0);
309    
310     mixh_[i]->GetXaxis()->SetTitle(histoLabelX[i].c_str());
311     mixh_[i]->GetYaxis()->SetTitle("Entries");
312     hKSres_[i]->GetXaxis()->SetTitle(histoLabelX[i].c_str());
313     hKSres_[i]->GetYaxis()->SetTitle("Entries");
314     hKSvalues_[i]->GetXaxis()->SetTitle("iteration #");
315     hKSvalues_[i]->GetYaxis()->SetTitle("KS test values");
316    
317     string nameCanvas1 = histoName[i]+"_QCD.pdf";
318     TCanvas *canvas1 = new TCanvas(nameCanvas1.c_str(), "");
319     hQCD_NEW[i] -> Scale(1./hQCD_NEW[i]->Integral());
320     h_[i] -> Scale(1./h_[i]->Integral());
321     h_[i+3] -> Scale(1./h_[i+3]->Integral());
322     outprint << "For " << histoName[i] << " , the KStest result btw MC_QCD/InvSel is = "
323     << h_[i] -> KolmogorovTest(hQCD_NEW[i],"") << endl;
324     hQCD_NEW[i]->Draw("P");
325     h_[i]->Draw("sameP");
326     h_[i+3]->Draw("sameP");
327     TLegend *legend1 = new TLegend(0.7, 0.70, 0.9, 0.85);
328     legend1->AddEntry(h_[i], "default");
329     legend1->AddEntry(h_[i+3], "W+jets");
330     legend1->AddEntry(hQCD_NEW[i], "new");
331     legend1->Draw();
332     legend1->SetFillColor(kWhite);
333     latex->DrawLatex(0.22,0.91,histoName[i].c_str());
334     canvas1->SetLogy();
335     canvas1->SaveAs(nameCanvas1.c_str());
336    
337     string nameCanvas2 = histoName[i]+"_dataKS.pdf";
338     TCanvas *canvas2 = new TCanvas(nameCanvas2.c_str(), "");
339     hKSres_[i]->Draw("P");
340     mixh_[i]->Draw("sameP");
341     TLegend *legend2 = new TLegend(0.7, 0.70, 0.9, 0.85);
342     legend2->AddEntry(mixh_[i], "Data");
343     legend2->AddEntry(hKSres_[i], "KS result");
344     legend2->Draw();
345     legend2->SetFillColor(kWhite);
346     latex->DrawLatex(0.22,0.91,histoName[i].c_str());
347     canvas2->SetLogy();
348     canvas2->SaveAs(nameCanvas2.c_str());
349    
350     }
351    
352    
353     TCanvas *canvas3 = new TCanvas("KStestValues", "");
354     //hKSvalues_[0]->GetXaxis()->SetRangeUser(0.9,1.1);
355     //hKSvalues_[0]->GetYaxis()->SetRangeUser(1e-3,1.1);
356     hKSvalues_[0]->Draw();
357     hKSvalues_[1]->SetLineColor(2);
358     hKSvalues_[1]->Draw("same");
359     hKSvalues_[2]->SetLineColor(4);
360     hKSvalues_[2]->Draw("same");
361     TLegend *legend3 = new TLegend(0.7, 0.70, 0.9, 0.85);
362     legend3->AddEntry(hKSvalues_[0], "muon_pT");
363     legend3->AddEntry(hKSvalues_[1], "MET");
364     legend3->AddEntry(hKSvalues_[2], "W_mT");
365     legend3->Draw();
366     legend3->SetFillColor(kWhite);
367     latex->DrawLatex(0.22,0.91,"KS test values");
368     canvas3->SetLogy();
369     string nameCanvas3 = "KStestValues_newQCD.pdf";
370     canvas3->SaveAs(nameCanvas3.c_str());
371    
372 jengbou 1.1 }