ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/Jeng/scripts/ikstest.cc
Revision: 1.5
Committed: Tue Sep 7 03:59:59 2010 UTC (14 years, 8 months ago) by jengbou
Content type: text/plain
Branch: MAIN
Changes since 1.4: +326 -295 lines
Log Message:
update

File Contents

# User Rev Content
1 jengbou 1.4 #include "ikstest.h"
2 jengbou 1.1
3 jengbou 1.5 //=================================
4     // Program to run IKS test
5     // * Input directories:
6     // * Data: skimmed_Data_x.xxpb-1
7     // => default selection : Data_<suffix>.root
8     // => n-1 selection : Data_<suffix>_<invName>.root
9     // * MC : skimmed_MC {QCD,WJets,TTbar...}
10     // => default selection : QCD_<suffix>.root
11     // => n-1 selection : Data_<suffix>_<invName>.root
12     // * Upmost output dir specified by <desDir>
13     // subdirectory created according to <useInv> and <realData>
14     // => <desDir'><xyz><suffix>.pdf (.txt)
15     // * e.g. to do KS test on data with MC QCD shape in signal region:
16     // useInv=false; realData=true
17     //=================================
18    
19 jengbou 1.1 using namespace std;
20    
21 jengbou 1.5 //define the constants: 2.88/pb
22     const double weight_[3] = {0.0524313, //QCD
23     0.0091395, //WJets
24     0.000306 //TTbar
25 jengbou 1.2 };
26     const Double_t procQCD = 1.46;
27     const Double_t procWjets = 1.03;
28     const Double_t procttjets = 1.0;
29 jengbou 1.1
30 jengbou 1.5 // Output directory
31     TString desDir = "Results_2.88pb-1/";
32 jengbou 1.4 // User defined parameters
33 jengbou 1.5 bool useInv = false; // whether to use n-1 QCD template
34     bool realData = false;
35     // Ntuples to use
36     TString suffix = "Sel0"; // Suffix of selection
37     TString invNames[2] = {"RelIsogt0p1","D0gt0p02"};
38     map<TString,TCanvas*> cvs; // map of usual histogram
39 jengbou 1.2
40     //=================================
41     // Main program
42     //=================================
43 jengbou 1.1 void ikstest() {
44 jengbou 1.4 gROOT->SetStyle("CMS");
45 jengbou 1.1 TLatex *latex = new TLatex();
46     latex->SetNDC();
47 jengbou 1.5 TString invName;
48     int size_ninv = (useInv ? 2 : 1);
49     for (int ninv = 0;ninv < size_ninv; ++ninv) {
50     invName = invNames[ninv];
51     if (!useInv) {
52     if (!realData) desDir += "MC/";
53     else desDir += "Data_MC/";
54     } else {
55     if (!realData) desDir += "MC_"+invName+"/";
56     else desDir += "Data_"+invName+"/";
57     }
58     struct stat stDir;
59     if (!stat(desDir,&stDir)){
60     cout << "Output folder exists! Continues? (enter to continue; 'q' for quit)" << endl;
61     char incmd;
62     cin.get(incmd);
63     if (incmd == 'q') return;
64     } else {
65     cout << "Creating folder : " << desDir << endl;
66     if (mkdir(desDir,0755) == -1){
67     std::cerr << "Error creating folder" << endl;
68     return;
69     }
70 jengbou 1.4 }
71 jengbou 1.1
72 jengbou 1.5 ofstream outprint(TString(desDir+"Results_"+suffix+".txt"));
73     //open the files with histograms
74     map<string,TFile*> mfile;
75     mfile["Data"] = TFile::Open(TString("skimmed_Data_2.88pb-1/Data_"+suffix+".root"));
76     // n-1 cuts
77     if (useInv) {
78     if (realData)
79     mfile["InvSel"] = TFile::Open(TString("skimmed_Data_2.88pb-1/Data_"+suffix+"_"+invName+".root"));
80     else
81     mfile["InvSel"] = TFile::Open(TString("skimmed_MC/v5/QCD_"+suffix+"_"+invName+".root"));
82 jengbou 1.2 }
83 jengbou 1.5 // RefSel MC
84     mfile["0"] = TFile::Open(TString("skimmed_MC/v5/QCD_"+suffix+".root"));
85     mfile["1"] = TFile::Open(TString("skimmed_MC/v5/WJets_"+suffix+".root"));
86     mfile["2"] = TFile::Open(TString("skimmed_MC/v5/TTbar_"+suffix+".root"));
87    
88     //define histograms and related parameters
89     string histoName[3] = {"h_mu_pt_calo","h_met_calo","h_mt_calo"};
90     string histoLabelX[3] = {"p_{T}^{#mu} [GeV/c]", "#slash{E}_{T} [GeV/c]", "M_{T}^{W} [GeV/c^{2}]"};
91     Int_t xbins[3] = {20,20,40};
92     Double_t xlow[3] = {0.,0.,0.};
93     Double_t xhigh[3] = {100.,100.,200.};
94     string sample[3] = {"QCD","Wjets","ttjets"};
95    
96     TH1F* h_[9];
97     TH1F* mixh_[3];
98     TH1F* hQCD_NEW[3];
99     TH1F* hKSres_[3];
100     TH1F* hKSvalues_[3];
101    
102     //load the histograms from the root files
103     for (int i = 0; i < 3; i++) {// 3 variables
104     //cout << "file[" << i << "] : " << endl;
105     string nameNewHisto = "mix_"+histoName[i];
106     string nameNewHistoSFKS = "finalSF_"+histoName[i];
107     string nameNewHistoKSvalues = "KSvalues_"+histoLabelX[i];
108    
109     mixh_[i] = new TH1F(nameNewHisto.c_str(),"",xbins[i],xlow[i],xhigh[i]);
110     hKSres_[i] = new TH1F(nameNewHistoSFKS.c_str(),"",xbins[i],xlow[i],xhigh[i]);
111     hKSvalues_[i] = new TH1F(nameNewHistoKSvalues.c_str(),"",2./stepsize, stepsize, 2.+stepsize);
112    
113     if (!useInv) {//use QCD MC sample
114     hQCD_NEW[i] = (TH1F*) mfile["0"]->Get(TString(histoName[i]))->Clone();
115     hQCD_NEW[i] -> Scale(weight_[0]);
116     hQCD_NEW[i] -> SetName((histoName[i]).c_str());
117     }
118     else {
119     hQCD_NEW[i] = (TH1F*) mfile["InvSel"]->Get(TString(histoName[i]));
120     if (!realData) hQCD_NEW[i] -> Scale(weight_[0]);
121     hQCD_NEW[i] -> SetName((histoName[i]).c_str());
122     }
123    
124     mixh_[i] -> Sumw2();
125     hKSres_[i] -> Sumw2();
126     hKSvalues_[i] -> Sumw2();
127 jengbou 1.2 }
128 jengbou 1.1
129 jengbou 1.5 for (int n = 0; n < 3; ++n) {// 3 MC samples
130     for (int ihisto = 0; ihisto < 3; ihisto++) {// 3 variables
131     //cout << "Variable[" << ihisto << "]" << endl;
132     string histo_name = histoName[ihisto]+sample[n];
133     ostringstream ss;
134     ss << n;
135     h_[n*3+ihisto] = (TH1F*) mfile[ss.str()]->Get(TString(histoName[ihisto]))->Clone();
136     h_[n*3+ihisto] -> Scale(weight_[n]);
137     h_[n*3+ihisto] -> SetName(histo_name.c_str());
138     }
139 jengbou 1.1 }
140    
141 jengbou 1.5 //create the mixed samples = "data"
142     TString cvsName0 = "Data";
143     if (useInv) {
144     cvsName0 += "_";
145     cvsName0 += invName;
146 jengbou 1.2 }
147 jengbou 1.5 cvs[cvsName0] = new TCanvas(cvsName0,"Data distributions",600,700);
148     cvs[cvsName0]->Divide(3,1);
149     for (int i = 0; i < 3; i++) {
150     cvs[cvsName0]->cd(i+1);
151     if (!realData) {
152     mixh_[i] -> Add(h_[i],h_[i+3], procQCD,procWjets);
153     //mixh_[i] -> Add(mixh_[i],h_[i+6], 1,procttjets);
154     //cout << "histo_name: " << mixh_[0]->GetNbinsX() << endl;
155     }
156     else {
157     TH1F *htmp = (TH1F*) mfile["Data"]->Get(TString(histoName[i]));
158     mixh_[i] -> Add(htmp,1.);
159     }
160     mixh_[i]->GetXaxis()->SetTitle(histoLabelX[i].c_str());
161     mixh_[i]->GetYaxis()->SetTitle("Entries");
162     mixh_[i]->DrawClone();
163 jengbou 1.2 }
164 jengbou 1.5 cvs[cvsName0]->SaveAs(TString(desDir+"Data_distributions.pdf"));
165 jengbou 1.1
166 jengbou 1.5 //define the weight corrections for each sample
167     double NevData = mixh_[2]->Integral();
168     double corr_NevQCD = h_[2]->Integral();
169     double corr_NevQCD_NEW = hQCD_NEW[2]->Integral();
170     double corr_NevWjets = h_[5]->Integral();
171     double corr_Nevttjets = h_[8]->Integral();
172     double corr_Nevmix = procQCD*corr_NevQCD+procWjets*corr_NevWjets;
173     //double corr_Nevmix = procQCD*corr_NevQCD+procWjets*corr_NevWjets+procttjets*corr_Nevttjets;
174     if (!realData)
175     outprint << "Events mix sample = " << corr_Nevmix << endl;
176     else
177     outprint << "Events in Data = " << NevData << endl;
178     outprint << "Events QCD sample = " << corr_NevQCD << endl;
179     outprint << "Events Wjets sample = " << corr_NevWjets << endl;
180     outprint << "Events InvSel sample = " << corr_NevQCD_NEW << endl;
181    
182     //define the containers for chosen numbers (coressponding to the max KStest result)
183     testMC maxProb[3];
184    
185     //define the scale factors calculated using information obtained from all parameters
186     Double_t SFbackg = 0.0;
187     Double_t sumSFbackg = 0.0;
188     Double_t SFsample = 0.0;
189     Double_t sumSFsample = 0.0;
190     Double_t allKS = 0.0;
191    
192     //do the KS test by varying the scale factors
193     for (int i = 0; i < 3; i++) { // 3 variables
194     TH1F *data = (TH1F*)mixh_[i]->Clone();
195     data -> SetName("dataClone");
196     //data -> Scale(1./data->Integral());
197     vector<testMC> resultsKS = doKStest((realData ? NevData : corr_Nevmix),
198     (useInv ? corr_NevQCD_NEW : corr_NevQCD),
199     corr_NevWjets,
200     data, hQCD_NEW[i], h_[i+3]);
201     testMC tksmax = getMax(resultsKS);
202     maxProb[i] = tksmax;
203     outprint << "\nFor the plot " << histoLabelX[i] << " the results are:"<< endl;
204     outprint << "\tmax Probability = " << maxProb[i].prob << endl;
205     outprint << "\tproc_background = " << maxProb[i].scaleF_backg << endl;
206     outprint << "\tproc_sample = " << maxProb[i].scaleF_sample << endl;
207    
208     outprint << "\n\tpercent_B of Data = "
209     << maxProb[i].scaleF_backg*corr_NevQCD_NEW*100/(realData ? NevData : corr_Nevmix) << endl;
210     outprint << "\tpercent_S of Data = "
211     << maxProb[i].scaleF_sample*corr_NevWjets*100/(realData ? NevData : corr_Nevmix) << endl;
212     outprint << "---------------------------" << endl;
213    
214     //create the mixed samples with KS test results
215     sumSFbackg += maxProb[i].prob*maxProb[i].scaleF_backg;
216     sumSFsample += maxProb[i].prob*maxProb[i].scaleF_sample;
217     allKS += maxProb[i].prob;
218    
219     //fill a histogram with the results from the KS test for each variable
220     for (int jiter = 0; jiter < resultsKS.size(); jiter++) {
221     if (resultsKS.at(jiter).prob == 1.)
222     cout << "variable [" << i << "]: prob[" << jiter << "]= " << resultsKS.at(jiter).prob << endl;
223     hKSvalues_[i]->SetBinContent(jiter,resultsKS.at(jiter).prob);
224     }
225     delete data;
226 jengbou 1.1 }
227    
228 jengbou 1.5 //calculate the final scale factors
229     SFbackg = sumSFbackg/allKS;
230     SFsample = sumSFsample/allKS;
231     outprint << "allKS = " << allKS << "\tbackground = " << SFbackg << "\tsample = " << SFsample << endl;
232     outprint << "==> Scale Factor for QCD MC = " << SFbackg*corr_NevQCD_NEW/corr_NevQCD << endl;
233     outprint << "\tcombined percent_B of Data = "
234     << SFbackg*corr_NevQCD_NEW*100/(realData ? NevData : corr_Nevmix) << endl;
235     outprint << "\tcombined percent_S of Data = "
236     << SFsample*corr_NevWjets*100/(realData ? NevData : corr_Nevmix) << endl;
237     outprint << "\n" << endl;
238     outprint << "=================================" << endl;
239     outprint << "\n" << endl;
240    
241    
242     //=================================
243     // Plots
244     //=================================
245     for (int i = 0; i < 3; i++) {// 3 variables
246     hKSres_[i] -> Add(hQCD_NEW[i],h_[i+3],SFbackg,SFsample);
247     outprint << "hKSres->Integral() = " << hKSres_[i]->Integral() << endl;
248     outprint << "Data->Integral() = " << mixh_[i]->Integral() << endl;
249    
250     mixh_[i]->Rebin(2);
251     hQCD_NEW[i]->Rebin(2);
252     h_[i]->Rebin(2);
253     h_[i+3]->Rebin(2);
254     hKSres_[i]->Rebin(2);
255     //hKSvalues_[i]->Rebin(2);
256    
257     mixh_[i] ->SetLineColor(1);
258     hQCD_NEW[i] ->SetLineColor(2);
259     h_[i] ->SetLineColor(4);
260     h_[i+3] ->SetLineColor(3);
261     hKSres_[i] ->SetLineColor(2);
262     hKSvalues_[i]->SetLineColor(i+1);
263    
264     mixh_[i] ->SetMarkerColor(1);
265     hQCD_NEW[i] ->SetMarkerColor(2);
266     h_[i] ->SetMarkerColor(4);
267     h_[i+3] ->SetMarkerColor(3);
268     hKSres_[i] ->SetMarkerColor(2);
269     hKSvalues_[i]->SetMarkerColor(i+1);
270    
271     mixh_[i] ->SetMarkerStyle(24);
272     hQCD_NEW[i] ->SetMarkerStyle(20);
273     h_[i] ->SetMarkerStyle(20);
274     h_[i+3] ->SetMarkerStyle(20);
275     hKSres_[i] ->SetMarkerStyle(20);
276     hKSvalues_[i]->SetMarkerStyle(20);
277    
278     mixh_[i] ->SetMarkerSize(1.4);
279     hQCD_NEW[i] ->SetMarkerSize(1.1);
280     h_[i] ->SetMarkerSize(1.1);
281     h_[i+3] ->SetMarkerSize(1.1);
282     hKSres_[i] ->SetMarkerSize(0.9);
283     hKSvalues_[i]->SetMarkerSize(1.1);
284    
285     mixh_[i] ->SetStats(0);
286     hQCD_NEW[i] ->SetStats(0);
287     h_[i] ->SetStats(0);
288     h_[i+3] ->SetStats(0);
289     hKSres_[i] ->SetStats(0);
290     hKSvalues_[i]->SetStats(0);
291    
292     hKSres_[i]->GetXaxis()->SetTitle(histoLabelX[i].c_str());
293     hKSres_[i]->GetYaxis()->SetTitle("Entries");
294     hKSvalues_[i]->GetXaxis()->SetTitle("iteration #");
295     hKSvalues_[i]->GetYaxis()->SetTitle("KS test values");
296     h_[i]->GetXaxis()->SetTitle(histoLabelX[i].c_str());
297     h_[i]->GetYaxis()->SetTitle("A.U.");
298    
299     TString nameCanvas1 = desDir+histoName[i]+"_QCD_"+suffix+".pdf";
300     TString cvsName1 = histoName[i]+"_QCD";
301     if(useInv) cvsName1 = cvsName1 + "_" + invName;
302     cvs[cvsName1] = new TCanvas(cvsName1,"",600,700);
303     hQCD_NEW[i] -> Scale(1./hQCD_NEW[i]->Integral());
304     h_[i] -> Scale(1./h_[i]->Integral());
305     h_[i+3] -> Scale(1./h_[i+3]->Integral());
306     outprint << "For " << histoName[i] << " , the KStest result btw MC_QCD/InvSel is = "
307     << h_[i] -> KolmogorovTest(hQCD_NEW[i],"") << endl;
308     h_[i]->Draw("P");
309     if (useInv)
310     hQCD_NEW[i]->Draw("sameP");
311     h_[i+3]->Draw("sameP");
312     TLegend *legend1 = new TLegend(0.7, 0.70, 0.9, 0.85);
313     legend1->AddEntry(h_[i], "QCD");
314     if (useInv)
315     legend1->AddEntry(hQCD_NEW[i], "QCD - InvSel");
316     legend1->AddEntry(h_[i+3], "W+jets");
317     legend1->Draw();
318     legend1->SetFillColor(kWhite);
319     //latex->DrawLatex(0.22,0.91,histoName[i].c_str());
320     //cvs[cvsName1]->SetLogy();
321     cvs[cvsName1]->SaveAs(nameCanvas1);
322    
323     TString nameCanvas2 = desDir+histoName[i]+"_dataKS_"+suffix+".pdf";
324     TString cvsName2 = histoName[i]+"_dataKS";
325     if(useInv) cvsName2 = cvsName2 + "_" + invName;
326     cvs[cvsName2] = new TCanvas(cvsName2,"",600,700);
327     hKSres_[i]->Draw("P");
328     mixh_[i]->Draw("sameP");
329     TLegend *legend2 = new TLegend(0.7, 0.70, 0.9, 0.85);
330     legend2->AddEntry(mixh_[i], "Data");
331     legend2->AddEntry(hKSres_[i], "KS result");
332     legend2->Draw();
333     legend2->SetFillColor(kWhite);
334     //latex->DrawLatex(0.22,0.91,histoName[i].c_str());
335     //cvs[cvsName2]->SetLogy();
336     cvs[cvsName2]->SaveAs(nameCanvas2);
337 jengbou 1.2
338 jengbou 1.5 }
339 jengbou 1.2
340    
341 jengbou 1.5 TString cvsName3 = "KStestValues";
342     if(useInv) cvsName3 = cvsName3 + "_" + invName;
343     cvs[cvsName3] = new TCanvas(cvsName3,"",600,700);
344     //hKSvalues_[0]->GetXaxis()->SetRangeUser(0.9,1.2);
345     hKSvalues_[0]->GetYaxis()->SetRangeUser(1e-36,1.2);
346     hKSvalues_[0]->Draw();
347     hKSvalues_[1]->Draw("same");
348     hKSvalues_[2]->Draw("same");
349     TLegend *legend3 = new TLegend(0.7, 0.70, 0.9, 0.85);
350     legend3->AddEntry(hKSvalues_[0], "muon_pT");
351     legend3->AddEntry(hKSvalues_[1], "MET");
352     legend3->AddEntry(hKSvalues_[2], "W_mT");
353     legend3->Draw();
354     legend3->SetFillColor(kWhite);
355     //latex->DrawLatex(0.22,0.91,"KS test values");
356     cvs[cvsName3]->SetLogy();
357     TString nameCanvas3 = desDir+"KStestValues_newQCD"+suffix+".pdf";
358     cvs[cvsName3]->SaveAs(nameCanvas3);
359     }
360 jengbou 1.1 }