ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/Jeng/scripts/ikstest.cc
(Generate patch)

Comparing UserCode/Jeng/scripts/ikstest.cc (file contents):
Revision 1.1 by jengbou, Tue Aug 31 16:01:45 2010 UTC vs.
Revision 1.3 by jengbou, Thu Sep 2 05:36:21 2010 UTC

# Line 8 | Line 8
8   #include <TLegend.h>
9   #include <TObject.h>
10   #include <iostream>
11 + #include <sstream>
12   #include <fstream>
13   #include <iomanip>
14 + #include <map>
15  
16   using namespace std;
17  
18 < //define the constants
19 < double const weight_QCD = 0.015292368;
20 < double const weight_Wjets = 0.002665824;
21 < double const weight_ttjets = 0.00009072;
22 < Double_t const procQCD = 1.51;
23 < Double_t const procWjets = 1.05;
24 < Double_t const procttjets = 1.0;
18 > // User defined parameters
19 > bool useInv           = false; // whether to use n-1 QCD template
20 > bool realData         = false;
21 > const double stepsize = 0.001;
22 >
23 > //define the constants: 2.78/pb
24 > const double weight_[3]    = {0.0506107, //QCD
25 >                              0.0088222, //WJets
26 >                              0.000295   //TTbar
27 > };
28 > const Double_t procQCD     = 1.46;
29 > const Double_t procWjets   = 1.03;
30 > const Double_t procttjets  = 1.0;
31  
32   struct testMC {
33    testMC(Double_t p = 0., Double_t sb = 0., Double_t ss = 0.){prob=p; scaleF_backg = sb; scaleF_sample = ss;}
# Line 29 | Line 37 | struct testMC {
37   };
38  
39   //function for doing KS test
40 < vector<testMC> doKStest(Double_t NmixS, Double_t Ns1, Double_t Ns2, TH1F* mixS, TH1F* s1, TH1F* s2) {
40 > vector<testMC> doKStest(Double_t NsS, Double_t Ns1, Double_t Ns2, TH1F* mixS, TH1F* s1, TH1F* s2) {
41    vector<testMC> output;
42    //define the scale factors
43    Double_t sf1 = 0.0; // QCD
44    Double_t sf2 = 0.0; // Wjets
45    //KS test
46    do {
47 <    sf1 = (NmixS - Ns2*sf2)/Ns1;
47 >    sf1 = (NsS - Ns2*sf2)/Ns1;
48      if (sf1 < 0) break;
49      //cout << "..........sf1 = " << sf1 << endl;
50      int nbins = mixS->GetNbinsX();
# Line 44 | Line 52 | vector<testMC> doKStest(Double_t NmixS,
52      double xmax = mixS->GetXaxis()->GetBinUpEdge(nbins);
53      TH1F *test = new TH1F("test", "", nbins, xmin, xmax);
54      test -> Sumw2();
55 <    test -> Add(s1,s2,sf1,sf2);
55 >    test -> Add(s1,s2,sf1,sf2);
56      //test->Scale(1./(1.*test->Integral()));
57 <    Double_t probability = test -> KolmogorovTest(mixS,"N");
57 >    Double_t probability = mixS -> KolmogorovTest(test,"");
58      testMC temp = testMC(probability,sf1,sf2);
59      output.push_back(temp);
60   //     cout << "probability = " << setw(15) << temp.prob
61   //       << "; sfQCD = "     << setw(10) << temp.scaleF_backg
62 < //       << "; sfWjets = "   << setw(6)  << temp.scaleF_sample << endl;
62 > //       << "; sfWjets = "   << setw(6)  << temp.scaleF_sample << endl;
63      delete test;
64 <    sf2 = sf2 + 0.001;
65 <  } while(sf1 > 0 && sf2 <= 4.0);
64 >    sf2 = sf2 + stepsize;
65 >  } while(sf1 > 0 && sf2 <= 2.0);
66    return output;
67   }
68  
# Line 68 | Line 76 | testMC getMax(vector<testMC> vec) {
76        maxKSRes = vec.at(i);
77      }
78    }
79 <  cout << "for maximum: " << maxKSRes.prob
80 <       << "\tsb = " << maxKSRes.scaleF_backg
81 <       << "\tss = " << maxKSRes.scaleF_sample << endl;
79 >  cout << "for maximum: " << setw(12) << maxKSRes.prob
80 >       << "; sb = " << setw(10) << maxKSRes.scaleF_backg
81 >       << "; ss = " << setw(5) << maxKSRes.scaleF_sample << endl;
82    return maxKSRes;
83   }
84  
85 +
86 + //=================================
87 + // Main program
88 + //=================================
89   void ikstest() {
90    //Style();
91    TLatex *latex = new TLatex();
92    latex->SetNDC();
93  
94 <  ofstream outprint( "ikstest_results.txt" );
83 <
94 >  ofstream outprint( "ikstest_results_20100901.txt" );
95    //open the files with histograms
96 <  TFile *file[3];
97 <  file[0] = TFile::Open("InclusiveMu15_Spring10_all.root");
98 <  file[1] = TFile::Open("WJets_madgraph_Spring10_all.root");
99 <  file[2] = TFile::Open("TTbarJets_madgraph_Spring10_all.root");
96 >  map<string,TFile*> mfile;
97 >  mfile["Data"] = TFile::Open("skimmed_Data_20100901/Data_RefSel_v3.root");
98 >  // n-1 cuts
99 >  if (useInv) {
100 >    if (realData)
101 > //       mfile["InvSel"] = TFile::Open("skimmed_Data_20100825/Data_D0ge0p03.root");
102 >      mfile["InvSel"] = TFile::Open("skimmed_Data_20100901/Data_RelIsoge0p1_v3.root");
103 >    else
104 > //       mfile["InvSel"] = TFile::Open("skimmed_MC/QCD_D0ge0p03.root");
105 >      mfile["InvSel"] = TFile::Open("skimmed_MC/QCD_RelIsoge0p1_v3.root");
106 >  }
107 >
108 >  // RefSel MC
109 >  mfile["0"] = TFile::Open("skimmed_MC/QCD_RefSel_v3.root");
110 >  mfile["1"] = TFile::Open("skimmed_MC/WJets_RefSel_v3.root");
111 >  mfile["2"] = TFile::Open("skimmed_MC/TTbar_RefSel_v3.root");
112  
113    //define histograms and related parameters
114 <  string histoN[3] = {"h_mu_pt","h_met0","h_mt0"};
92 <  string hQCD_new[3] = {"h_mu_pt","h_met0","h_mt0"};
114 >  string histoName[3] = {"h_mu_pt_calo","h_met_calo","h_mt_calo"};
115    string histoLabelX[3] = {"p_{T}^{good Muons}", "E_{T}^{#nu}", "m_{T}^{W}"};
116 <  Int_t xbins[3] = {10,30,30};
116 >  Int_t xbins[3] = {20,20,40};
117    Double_t xlow[3] = {0.,0.,0.};
118 <  Double_t xhigh[3] = {100.,200.,200.};
118 >  Double_t xhigh[3] = {100.,100.,200.};
119    string sample[3] = {"QCD","Wjets","ttjets"};
120  
121    TH1F* h_[9];
# Line 105 | Line 127 | void ikstest() {
127    //load the histograms from the root files
128    for (int i = 0; i < 3; i++) {// 3 variables
129      //cout << "file[" << i << "] : " << endl;
130 <    string nameNewHisto = "mix_"+histoN[i];
131 <    string nameNewHistoSFKS = "finalSF_"+histoN[i];
130 >    string nameNewHisto = "mix_"+histoName[i];
131 >    string nameNewHistoSFKS = "finalSF_"+histoName[i];
132      string nameNewHistoKSvalues = "KSvalues_"+histoLabelX[i];
133  
134      mixh_[i] = new TH1F(nameNewHisto.c_str(),"",xbins[i],xlow[i],xhigh[i]);
135      hKSres_[i] = new TH1F(nameNewHistoSFKS.c_str(),"",xbins[i],xlow[i],xhigh[i]);
136 <    hKSvalues_[i] = new TH1F(nameNewHistoKSvalues.c_str(),"",300, 0.5, 300.5);
137 <    //cout << TString("demo/"+hQCD_new[i]) << endl;
138 <    hQCD_NEW[i] = (TH1F*)file[0]->Get(TString("demo/"+hQCD_new[i])); // corresponds to h_[i]; i=0..2 for now
139 <    hQCD_NEW[i] -> SetName((hQCD_new[i]).c_str());
136 >    hKSvalues_[i] = new TH1F(nameNewHistoKSvalues.c_str(),"",2./stepsize, stepsize, 2.+stepsize);
137 >
138 >    if (!useInv) {//use QCD MC sample
139 >      hQCD_NEW[i] = (TH1F*) mfile["0"]->Get(TString(histoName[i]))->Clone();
140 >      hQCD_NEW[i] -> Scale(weight_[0]);
141 >      hQCD_NEW[i] -> SetName((histoName[i]).c_str());
142 >    }
143 >    else {
144 >      hQCD_NEW[i] = (TH1F*) mfile["InvSel"]->Get(TString(histoName[i]));
145 >      if (!realData) hQCD_NEW[i] -> Scale(weight_[0]);
146 >      hQCD_NEW[i] -> SetName((histoName[i]).c_str());
147 >    }
148  
149      mixh_[i] -> Sumw2();
150      hKSres_[i] -> Sumw2();
151      hKSvalues_[i] -> Sumw2();
152 +  }
153  
154 <    for (int ihisto = 0; ihisto < 3; ihisto++) {
154 >  for (int n = 0; n < 3; ++n) {// 3 MC samples
155 >    for (int ihisto = 0; ihisto < 3; ihisto++) {// 3 variables
156        //cout << "Variable[" << ihisto << "]" << endl;
157 <      string histo_name = histoN[ihisto]+sample[i];
158 <      //cout << TString("demo/"+histoN[ihisto]) << endl;
159 <      h_[i*3+ihisto] = (TH1F*)file[i]->Get(TString("demo/"+histoN[ihisto]));
160 <      h_[i*3+ihisto] -> SetName(histo_name.c_str());
157 >      string histo_name = histoName[ihisto]+sample[n];
158 >      ostringstream ss;
159 >      ss << n;
160 >      h_[n*3+ihisto] = (TH1F*) mfile[ss.str()]->Get(TString(histoName[ihisto]))->Clone();
161 >      h_[n*3+ihisto] -> Scale(weight_[n]);
162 >      h_[n*3+ihisto] -> SetName(histo_name.c_str());
163      }
164    }
165  
166    //create the mixed samples = "data"
167 +  TCanvas *canvas0 = new TCanvas("Data","Data distributions");
168 +  canvas0->Divide(3,1);
169    for (int i = 0; i < 3; i++) {
170 <    mixh_[i] -> Add(h_[i],h_[i+3], procQCD,procWjets);
171 <    //mixh_[i] -> Add(mixh_[i],h_[i+6], 1,procttjets);
172 <    //cout << "histo_name: " << mixh_[0]->GetNbinsX() << endl;
170 >    canvas0->cd(i+1);
171 >    if (!realData) {
172 >      mixh_[i] -> Add(h_[i],h_[i+3], procQCD,procWjets);
173 >      //mixh_[i] -> Add(mixh_[i],h_[i+6], 1,procttjets);
174 >      //cout << "histo_name: " << mixh_[0]->GetNbinsX() << endl;
175 >    }
176 >    else {
177 >      TH1F *htmp = (TH1F*) mfile["Data"]->Get(TString(histoName[i]));
178 >      mixh_[i] -> Add(htmp,1.);
179 >    }
180 >    mixh_[i]->DrawClone();
181    }
182 +  canvas0->SaveAs("Data_distributions.pdf");
183  
184    //define the weight corrections for each sample
185 <  double corr_NevQCD            = (h_[2]->GetEntries())*weight_QCD;
186 <  double corr_NevQCD_NEW        = (hQCD_NEW[2]->GetEntries())*weight_QCD;
187 <  double corr_NevWjets          = (h_[5]->GetEntries())*weight_Wjets;
188 <  double corr_Nevttjets         = (h_[8]->GetEntries())*weight_ttjets;
185 >  double NevData                = mixh_[2]->Integral();
186 >  double corr_NevQCD            = h_[2]->Integral();
187 >  double corr_NevQCD_NEW        = hQCD_NEW[2]->Integral();
188 >  double corr_NevWjets          = h_[5]->Integral();
189 >  double corr_Nevttjets         = h_[8]->Integral();
190    double corr_Nevmix            = procQCD*corr_NevQCD+procWjets*corr_NevWjets;
191 <  //double corr_Nevmix    = procQCD*corr_NevQCD+procWjets*corr_NevWjets+procttjets*corr_Nevttjets;
192 <  outprint << "Events mix sample = " << corr_Nevmix << endl;
193 <  outprint << "Events QCD sample = " << corr_NevQCD << endl;
194 <  outprint << "Events Wjets sample = " << corr_NevWjets << endl;
195 <  outprint << "Events Nev revIso QCD sample = " << corr_NevQCD_NEW << endl;
191 >  //double corr_Nevmix            = procQCD*corr_NevQCD+procWjets*corr_NevWjets+procttjets*corr_Nevttjets;
192 >  if (!realData)
193 >    outprint << "Events mix sample    = " << corr_Nevmix << endl;
194 >  else
195 >    outprint << "Events in Data       = " << NevData << endl;
196 >  outprint << "Events QCD sample    = " << corr_NevQCD << endl;
197 >  outprint << "Events Wjets sample  = " << corr_NevWjets << endl;
198 >  outprint << "Events InvSel sample = " << corr_NevQCD_NEW << endl;
199  
200    //define the containers for chosen numbers (coressponding to the max KStest result)
201    testMC maxProb[3];
# Line 161 | Line 210 | void ikstest() {
210    //do the KS test by varying the scale factors
211    for (int i = 0; i < 3; i++) { // 3 variables
212      TH1F *data = (TH1F*)mixh_[i]->Clone();
213 <    data -> SetName("dataClone");
213 >    data -> SetName("dataClone");
214      //data -> Scale(1./data->Integral());
215 <    vector<testMC> resultsKS = doKStest(corr_Nevmix, corr_NevQCD_NEW, corr_NevWjets, data, hQCD_NEW[i], h_[i+3]);
215 >    vector<testMC> resultsKS = doKStest((realData ? NevData : corr_Nevmix),
216 >                                        (useInv ? corr_NevQCD_NEW : corr_NevQCD),
217 >                                        corr_NevWjets,
218 >                                        data, hQCD_NEW[i], h_[i+3]);
219      testMC tksmax = getMax(resultsKS);
220      maxProb[i] = tksmax;
221      outprint << "\nFor the plot " << histoLabelX[i] << " the results are:"<< endl;
# Line 171 | Line 223 | void ikstest() {
223      outprint << "\tproc_background = " << maxProb[i].scaleF_backg << endl;
224      outprint << "\tproc_sample     = " << maxProb[i].scaleF_sample << endl;
225  
226 <    outprint << "\n\tpercent_B of Data = " << maxProb[i].scaleF_backg*corr_NevQCD_NEW*100/corr_Nevmix << endl;
227 <    outprint << "\tpercent_S of Data = " << maxProb[i].scaleF_sample*corr_NevWjets*100/corr_Nevmix << endl;
226 >    outprint << "\n\tpercent_B of Data = "
227 >             << maxProb[i].scaleF_backg*corr_NevQCD_NEW*100/(realData ? NevData : corr_Nevmix) << endl;
228 >    outprint << "\tpercent_S of Data = "
229 >             << maxProb[i].scaleF_sample*corr_NevWjets*100/(realData ? NevData : corr_Nevmix) << endl;
230      outprint << "---------------------------" << endl;
231  
232      //create the mixed samples with KS test results
# Line 182 | Line 236 | void ikstest() {
236  
237      //fill a histogram with the results from the KS test for each variable
238      for (int jiter = 0; jiter < resultsKS.size(); jiter++) {
239 <      //cout << "prob = " << resultsKS.at(jiter).prob << endl;
239 >      if (resultsKS.at(jiter).prob == 1.)
240 >        cout << "variable [" << i << "]: prob[" << jiter << "]= " << resultsKS.at(jiter).prob << endl;
241        hKSvalues_[i]->SetBinContent(jiter,resultsKS.at(jiter).prob);
242      }
243      delete data;
# Line 192 | Line 247 | void ikstest() {
247    SFbackg = sumSFbackg/allKS;
248    SFsample = sumSFsample/allKS;
249    outprint << "allKS = " << allKS << "\tbackground = " << SFbackg << "\tsample = " << SFsample << endl;
250 <  outprint << "\tcombined percent_B of Data = " << SFbackg*corr_NevQCD_NEW*100/corr_Nevmix << endl;
251 <  outprint << "\tcombined percent_S of Data = " << SFsample*corr_NevWjets*100/corr_Nevmix << endl;
250 >  outprint << "==> Scale Factor for QCD MC = " << SFbackg*corr_NevQCD_NEW/corr_NevQCD << endl;
251 >  outprint << "\tcombined percent_B of Data = "
252 >           << SFbackg*corr_NevQCD_NEW*100/(realData ? NevData : corr_Nevmix) << endl;
253 >  outprint << "\tcombined percent_S of Data = "
254 >           << SFsample*corr_NevWjets*100/(realData ? NevData : corr_Nevmix) << endl;
255    outprint << "\n" << endl;
256    outprint << "=================================" << endl;
257    outprint << "\n" << endl;
258  
259 +
260 +  //=================================
261 +  // Plots
262 +  //=================================
263 +  for (int i = 0; i < 3; i++) {// 3 variables
264 +    hKSres_[i] -> Add(hQCD_NEW[i],h_[i+3],SFbackg,SFsample);
265 +    outprint << "hKSres->Integral() = " << hKSres_[i]->Integral() << endl;
266 +    outprint << "Data->Integral()   = " << mixh_[i]->Integral() << endl;
267 +
268 +    mixh_[i]->Rebin(2);
269 +    hQCD_NEW[i]->Rebin(2);
270 +    h_[i]->Rebin(2);
271 +    h_[i+3]->Rebin(2);
272 +    hKSres_[i]->Rebin(2);
273 +    //hKSvalues_[i]->Rebin(2);
274 +
275 +    mixh_[i]     ->SetLineColor(1);
276 +    hQCD_NEW[i]  ->SetLineColor(3);
277 +    h_[i]        ->SetLineColor(6);
278 +    h_[i+3]      ->SetLineColor(4);
279 +    hKSres_[i]   ->SetLineColor(2);
280 +    hKSvalues_[i]->SetLineColor(i+1);
281 +
282 +    mixh_[i]     ->SetMarkerColor(1);
283 +    hQCD_NEW[i]  ->SetMarkerColor(3);
284 +    h_[i]        ->SetMarkerColor(6);
285 +    h_[i+3]      ->SetMarkerColor(4);
286 +    hKSres_[i]   ->SetMarkerColor(2);
287 +    hKSvalues_[i]->SetMarkerColor(i+1);
288 +
289 +    mixh_[i]     ->SetMarkerStyle(24);
290 +    hQCD_NEW[i]  ->SetMarkerStyle(20);
291 +    h_[i]        ->SetMarkerStyle(20);
292 +    h_[i+3]      ->SetMarkerStyle(20);
293 +    hKSres_[i]   ->SetMarkerStyle(20);
294 +    hKSvalues_[i]->SetMarkerStyle(20);
295 +
296 +    mixh_[i]     ->SetMarkerSize(1.4);
297 +    hQCD_NEW[i]  ->SetMarkerSize(1.1);
298 +    h_[i]        ->SetMarkerSize(1.1);
299 +    h_[i+3]      ->SetMarkerSize(1.1);
300 +    hKSres_[i]   ->SetMarkerSize(0.9);
301 +    hKSvalues_[i]->SetMarkerSize(1.1);
302 +
303 +    mixh_[i]     ->SetStats(0);
304 +    hQCD_NEW[i]  ->SetStats(0);
305 +    h_[i]        ->SetStats(0);
306 +    h_[i+3]      ->SetStats(0);
307 +    hKSres_[i]   ->SetStats(0);
308 +    hKSvalues_[i]->SetStats(0);
309 +
310 +    mixh_[i]->GetXaxis()->SetTitle(histoLabelX[i].c_str());
311 +    mixh_[i]->GetYaxis()->SetTitle("Entries");
312 +    hKSres_[i]->GetXaxis()->SetTitle(histoLabelX[i].c_str());
313 +    hKSres_[i]->GetYaxis()->SetTitle("Entries");
314 +    hKSvalues_[i]->GetXaxis()->SetTitle("iteration #");
315 +    hKSvalues_[i]->GetYaxis()->SetTitle("KS test values");
316 +
317 +    string nameCanvas1 = histoName[i]+"_QCD.pdf";
318 +    TCanvas *canvas1 = new TCanvas(nameCanvas1.c_str(), "");
319 +    hQCD_NEW[i] -> Scale(1./hQCD_NEW[i]->Integral());
320 +    h_[i] -> Scale(1./h_[i]->Integral());
321 +    h_[i+3] -> Scale(1./h_[i+3]->Integral());
322 +    outprint << "For " << histoName[i] << " , the KStest result btw MC_QCD/InvSel is = "
323 +             << h_[i] -> KolmogorovTest(hQCD_NEW[i],"") << endl;
324 +    hQCD_NEW[i]->Draw("P");
325 +    h_[i]->Draw("sameP");
326 +    h_[i+3]->Draw("sameP");
327 +    TLegend *legend1 = new TLegend(0.7, 0.70, 0.9, 0.85);
328 +    legend1->AddEntry(h_[i], "default");
329 +    legend1->AddEntry(h_[i+3], "W+jets");
330 +    legend1->AddEntry(hQCD_NEW[i], "new");
331 +    legend1->Draw();
332 +    legend1->SetFillColor(kWhite);
333 +    latex->DrawLatex(0.22,0.91,histoName[i].c_str());
334 +    canvas1->SetLogy();
335 +    canvas1->SaveAs(nameCanvas1.c_str());
336 +
337 +    string nameCanvas2 = histoName[i]+"_dataKS.pdf";
338 +    TCanvas *canvas2 = new TCanvas(nameCanvas2.c_str(), "");
339 +    hKSres_[i]->Draw("P");
340 +    mixh_[i]->Draw("sameP");
341 +    TLegend *legend2 = new TLegend(0.7, 0.70, 0.9, 0.85);
342 +    legend2->AddEntry(mixh_[i], "Data");
343 +    legend2->AddEntry(hKSres_[i], "KS result");
344 +    legend2->Draw();
345 +    legend2->SetFillColor(kWhite);
346 +    latex->DrawLatex(0.22,0.91,histoName[i].c_str());
347 +    canvas2->SetLogy();
348 +    canvas2->SaveAs(nameCanvas2.c_str());
349 +
350 +  }
351 +
352 +
353 +  TCanvas *canvas3 = new TCanvas("KStestValues", "");
354 +  //hKSvalues_[0]->GetXaxis()->SetRangeUser(0.9,1.1);
355 +  //hKSvalues_[0]->GetYaxis()->SetRangeUser(1e-3,1.1);
356 +  hKSvalues_[0]->Draw();
357 +  hKSvalues_[1]->SetLineColor(2);
358 +  hKSvalues_[1]->Draw("same");
359 +  hKSvalues_[2]->SetLineColor(4);
360 +  hKSvalues_[2]->Draw("same");
361 +  TLegend *legend3 = new TLegend(0.7, 0.70, 0.9, 0.85);
362 +  legend3->AddEntry(hKSvalues_[0], "muon_pT");
363 +  legend3->AddEntry(hKSvalues_[1], "MET");
364 +  legend3->AddEntry(hKSvalues_[2], "W_mT");
365 +  legend3->Draw();
366 +  legend3->SetFillColor(kWhite);
367 +  latex->DrawLatex(0.22,0.91,"KS test values");
368 +  canvas3->SetLogy();
369 +  string nameCanvas3 = "KStestValues_newQCD.pdf";
370 +  canvas3->SaveAs(nameCanvas3.c_str());
371 +
372   }

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines