ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/cbrown/Development/Plotting/Modules/EdgeLimit.C
Revision: 1.4
Committed: Mon Jun 25 16:45:40 2012 UTC (12 years, 10 months ago) by buchmann
Content type: text/plain
Branch: MAIN
Changes since 1.3: +128 -7 lines
Log Message:
Converted python functions for limit computation, exclusion, lm point preparation, and toy generation to c++

File Contents

# User Rev Content
1 buchmann 1.1 #include <iostream>
2    
3 buchmann 1.2 #include <RooRealVar.h>
4     #include <RooArgSet.h>
5     #include <RooDataSet.h>
6 buchmann 1.4 #include <RooMCStudy.h>
7     #include <RooCategory.h>
8    
9 buchmann 1.3 #include <RooStats/ModelConfig.h>
10     #include "RooStats/ProfileLikelihoodCalculator.h"
11     #include "RooStats/LikelihoodInterval.h"
12     #include "RooStats/HypoTestResult.h"
13     #include "RooStats/SimpleLikelihoodRatioTestStat.h"
14     #include "RooStats/ProfileLikelihoodTestStat.h"
15     #include "RooStats/HybridCalculatorOriginal.h"
16     #include "RooStats/HypoTestInverterOriginal.h"
17    
18 buchmann 1.2
19 buchmann 1.1 using namespace std;
20     using namespace PlottingSetup;
21    
22    
23 buchmann 1.2
24    
25 buchmann 1.1 ShapeDroplet LimitsFromEdge(float low_fullCLs, float high_CLs, TTree *events, string addcut, string name, string mcjzb, string datajzb, vector<float> jzbbins, float jzbpeakerrormc, float jzbpeakerrordata) {
26     write_error(__FUNCTION__,"Not implemented edge limits yet (returning crap)");
27     ShapeDroplet beta;beta.observed=-12345;beta.SignalIntegral=1;return beta;
28     }
29    
30    
31     void PrepareEdgeShapes(string mcjzb, string datajzb, vector<float> jzbbins, float jzbpeakerrordata) {
32     write_error(__FUNCTION__,"Not implemented edge shape storage yet");
33     }
34    
35    
36 buchmann 1.2 ///------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
37    
38    
39     namespace EdgeFitter {
40    
41     void DoEdgeFit(string mcjzb, string datajzb, float DataPeakError, float MCPeakError, float jzb_cut, int icut, int is_data, TCut cut, TTree*);
42     void DoEdgeFit(string mcjzb, string datajzb, float DataPeakError, float MCPeakError, vector<float> jzb_cut, int is_data, TCut cut, TTree*);
43 buchmann 1.4 void getMedianLimit(RooWorkspace *ws,vector<RooDataSet*> theToys,float &median,float &sigmaDown, float &sigmaUp, float &twoSigmaDown, float &twoSigmaUp);
44 buchmann 1.2 void InitializeVariables(float _mllmin, float _mllmax, float _jzbmax, TCut _cut);
45     void PrepareDatasets(int);
46     void DoFit();
47     string RandomStorageFile();
48     Yield Get_Z_estimate(float,int);
49     Yield Get_T_estimate(float,int);
50 buchmann 1.4 float calcExclusion(RooWorkspace *ws, RooDataSet *data = NULL);
51     void prepareLimits(RooWorkspace *ws);
52     vector<RooDataSet*> generateToys(RooWorkspace *ws, int nToys);
53     void prepareLimits(RooWorkspace *ws, bool ComputeBands);
54     TGraph* prepareLM(float mass, float nEv);
55 buchmann 1.2
56     float jzbmax;
57     float mllmin;
58     float mllmax;
59     TCut cut;
60    
61     RooDataSet* AllData;
62     RooDataSet* eeSample;
63     RooDataSet* mmSample;
64     RooDataSet* emSample;
65    
66     bool MarcoDebug;
67     }
68    
69 buchmann 1.4 TGraph* EdgeFitter::prepareLM(float mass, float nEv) {
70     float massLM[1];
71     massLM[0]=mass;
72     float accEffLM[1];
73     accEffLM[0]=nEv/PlottingSetup::luminosity;
74     TGraph *lm = new TGraph(1, massLM, accEffLM);
75     lm->GetXaxis()->SetNoExponent(true);
76     lm->GetXaxis()->SetTitle("m_{cut} [GeV]");
77     lm->GetYaxis()->SetTitle("#sigma #times A [pb] 95% CL UL");
78     lm->GetXaxis()->SetLimits(0.,300.);
79     lm->GetYaxis()->SetRangeUser(0.,0.08);
80     lm->SetMarkerStyle(34);
81     lm->SetMarkerColor(kRed);
82     return lm;
83     }
84    
85     vector<RooDataSet*> EdgeFitter::generateToys(RooWorkspace *ws, int nToys) {
86     ws->var("nSig")->setVal(0.);
87     ws->var("nSig")->setConstant(true);
88     RooFitResult* fit = ws->pdf("combModel")->fitTo(*ws->data("data_obs"),RooFit::Save());
89     vector<RooDataSet*> theToys;
90    
91     RooMCStudy mcEE(*ws->pdf("combModel"),RooArgSet(*ws->var("inv")),RooFit::Slice(*ws->cat("cat"),"EE"));
92     mcEE.generate(nToys,44,true);
93     RooMCStudy mcMM(*ws->pdf("combModel"),RooArgSet(*ws->var("inv")),RooFit::Slice(*ws->cat("cat"),"MM"));
94     mcMM.generate(nToys,44,true);
95     RooMCStudy mcOSOF(*ws->pdf("combModel"),RooArgSet(*ws->var("inv")),RooFit::Slice(*ws->cat("cat"),"OSOF"));
96     mcOSOF.generate(nToys,44,true);
97    
98     RooRealVar mll("mll","mll",mllmin,mllmax,"GeV/c^{2}");
99     RooRealVar id1("id1","id1",0,1,"GeV/c^{2}");
100     RooRealVar id2("id2","id2",0,1,"GeV/c^{2}");
101     RooRealVar jzb("jzb","jzb",-jzbmax,jzbmax,"GeV/c");
102     RooRealVar weight("weight","weight",0,1000,"");
103     RooArgSet observables(mll,jzb,id1,id2,weight);
104    
105     for(int i=0;i<nToys;i++) {
106     RooDataSet* toyEE = (RooDataSet*)mcEE.genData(i);
107     RooDataSet* toyMM = (RooDataSet*)mcMM.genData(i);
108     RooDataSet* toyOSOF = (RooDataSet*)mcOSOF.genData(i);
109     stringstream toyname;
110     toyname << "theToy_" << i;
111     write_warning(__FUNCTION__,"Problem while adding toys");
112     // RooDataSet *toyData = RooDataSet(toyname.str(),toyname.str(),observables,RooFit::Index(ws->cat("cat")),RooFit::Import("OSOF",*toyOSOF),RooFit::Import("EE",*toyEE),RooFit::Import("MM",*toyMM));
113     // theToys.push_back(toyData);
114     }
115     ws->var("nSig")->setVal(17.0);
116     ws->var("nSig")->setConstant(false);
117     return theToys;
118     }
119    
120     void EdgeFitter::getMedianLimit(RooWorkspace *ws,vector<RooDataSet*> theToys,float &median,float &sigmaDown, float &sigmaUp, float &twoSigmaDown, float &twoSigmaUp) {
121     TH1F *gauLimit = new TH1F("gausLimit","gausLimit",60,0.,80./PlottingSetup::luminosity);
122     vector<float> theLimits;
123     for(int itoy=0;itoy<theToys.size();itoy++) {
124     float theLimit = calcExclusion(ws,theToys[itoy]);
125     if(theLimit > 0 ) gauLimit->Fill(theLimit);
126     }
127     const Int_t nQ = 4;
128     Double_t yQ[nQ] = {0.,0.,0.,0.};
129     Double_t xQ[nQ] = {0.022750132,0.1586552539,0.84134474609999998,0.977249868};
130     gauLimit->GetQuantiles(nQ,yQ,xQ);
131     median = gauLimit->GetMean();
132     // median = median1(gauLimit);
133     twoSigmaDown = abs(yQ[0]-median);
134     sigmaDown = abs(yQ[1]-median);
135     sigmaUp = abs(yQ[2]-median);
136     twoSigmaUp = abs(yQ[3]-median);
137     cout << median * PlottingSetup::luminosity << " " << sigmaUp * PlottingSetup::luminosity << endl;
138     }
139    
140     void EdgeFitter::prepareLimits(RooWorkspace *ws, bool ComputeBands) {
141     if(ComputeBands) {
142     vector<RooDataSet*> theToys = EdgeFitter::generateToys(ws,50);
143     vector<float> medVals;
144     vector<float> medLimits;
145     vector<float> sigmaLimitsDown;
146     vector<float> sigmaLimitsUp;
147     vector<float> twoSigmaLimitsDown;
148     vector<float> twoSigmaLimitsUp;
149     for(int i=20;i<=320;i+=40) {
150     ws->var("nSig")->setVal(10.0);
151     medVals.push_back((float)i);
152     ws->var("m0")->setVal((float)i);
153     ws->var("m0")->setConstant(true);
154     float Smedian,SsigmaDown,SsigmaUp,StwoSigmaDown,StwoSigmaUp;
155     EdgeFitter::getMedianLimit(ws,theToys,Smedian,SsigmaDown,SsigmaUp,StwoSigmaDown,StwoSigmaUp);
156     medLimits.push_back(Smedian);
157     sigmaLimitsDown.push_back(SsigmaDown);
158     sigmaLimitsUp.push_back(SsigmaUp);
159     twoSigmaLimitsDown.push_back(StwoSigmaDown);
160     twoSigmaLimitsUp.push_back(StwoSigmaUp);
161     }
162     write_warning(__FUNCTION__,"Still need to store limits");
163     } else {
164     vector<float> theVals;
165     vector<float> theLimits;
166     for(int i=20;i<300;i+=5) {
167     ws->var("nSig")->setVal(0.0);
168     theVals.push_back((float)i);
169     ws->var("m0")->setVal((float)i);
170     ws->var("m0")->setConstant(true);
171     theLimits.push_back(calcExclusion(ws));
172     }
173    
174     for(int i=0;i<theLimits.size();i++) {
175     if((theLimits[i]<2.0/PlottingSetup::luminosity)||(theLimits[i]>40.0/PlottingSetup::luminosity)) {
176     cout << i << " : " << theVals[i] << endl;
177     theLimits[i] = (theLimits[i+2]+theLimits[i-2])/2.0;
178     write_warning(__FUNCTION__,"Need to store limits");
179     }
180     write_warning(__FUNCTION__,"Need to store limits");
181     }
182     }
183     }
184    
185    
186     float EdgeFitter::calcExclusion(RooWorkspace *ws, RooDataSet *data) {
187 buchmann 1.3 RooRealVar mu("mu","nSig",0,10000,"");
188     RooArgSet poi = RooArgSet(mu);
189     RooArgSet *nullParams = (RooArgSet*)poi.snapshot();
190     nullParams->setRealValue("nSig",0);
191     RooStats::ModelConfig *model = new RooStats::ModelConfig();
192     model->SetWorkspace(*ws);
193     model->SetPdf("combModel");
194     model->SetParametersOfInterest(poi);
195 buchmann 1.4 if(!data) data = (RooDataSet*)ws->data("data_obs");
196 buchmann 1.3
197     RooStats::ProfileLikelihoodCalculator plc(*data, *model);
198     plc.SetNullParameters(*nullParams);
199     plc.SetTestSize(0.05);
200     RooStats::LikelihoodInterval* interval = plc.GetInterval();
201     RooStats::HypoTestResult *htr = plc.GetHypoTest();
202     double theLimit = interval->UpperLimit( mu );
203     cout << "Significance " << htr->Significance() << endl;
204    
205     ws->defineSet("obs","nB");
206     ws->defineSet("poi","nSig");
207    
208     RooStats::ModelConfig b_model = RooStats::ModelConfig("B_model", ws);
209     b_model.SetPdf(*ws->pdf("combModel"));
210     b_model.SetObservables(*ws->set("obs"));
211     b_model.SetParametersOfInterest(*ws->set("poi"));
212     ws->var("nSig")->setVal(0.0); //# important!
213     b_model.SetSnapshot(*ws->set("poi"));
214    
215     RooStats::ModelConfig sb_model = RooStats::ModelConfig("S+B_model", ws);
216     sb_model.SetPdf(*ws->pdf("combModel"));
217     sb_model.SetObservables(*ws->set("obs"));
218     sb_model.SetParametersOfInterest(*ws->set("poi"));
219     ws->var("nSig")->setVal(64.0); //# important!
220     sb_model.SetSnapshot(*ws->set("poi"));
221    
222     RooStats::SimpleLikelihoodRatioTestStat slrts = RooStats::SimpleLikelihoodRatioTestStat(*b_model.GetPdf(),*sb_model.GetPdf());
223     slrts.SetNullParameters(*b_model.GetSnapshot());
224     slrts.SetAltParameters(*sb_model.GetSnapshot());
225     RooStats::ProfileLikelihoodTestStat profll = RooStats::ProfileLikelihoodTestStat(*b_model.GetPdf());
226    
227     RooStats::HybridCalculatorOriginal hc = RooStats::HybridCalculatorOriginal(*data, sb_model, b_model,0,0);
228     hc.SetTestStatistic(2);
229     hc.SetNumberOfToys(50);
230    
231     RooStats::HypoTestInverterOriginal hcInv = RooStats::HypoTestInverterOriginal(hc,*ws->var("nSig"));
232     hcInv.SetTestSize(0.05);
233     hcInv.UseCLs(true);
234     hcInv.RunFixedScan(5,theLimit-0.5,theLimit+0.5);;
235     RooStats::HypoTestInverterResult* hcInt = hcInv.GetInterval();
236     float ulError = hcInt->UpperLimitEstimatedError();
237     theLimit = hcInt->UpperLimit();
238     cout << "Found upper limit : " << theLimit << "+/-" << ulError << endl;
239    
240     return theLimit/PlottingSetup::luminosity;
241    
242     }
243    
244 buchmann 1.2 TTree* SkimTree(int isample) {
245     TTree* newTree = allsamples.collection[isample].events->CloneTree(0);
246     float xsweight=1.0;
247     if(allsamples.collection[isample].is_data==false) xsweight=luminosity*allsamples.collection[isample].weight;
248     if(EdgeFitter::MarcoDebug) {
249     cout << " Original tree contains " << allsamples.collection[isample].events->GetEntries() << endl;
250     cout << " Going to reduce it with cut " << EdgeFitter::cut << endl;
251     }
252     TTreeFormula *select = new TTreeFormula("select", EdgeFitter::cut, allsamples.collection[isample].events);
253     float wgt=1.0;
254     allsamples.collection[isample].events->SetBranchAddress(cutWeight,&wgt);
255     for (Int_t entry = 0 ; entry < allsamples.collection[isample].events->GetEntries() ; entry++) {
256     allsamples.collection[isample].events->LoadTree(entry);
257     if (select->EvalInstance()) {
258     allsamples.collection[isample].events->GetEntry(entry);
259     wgt=wgt*xsweight;
260     newTree->Fill();
261     }
262     }
263    
264     if(EdgeFitter::MarcoDebug) cout << " Reduced tree contains " << newTree->GetEntries() << " entries " << endl;
265     return newTree;
266     }
267    
268     void EdgeFitter::InitializeVariables(float _mllmin, float _mllmax, float _jzbmax, TCut _cut) {
269     mllmin=_mllmin;
270     mllmax=_mllmax;
271     jzbmax=_jzbmax;
272     cut=_cut;
273     }
274    
275     void EdgeFitter::PrepareDatasets(int is_data) {
276     TTree *completetree;
277     bool hashit=0;
278     for(int isample=0;isample<allsamples.collection.size();isample++) {
279     if(!allsamples.collection[isample].is_active) continue;
280     if(is_data==1&&allsamples.collection[isample].is_data==false) continue;//kick all samples that aren't data if we're looking for data.
281     if(is_data==1&&allsamples.collection[isample].is_data==false) continue;//kick all samples that aren't data if we're looking for data.
282     if(is_data!=1&&allsamples.collection[isample].is_data==true) continue;//kick all data samples when looking for MC
283     if(is_data!=2&&allsamples.collection[isample].is_signal==true) continue;//remove signal sample if we don't want it.
284     if(EdgeFitter::MarcoDebug) cout << "Considering : " << allsamples.collection[isample].samplename << endl;
285     if(!hashit) {
286     hashit=true;
287     completetree = SkimTree(isample)->CloneTree();
288     } else {
289     completetree->CopyEntries(SkimTree(isample));
290     }
291     if(EdgeFitter::MarcoDebug) cout << "Complete tree now contains " << completetree->GetEntries() << " entries " << endl;
292     }
293    
294     RooRealVar mll("mll","mll",mllmin,mllmax,"GeV/c^{2}");
295     RooRealVar id1("id1","id1",0,1,"GeV/c^{2}");
296     RooRealVar id2("id2","id2",0,1,"GeV/c^{2}");
297     RooRealVar jzb("jzb","jzb",-jzbmax,jzbmax,"GeV/c");
298     RooRealVar weight("weight","weight",0,1000,"");
299     RooArgSet observables(mll,jzb,id1,id2,weight);
300    
301     string title="CMS Data";
302     if(is_data!=1) title="CMS SIMULATION";
303     RooDataSet LAllData("LAllData",title.c_str(),completetree,observables,"","weight");
304     completetree->Write();
305     // delete completetree;
306    
307     EdgeFitter::eeSample = (RooDataSet*)LAllData.reduce("id1==id2&&id1==0");
308     EdgeFitter::mmSample = (RooDataSet*)LAllData.reduce("id1==id2&&id1==1");
309     EdgeFitter::emSample = (RooDataSet*)LAllData.reduce("id1!=id2");
310     EdgeFitter::AllData = (RooDataSet*)LAllData.reduce("id1!=id2||id1==id2");
311    
312     eeSample->SetName("eeSample");
313     mmSample->SetName("mmSample");
314     emSample->SetName("emSample");
315     AllData->SetName("AllData");
316    
317     if(EdgeFitter::MarcoDebug) {
318     cout << "Number of events in data sample = " << AllData->numEntries() << endl;
319     cout << "Number of events in ee sample = " << eeSample->numEntries() << endl;
320     cout << "Number of events in mm sample = " << mmSample->numEntries() << endl;
321     cout << "Number of events in em sample = " << emSample->numEntries() << endl;
322     }
323     }
324    
325     string EdgeFitter::RandomStorageFile() {
326     TRandom3 *r = new TRandom3(0);
327     int rho = (int)r->Uniform(1,10000000);
328     stringstream RandomFile;
329     RandomFile << PlottingSetup::cbafbasedir << "/exchange/TempEdgeFile_" << rho << ".root";
330     delete r;
331     return RandomFile.str();
332     }
333    
334     Yield EdgeFitter::Get_Z_estimate(float jzb_cut, int icut) {
335     if(MarcoDebug) write_error(__FUNCTION__,"Returning random Z yield");
336     Yield a(123,45,67); return a;
337    
338     return PlottingSetup::allresults.predictions[icut].Zbkg;
339     }
340    
341     Yield EdgeFitter::Get_T_estimate(float jzb_cut, int icut) {
342     if(MarcoDebug) write_error(__FUNCTION__,"Returning random TTbar yield");
343     Yield a(1234,56,78); return a;
344     return PlottingSetup::allresults.predictions[icut].Flavorsym;
345     }
346    
347     void EdgeFitter::DoEdgeFit(string mcjzb, string datajzb, float DataPeakError, float MCPeakError, float jzb_cut, int icut, int is_data, TCut cut, TTree *signalevents=0) {
348    
349     string storagefile=EdgeFitter::RandomStorageFile();
350     TFile *f = new TFile(storagefile.c_str(),"RECREATE");
351     EdgeFitter::InitializeVariables(iMllLow,iMllHigh,jzbHigh,cut);
352    
353     Yield Zestimate=EdgeFitter::Get_Z_estimate(jzb_cut,icut);
354     Yield Testimate=EdgeFitter::Get_T_estimate(jzb_cut,icut);
355     cout << "Cut at JZB>" << jzb_cut << "; using estimates: " << endl;
356     cout << " Z : " << Zestimate << endl;
357     cout << " T : " << Testimate << endl;
358    
359     EdgeFitter::PrepareDatasets(is_data);
360    
361     EdgeFitter::eeSample->Write();
362     EdgeFitter::emSample->Write();
363     EdgeFitter::mmSample->Write();
364     EdgeFitter::AllData->Write();
365     f->Close();
366    
367     write_warning(__FUNCTION__,"Work continues here");
368    
369     if(EdgeFitter::MarcoDebug) write_warning(__FUNCTION__,"Need to uncomment the next line (remove the output file)");
370     // gSystem->Exec(("rm "+storagefile).c_str());
371     }
372    
373     void DoEdgeFit(string mcjzb, string datajzb, float DataPeakError, float MCPeakError, vector<float> jzb_cut, int is_data, TCut cut, TTree *signalevents=0) {
374     for(int icut=0;icut<jzb_cut.size();icut++) {
375     stringstream addcut;
376     if(is_data==1) addcut << "(" << datajzb << ">" << jzb_cut[icut] << ")";
377     if(is_data!=1) addcut << "(" << mcjzb << ">" << jzb_cut[icut] << ")";
378     TCut jcut(addcut.str().c_str());
379    
380     EdgeFitter::DoEdgeFit(mcjzb, datajzb, DataPeakError, MCPeakError, jzb_cut[icut], icut, is_data, jcut&&cut, signalevents);
381    
382     }
383     }