ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/auterman/scripts/CLs/TLimit.cc
Revision: 1.1.1.1 (vendor branch)
Committed: Thu Apr 1 15:14:10 2010 UTC (15 years, 1 month ago) by auterman
Content type: text/plain
Branch: CLs, MAIN
CVS Tags: start, HEAD
Changes since 1.1: +0 -0 lines
Log Message:
some cls demo plots

File Contents

# User Rev Content
1 auterman 1.1 // @(#)root/hist:$Id: TLimit.cxx 26221 2008-11-17 08:10:26Z brun $
2     // Author: Christophe.Delaere@cern.ch 21/08/2002
3    
4     ///////////////////////////////////////////////////////////////////////////
5     //
6     // TLimit
7     //
8     // Class to compute 95% CL limits
9     //
10     // adapted from the mclimit code from Tom Junk (CLs method)
11     // see http://root.cern.ch/root/doc/TomJunk.pdf
12     // see http://cern.ch/thomasj/searchlimits/ecl.html
13     // see: Tom Junk,NIM A434, p. 435-443, 1999
14     //
15     // see also the following interesting references:
16     // Alex Read, "Presentation of search results: the CLs technique"
17     // Journal of Physics G: Nucl. Part. Phys. 28 2693-2704 (2002).
18     // http://www.iop.org/EJ/abstract/0954-3899/28/10/313/
19     //
20     // A nice article is also available in the CERN yellow report with the proceeding
21     // of the 2000 CERN workshop on confidence intervals.
22     //
23     // Alex Read, "Modified Frequentist Analysis of Search Results (The CLs Method)"
24     // CERN 2000-005 (30 May 2000)
25     //
26     // see note about: "Should I use TRolke, TFeldmanCousins, TLimit?"
27     // in the TRolke class description.
28     //
29     ///////////////////////////////////////////////////////////////////////////
30    
31     #include "TLimit.h"
32     #include "TArrayD.h"
33     #include "TVectorD.h"
34     #include "TOrdCollection.h"
35     #include "TConfidenceLevel.h"
36     #include "TLimitDataSource.h"
37     #include "TRandom3.h"
38     #include "TH1.h"
39     #include "TObjArray.h"
40     #include "TMath.h"
41     #include "TIterator.h"
42     #include "TObjString.h"
43     #include "TClassTable.h"
44     #include "Riostream.h"
45    
ClassImp(TLimit)

// Static scratch storage shared by all TLimit calls (not thread-safe):
// fgTable caches the per-bin test-statistic weights log(1+s/b) computed in
// ComputeLimit; fgSystNames holds the sorted list of systematic-error names
// (re)built by Fluctuate when called with init=true.
TArrayD *TLimit::fgTable = new TArrayD(0);
TOrdCollection *TLimit::fgSystNames = new TOrdCollection();
50    
51     TConfidenceLevel *TLimit::ComputeLimit(TLimitDataSource * data,
52     Int_t nmc, bool stat,
53     TRandom * generator)
54     {
55     // class TLimit
56     // ------------
57     // Algorithm to compute 95% C.L. limits using the Likelihood ratio
58     // semi-bayesian method.
59     // It takes signal, background and data histograms wrapped in a
60     // TLimitDataSource as input and runs a set of Monte Carlo experiments in
61     // order to compute the limits. If needed, inputs are fluctuated according
62     // to systematics. The output is a TConfidenceLevel.
63     //
64     // class TLimitDataSource
65     // ----------------------
66     //
67     // Takes the signal, background and data histograms as well as different
68     // systematics sources to form the TLimit input.
69     //
70     // class TConfidenceLevel
71     // ----------------------
72     //
73     // Final result of the TLimit algorithm. It is created just after the
74     // time-consuming part and can be stored in a TFile for further processing.
75     // It contains light methods to return CLs, CLb and other interesting
76     // quantities.
77     //
78     // The actual algorithm...
79     // From an input (TLimitDataSource) it produces an output TConfidenceLevel.
80     // For this, nmc Monte Carlo experiments are performed.
81     // As usual, the larger this number, the longer the compute time,
82     // but the better the result.
83     //Begin_Html
84     /*
85     <FONT SIZE=+0>
86     <p>Supposing that there is a plotfile.root file containing 3 histograms
87     (signal, background and data), you can imagine doing things like:</p>
88     <p>
89     <BLOCKQUOTE><PRE>
90     TFile* infile=new TFile("plotfile.root","READ");
91     infile->cd();
92     TH1* sh=(TH1*)infile->Get("signal");
93     TH1* bh=(TH1*)infile->Get("background");
94     TH1* dh=(TH1*)infile->Get("data");
95     TLimitDataSource* mydatasource = new TLimitDataSource(sh,bh,dh);
96     TConfidenceLevel *myconfidence = TLimit::ComputeLimit(mydatasource,50000);
97     cout &lt&lt " CLs : " &lt&lt myconfidence->CLs() &lt&lt endl;
98     cout &lt&lt " CLsb : " &lt&lt myconfidence->CLsb() &lt&lt endl;
99     cout &lt&lt " CLb : " &lt&lt myconfidence->CLb() &lt&lt endl;
100     cout &lt&lt "&lt CLs &gt : " &lt&lt myconfidence->GetExpectedCLs_b() &lt&lt endl;
101     cout &lt&lt "&lt CLsb &gt : " &lt&lt myconfidence->GetExpectedCLsb_b() &lt&lt endl;
102     cout &lt&lt "&lt CLb &gt : " &lt&lt myconfidence->GetExpectedCLb_b() &lt&lt endl;
103     delete myconfidence;
104     delete mydatasource;
105     infile->Close();
106     </PRE></BLOCKQUOTE></p>
107     <p></p>
108     <p>More informations can still be found on
109     <a HREF="http://cern.ch/aleph-proj-alphapp/doc/tlimit.html">this</a> page.</p>
110     </FONT>
111     */
112     //End_Html
113    
114     // The final object returned...
115     TConfidenceLevel *result = new TConfidenceLevel(nmc);
116     // The random generator used...
117     TRandom *myrandom = generator ? generator : new TRandom3;
118     // Compute some total quantities on all the channels
119     Int_t maxbins = 0;
120     Double_t nsig = 0;
121     Double_t nbg = 0;
122     Int_t ncand = 0;
123     Int_t i;
124     for (i = 0; i <= data->GetSignal()->GetLast(); i++) {
125     maxbins = (((TH1 *) (data->GetSignal()->At(i)))->GetNbinsX() + 2) > maxbins ?
126     (((TH1 *) (data->GetSignal()->At(i)))->GetNbinsX() + 2) : maxbins;
127     nsig += ((TH1 *) (data->GetSignal()->At(i)))->Integral();
128     nbg += ((TH1 *) (data->GetBackground()->At(i)))->Integral();
129     ncand += (Int_t) ((TH1 *) (data->GetCandidates()->At(i)))->Integral();
130     }
131     result->SetBtot(nbg);
132     result->SetStot(nsig);
133     result->SetDtot(ncand);
134     Double_t buffer = 0;
135     fgTable->Set(maxbins * (data->GetSignal()->GetLast() + 1));
136     for (Int_t channel = 0; channel <= data->GetSignal()->GetLast(); channel++)
137     for (Int_t bin = 0;
138     bin <= ((TH1 *) (data->GetSignal()->At(channel)))->GetNbinsX()+1;
139     bin++) {
140     Double_t s = (Double_t) ((TH1 *) (data->GetSignal()->At(channel)))->GetBinContent(bin);
141     Double_t b = (Double_t) ((TH1 *) (data->GetBackground()->At(channel)))->GetBinContent(bin);
142     Double_t d = (Double_t) ((TH1 *) (data->GetCandidates()->At(channel)))->GetBinContent(bin);
143     // Compute the value of the "-2lnQ" for the actual data
144     if ((b == 0) && (s > 0)) {
145     cout << "WARNING: Ignoring bin " << bin << " of channel "
146     << channel << " which has s=" << s << " but b=" << b << endl;
147     cout << " Maybe the MC statistic has to be improved..." << endl;
148     }
149     if ((s > 0) && (b > 0))
150     buffer += LogLikelihood(s, b, b, d);
151     // precompute the log(1+s/b)'s in an array to speed up computation
152     // background-free bins are set to have a maximum t.s. value
153     // for protection (corresponding to s/b of about 5E8)
154     if ((s > 0) && (b > 0))
155     fgTable->AddAt(LogLikelihood(s, b, b, 1), (channel * maxbins) + bin);
156     else if ((s > 0) && (b == 0))
157     fgTable->AddAt(20, (channel * maxbins) + bin);
158     }
159     result->SetTSD(buffer);
160     // accumulate MC experiments. Hold the test statistic function fixed, but
161     // fluctuate s and b within syst. errors for computing probabilities of
162     // having that outcome. (Alex Read's prescription -- errors are on the ensemble,
163     // not on the observed test statistic. This technique does not split outcomes.)
164     // keep the tstats as sum log(1+s/b). convert to -2lnQ when preparing the results
165     // (reason -- like to keep the < signs right)
166     Double_t *tss = new Double_t[nmc];
167     Double_t *tsb = new Double_t[nmc];
168     Double_t *lrs = new Double_t[nmc];
169     Double_t *lrb = new Double_t[nmc];
170     TLimitDataSource* tmp_src = (TLimitDataSource*)(data->Clone());
171     TLimitDataSource* tmp_src2 = (TLimitDataSource*)(data->Clone());
172     for (i = 0; i < nmc; i++) {
173     tss[i] = 0;
174     tsb[i] = 0;
175     lrs[i] = 0;
176     lrb[i] = 0;
177     // fluctuate signal and background
178     TLimitDataSource* fluctuated = Fluctuate(data, tmp_src, !i, myrandom, stat) ? tmp_src : data;
179     // make an independent set of fluctuations for reweighting pseudoexperiments
180     // it turns out using the same fluctuated ones for numerator and denominator
181     // is biased.
182     TLimitDataSource* fluctuated2 = Fluctuate(data, tmp_src2, false, myrandom, stat) ? tmp_src2 : data;
183    
184     for (Int_t channel = 0;
185     channel <= fluctuated->GetSignal()->GetLast(); channel++) {
186     for (Int_t bin = 0;
187     bin <=((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetNbinsX()+1;
188     bin++) {
189     if ((Double_t) ((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetBinContent(bin) != 0) {
190     // s+b hypothesis
191     Double_t rate = (Double_t) ((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetBinContent(bin) +
192     (Double_t) ((TH1 *) (fluctuated->GetBackground()->At(channel)))->GetBinContent(bin);
193     Double_t rand = myrandom->Poisson(rate);
194     tss[i] += rand * fgTable->At((channel * maxbins) + bin);
195     Double_t s = (Double_t) ((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetBinContent(bin);
196     Double_t s2= (Double_t) ((TH1 *) (fluctuated2->GetSignal()->At(channel)))->GetBinContent(bin);
197     Double_t b = (Double_t) ((TH1 *) (fluctuated->GetBackground()->At(channel)))->GetBinContent(bin);
198     Double_t b2= (Double_t) ((TH1 *) (fluctuated2->GetBackground()->At(channel)))->GetBinContent(bin);
199     if ((s > 0) && (b2 > 0))
200     lrs[i] += LogLikelihood(s, b, b2, rand) - s - b + b2;
201     else if ((s > 0) && (b2 == 0))
202     lrs[i] += 20 * rand - s;
203     // b hypothesis
204     rate = (Double_t) ((TH1 *) (fluctuated->GetBackground()->At(channel)))->GetBinContent(bin);
205     rand = myrandom->Poisson(rate);
206     tsb[i] += rand * fgTable->At((channel * maxbins) + bin);
207     if ((s2 > 0) && (b > 0))
208     lrb[i] += LogLikelihood(s2, b2, b, rand) - s2 - b2 + b;
209     else if ((s > 0) && (b == 0))
210     lrb[i] += 20 * rand - s;
211     }
212     }
213     }
214     lrs[i] = TMath::Exp(lrs[i] < 710 ? lrs[i] : 710);
215     lrb[i] = TMath::Exp(lrb[i] < 710 ? lrb[i] : 710);
216     }
217     delete tmp_src;
218     delete tmp_src2;
219     // lrs and lrb are the LR's (no logs) = prob(s+b)/prob(b) for
220     // that choice of s and b within syst. errors in the ensemble. These are
221     // the MC experiment weights for relating the s+b and b PDF's of the unsmeared
222     // test statistic (in which cas one can use another test statistic if one likes).
223    
224     // Now produce the output object.
225     // The final quantities are computed on-demand form the arrays tss, tsb, lrs and lrb.
226     result->SetTSS(tss);
227     result->SetTSB(tsb);
228     result->SetLRS(lrs);
229     result->SetLRB(lrb);
230     if (!generator)
231     delete myrandom;
232     return result;
233     }
234    
bool TLimit::Fluctuate(TLimitDataSource * input, TLimitDataSource * output,
                       bool init, TRandom * generator, bool stat)
{
   // Fluctuates the contents of input within its statistical and systematic
   // errors and stores the result in output (which must hold clones of the
   // input histograms). Returns 0 when nothing was fluctuated (output left
   // untouched, caller should use input directly), 1 otherwise.
   //  - init:      rebuild the shared static fgSystNames list from the
   //               input's error names before fluctuating.
   //  - generator: source of the Gaussian random numbers.
   //  - stat:      also smear each bin by its statistical (bin) error.

   // initialisation: create a sorted list of all the names of systematics
   if (init) {
      // create a "map" with the systematics names
      TIterator *errornames = input->GetErrorNames()->MakeIterator();
      TObjArray *listofnames = 0;
      delete fgSystNames;
      fgSystNames = new TOrdCollection();
      // NOTE(review): errornames and the per-channel loniter iterators
      // allocated below are never deleted -- they leak on every init call.
      while ((listofnames = ((TObjArray *) errornames->Next()))) {
         TObjString *name = 0;
         TIterator *loniter = listofnames->MakeIterator();
         while ((name = (TObjString *) (loniter->Next())))
            // collect each systematic name only once
            if ((fgSystNames->IndexOf(name)) < 0)
               fgSystNames->AddLast(name);
      }
      // sorted so BinarySearch below can map a name to its toss index
      fgSystNames->Sort();
   }
   // if the output is not given, create it from the input
   // NOTE(review): 'output' is passed by value, so a clone created here can
   // never reach the caller and is leaked; all callers in this file pass a
   // non-null output, so this branch looks dormant -- verify before relying
   // on it.
   if (!output)
      output = (TLimitDataSource*)(input->Clone());
   // if there are no systematics, just returns the input as "fluctuated" output
   if ((fgSystNames->GetSize() <= 0)&&(!stat)) {
      return 0;
   }
   // if there are only stat, just fluctuate stats.
   if (fgSystNames->GetSize() <= 0) {
      output->SetOwner();
      for (Int_t channel = 0; channel <= input->GetSignal()->GetLast(); channel++) {
         TH1 *newsignal = (TH1*)(output->GetSignal()->At(channel));
         TH1 *oldsignal = (TH1*)(input->GetSignal()->At(channel));
         if(stat)
            // smear each signal bin with a Gaussian of width = its bin error
            for(int i=1; i<=newsignal->GetNbinsX(); i++) {
               newsignal->SetBinContent(i,oldsignal->GetBinContent(i)+generator->Gaus(0,oldsignal->GetBinError(i)));
            }
         newsignal->SetDirectory(0);
         TH1 *newbackground = (TH1*)(output->GetBackground()->At(channel));
         TH1 *oldbackground = (TH1*)(input->GetBackground()->At(channel));
         if(stat)
            // same statistical smearing for the background histogram
            for(int i=1; i<=newbackground->GetNbinsX(); i++)
               newbackground->SetBinContent(i,oldbackground->GetBinContent(i)+generator->Gaus(0,oldbackground->GetBinError(i)));
         newbackground->SetDirectory(0);
      }
      return 1;
   }
   // Find a choice for the random variation and
   // re-toss all random numbers if any background or signal
   // goes negative. (background = 0 is bad too, so put a little protection
   // around it -- must have at least 10% of the bg estimate).
   Bool_t retoss = kTRUE;
   Double_t *serrf = new Double_t[(input->GetSignal()->GetLast()) + 1];
   Double_t *berrf = new Double_t[(input->GetSignal()->GetLast()) + 1];
   do {
      // one standard-normal toss per systematic name, shared by all channels
      // so correlated systematics move together
      Double_t *toss = new Double_t[fgSystNames->GetSize()];
      for (Int_t i = 0; i < fgSystNames->GetSize(); i++)
         toss[i] = generator->Gaus(0, 1);
      retoss = kFALSE;
      for (Int_t channel = 0;
           channel <= input->GetSignal()->GetLast();
           channel++) {
         serrf[channel] = 0;
         berrf[channel] = 0;
         // accumulate the channel's relative shift: sum over its systematics
         // of (relative error) * (toss looked up by the systematic's name)
         for (Int_t bin = 0;
              bin <((TVectorD *) (input->GetErrorOnSignal()->At(channel)))->GetNrows();
              bin++) {
            serrf[channel] += ((TVectorD *) (input->GetErrorOnSignal()->At(channel)))->operator[](bin) *
                toss[fgSystNames->BinarySearch((TObjString*) (((TObjArray *) (input->GetErrorNames()->At(channel)))->At(bin)))];
            berrf[channel] += ((TVectorD *) (input->GetErrorOnBackground()->At(channel)))->operator[](bin) *
                toss[fgSystNames->BinarySearch((TObjString*) (((TObjArray *) (input->GetErrorNames()->At(channel)))->At(bin)))];
         }
         // reject the entire toss if this channel's signal would go negative
         // or its background would drop below 10% of the estimate
         if ((serrf[channel] < -1.0) || (berrf[channel] < -0.9)) {
            retoss = kTRUE;
            continue;
         }
      }
      delete[]toss;
   } while (retoss);
   // adjust the fluctuated signal and background counts with a legal set
   // of random fluctuations above.
   output->SetOwner();
   for (Int_t channel = 0; channel <= input->GetSignal()->GetLast();
        channel++) {
      TH1 *newsignal = (TH1*)(output->GetSignal()->At(channel));
      TH1 *oldsignal = (TH1*)(input->GetSignal()->At(channel));
      if(stat)
         // statistical smearing first, then the systematic scale below
         for(int i=1; i<=newsignal->GetNbinsX(); i++)
            newsignal->SetBinContent(i,oldsignal->GetBinContent(i)+generator->Gaus(0,oldsignal->GetBinError(i)));
      else
         // no stat fluctuation requested: reset output to the input contents
         for(int i=1; i<=newsignal->GetNbinsX(); i++)
            newsignal->SetBinContent(i,oldsignal->GetBinContent(i));
      // apply the accepted systematic shift as a global channel scale
      newsignal->Scale(1 + serrf[channel]);
      newsignal->SetDirectory(0);
      TH1 *newbackground = (TH1*)(output->GetBackground()->At(channel));
      TH1 *oldbackground = (TH1*)(input->GetBackground()->At(channel));
      if(stat)
         for(int i=1; i<=newbackground->GetNbinsX(); i++)
            newbackground->SetBinContent(i,oldbackground->GetBinContent(i)+generator->Gaus(0,oldbackground->GetBinError(i)));
      else
         for(int i=1; i<=newbackground->GetNbinsX(); i++)
            newbackground->SetBinContent(i,oldbackground->GetBinContent(i));
      newbackground->Scale(1 + berrf[channel]);
      newbackground->SetDirectory(0);
   }
   delete[] serrf;
   delete[] berrf;
   return 1;
}
343    
344     TConfidenceLevel *TLimit::ComputeLimit(TH1* s, TH1* b, TH1* d,
345     Int_t nmc, bool stat,
346     TRandom * generator)
347     {
348     // Compute limit.
349    
350     TLimitDataSource* lds = new TLimitDataSource(s,b,d);
351     TConfidenceLevel* out = ComputeLimit(lds,nmc,stat,generator);
352     delete lds;
353     return out;
354     }
355    
356     TConfidenceLevel *TLimit::ComputeLimit(TH1* s, TH1* b, TH1* d,
357     TVectorD* se, TVectorD* be, TObjArray* l,
358     Int_t nmc, bool stat,
359     TRandom * generator)
360     {
361     // Compute limit.
362    
363     TLimitDataSource* lds = new TLimitDataSource(s,b,d,se,be,l);
364     TConfidenceLevel* out = ComputeLimit(lds,nmc,stat,generator);
365     delete lds;
366     return out;
367     }
368    
369     TConfidenceLevel *TLimit::ComputeLimit(Double_t s, Double_t b, Int_t d,
370     Int_t nmc,
371     bool stat,
372     TRandom * generator)
373     {
374     // Compute limit.
375    
376     TH1D* sh = new TH1D("__sh","__sh",1,0,2);
377     sh->Fill(1,s);
378     TH1D* bh = new TH1D("__bh","__bh",1,0,2);
379     bh->Fill(1,b);
380     TH1D* dh = new TH1D("__dh","__dh",1,0,2);
381     dh->Fill(1,d);
382     TLimitDataSource* lds = new TLimitDataSource(sh,bh,dh);
383     TConfidenceLevel* out = ComputeLimit(lds,nmc,stat,generator);
384     delete lds;
385     delete sh;
386     delete bh;
387     delete dh;
388     return out;
389     }
390    
391     TConfidenceLevel *TLimit::ComputeLimit(Double_t s, Double_t b, Int_t d,
392     TVectorD* se, TVectorD* be, TObjArray* l,
393     Int_t nmc,
394     bool stat,
395     TRandom * generator)
396     {
397     // Compute limit.
398    
399     TH1D* sh = new TH1D("__sh","__sh",1,0,2);
400     sh->Fill(1,s);
401     TH1D* bh = new TH1D("__bh","__bh",1,0,2);
402     bh->Fill(1,b);
403     TH1D* dh = new TH1D("__dh","__dh",1,0,2);
404     dh->Fill(1,d);
405     TLimitDataSource* lds = new TLimitDataSource(sh,bh,dh,se,be,l);
406     TConfidenceLevel* out = ComputeLimit(lds,nmc,stat,generator);
407     delete lds;
408     delete sh;
409     delete bh;
410     delete dh;
411     return out;
412     }
413    
414     Double_t TLimit::LogLikelihood(Double_t s, Double_t b, Double_t b2, Double_t d)
415     {
416     // Compute LogLikelihood (static function)
417    
418     return d*TMath::Log((s+b)/b2);
419     }