1 |
// @(#)root/hist:$Id: TLimit.cxx 26221 2008-11-17 08:10:26Z brun $
|
2 |
// Author: Christophe.Delaere@cern.ch 21/08/2002
|
3 |
|
4 |
///////////////////////////////////////////////////////////////////////////
|
5 |
//
|
6 |
// TLimit
|
7 |
//
|
8 |
// Class to compute 95% CL limits
|
9 |
//
|
10 |
// adapted from the mclimit code from Tom Junk (CLs method)
|
11 |
// see http://root.cern.ch/root/doc/TomJunk.pdf
|
12 |
// see http://cern.ch/thomasj/searchlimits/ecl.html
|
13 |
// see: Tom Junk,NIM A434, p. 435-443, 1999
|
14 |
//
|
15 |
// see also the following interesting references:
|
16 |
// Alex Read, "Presentation of search results: the CLs technique"
|
17 |
// Journal of Physics G: Nucl. Part. Phys. 28 2693-2704 (2002).
|
18 |
// http://www.iop.org/EJ/abstract/0954-3899/28/10/313/
|
19 |
//
|
20 |
// A nice article is also available in the CERN yellow report with the proceeding
|
21 |
// of the 2000 CERN workshop on confidence intervals.
|
22 |
//
|
23 |
// Alex Read, "Modified Frequentist Analysis of Search Results (The CLs Method)"
|
24 |
// CERN 2000-005 (30 May 2000)
|
25 |
//
|
26 |
// see note about: "Should I use TRolke, TFeldmanCousins, TLimit?"
|
27 |
// in the TRolke class description.
|
28 |
//
|
29 |
///////////////////////////////////////////////////////////////////////////
|
30 |
|
31 |
#include "TLimit.h"
|
32 |
#include "TArrayD.h"
|
33 |
#include "TVectorD.h"
|
34 |
#include "TOrdCollection.h"
|
35 |
#include "TConfidenceLevel.h"
|
36 |
#include "TLimitDataSource.h"
|
37 |
#include "TRandom3.h"
|
38 |
#include "TH1.h"
|
39 |
#include "TObjArray.h"
|
40 |
#include "TMath.h"
|
41 |
#include "TIterator.h"
|
42 |
#include "TObjString.h"
|
43 |
#include "TClassTable.h"
|
44 |
#include "Riostream.h"
|
45 |
|
46 |
ClassImp(TLimit)

// Process-wide lookup table of the per-bin log(1+s/b) test-statistic terms,
// indexed as (channel * maxbins) + bin; (re)filled by ComputeLimit.
TArrayD *TLimit::fgTable = new TArrayD(0);
// Sorted collection of all distinct systematics names, (re)built by
// Fluctuate when called with init=true; sorted so BinarySearch works.
TOrdCollection *TLimit::fgSystNames = new TOrdCollection();
|
50 |
|
51 |
TConfidenceLevel *TLimit::ComputeLimit(TLimitDataSource * data,
                                       Int_t nmc, bool stat,
                                       TRandom * generator)
{
   // class TLimit
   // ------------
   // Algorithm to compute 95% C.L. limits using the Likelihood ratio
   // semi-bayesian method.
   // It takes signal, background and data histograms wrapped in a
   // TLimitDataSource as input and runs a set of Monte Carlo experiments in
   // order to compute the limits. If needed, inputs are fluctuated according
   // to systematics. The output is a TConfidenceLevel.
   //
   // class TLimitDataSource
   // ----------------------
   //
   // Takes the signal, background and data histograms as well as different
   // systematics sources to form the TLimit input.
   //
   // class TConfidenceLevel
   // ----------------------
   //
   // Final result of the TLimit algorithm. It is created just after the
   // time-consuming part and can be stored in a TFile for further processing.
   // It contains light methods to return CLs, CLb and other interesting
   // quantities.
   //
   // The actual algorithm...
   // From an input (TLimitDataSource) it produces an output TConfidenceLevel.
   // For this, nmc Monte Carlo experiments are performed.
   // As usual, the larger this number, the longer the compute time,
   // but the better the result.
   //Begin_Html
   /*
   <FONT SIZE=+0>
   <p>Supposing that there is a plotfile.root file containing 3 histograms
   (signal, background and data), you can imagine doing things like:</p>
   <p>
   <BLOCKQUOTE><PRE>
   TFile* infile=new TFile("plotfile.root","READ");
   infile->cd();
   TH1* sh=(TH1*)infile->Get("signal");
   TH1* bh=(TH1*)infile->Get("background");
   TH1* dh=(TH1*)infile->Get("data");
   TLimitDataSource* mydatasource = new TLimitDataSource(sh,bh,dh);
   TConfidenceLevel *myconfidence = TLimit::ComputeLimit(mydatasource,50000);
   cout << "  CLs    : " << myconfidence->CLs()  << endl;
   cout << "  CLsb   : " << myconfidence->CLsb() << endl;
   cout << "  CLb    : " << myconfidence->CLb()  << endl;
   cout << "&lt; CLs &gt;  : " << myconfidence->GetExpectedCLs_b()  << endl;
   cout << "&lt; CLsb &gt; : " << myconfidence->GetExpectedCLsb_b() << endl;
   cout << "&lt; CLb &gt;  : " << myconfidence->GetExpectedCLb_b()  << endl;
   delete myconfidence;
   delete mydatasource;
   infile->Close();
   </PRE></BLOCKQUOTE></p>
   <p></p>
   <p>More information can still be found on
   <a HREF="http://cern.ch/aleph-proj-alphapp/doc/tlimit.html">this</a> page.</p>
   </FONT>
   */
   //End_Html

   // The final object returned...
   TConfidenceLevel *result = new TConfidenceLevel(nmc);
   // The random generator used: the caller's, or a locally-owned TRandom3
   // that is deleted before returning.
   TRandom *myrandom = generator ? generator : new TRandom3;
   // Compute some total quantities on all the channels
   Int_t maxbins = 0;
   Double_t nsig = 0;
   Double_t nbg = 0;
   Int_t ncand = 0;
   Int_t i;
   for (i = 0; i <= data->GetSignal()->GetLast(); i++) {
      // maxbins counts the under/overflow bins as well, hence the +2
      maxbins = (((TH1 *) (data->GetSignal()->At(i)))->GetNbinsX() + 2) > maxbins ?
          (((TH1 *) (data->GetSignal()->At(i)))->GetNbinsX() + 2) : maxbins;
      nsig += ((TH1 *) (data->GetSignal()->At(i)))->Integral();
      nbg += ((TH1 *) (data->GetBackground()->At(i)))->Integral();
      ncand += (Int_t) ((TH1 *) (data->GetCandidates()->At(i)))->Integral();
   }
   result->SetBtot(nbg);
   result->SetStot(nsig);
   result->SetDtot(ncand);
   Double_t buffer = 0;
   fgTable->Set(maxbins * (data->GetSignal()->GetLast() + 1));
   for (Int_t channel = 0; channel <= data->GetSignal()->GetLast(); channel++)
      for (Int_t bin = 0;
           bin <= ((TH1 *) (data->GetSignal()->At(channel)))->GetNbinsX()+1;
           bin++) {
         Double_t s = (Double_t) ((TH1 *) (data->GetSignal()->At(channel)))->GetBinContent(bin);
         Double_t b = (Double_t) ((TH1 *) (data->GetBackground()->At(channel)))->GetBinContent(bin);
         Double_t d = (Double_t) ((TH1 *) (data->GetCandidates()->At(channel)))->GetBinContent(bin);
         // Compute the value of the "-2lnQ" for the actual data
         if ((b == 0) && (s > 0)) {
            cout << "WARNING: Ignoring bin " << bin << " of channel "
                 << channel << " which has s=" << s << " but b=" << b << endl;
            cout << " Maybe the MC statistic has to be improved..." << endl;
         }
         if ((s > 0) && (b > 0))
            buffer += LogLikelihood(s, b, b, d);
         // precompute the log(1+s/b)'s in an array to speed up computation
         // background-free bins are set to have a maximum t.s. value
         // for protection (corresponding to s/b of about 5E8)
         if ((s > 0) && (b > 0))
            fgTable->AddAt(LogLikelihood(s, b, b, 1), (channel * maxbins) + bin);
         else if ((s > 0) && (b == 0))
            fgTable->AddAt(20, (channel * maxbins) + bin);
      }
   result->SetTSD(buffer);
   // accumulate MC experiments. Hold the test statistic function fixed, but
   // fluctuate s and b within syst. errors for computing probabilities of
   // having that outcome. (Alex Read's prescription -- errors are on the ensemble,
   // not on the observed test statistic. This technique does not split outcomes.)
   // keep the tstats as sum log(1+s/b). convert to -2lnQ when preparing the results
   // (reason -- like to keep the < signs right)
   Double_t *tss = new Double_t[nmc];
   Double_t *tsb = new Double_t[nmc];
   Double_t *lrs = new Double_t[nmc];
   Double_t *lrb = new Double_t[nmc];
   TLimitDataSource* tmp_src = (TLimitDataSource*)(data->Clone());
   TLimitDataSource* tmp_src2 = (TLimitDataSource*)(data->Clone());
   for (i = 0; i < nmc; i++) {
      tss[i] = 0;
      tsb[i] = 0;
      lrs[i] = 0;
      lrb[i] = 0;
      // fluctuate signal and background (init the syst. names list on the
      // first iteration only)
      TLimitDataSource* fluctuated = Fluctuate(data, tmp_src, !i, myrandom, stat) ? tmp_src : data;
      // make an independent set of fluctuations for reweighting pseudoexperiments
      // it turns out using the same fluctuated ones for numerator and denominator
      // is biased.
      TLimitDataSource* fluctuated2 = Fluctuate(data, tmp_src2, false, myrandom, stat) ? tmp_src2 : data;

      for (Int_t channel = 0;
           channel <= fluctuated->GetSignal()->GetLast(); channel++) {
         for (Int_t bin = 0;
              bin <= ((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetNbinsX()+1;
              bin++) {
            if ((Double_t) ((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetBinContent(bin) != 0) {
               // s+b hypothesis
               Double_t rate = (Double_t) ((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetBinContent(bin) +
                   (Double_t) ((TH1 *) (fluctuated->GetBackground()->At(channel)))->GetBinContent(bin);
               Double_t rand = myrandom->Poisson(rate);
               tss[i] += rand * fgTable->At((channel * maxbins) + bin);
               Double_t s = (Double_t) ((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetBinContent(bin);
               Double_t s2= (Double_t) ((TH1 *) (fluctuated2->GetSignal()->At(channel)))->GetBinContent(bin);
               Double_t b = (Double_t) ((TH1 *) (fluctuated->GetBackground()->At(channel)))->GetBinContent(bin);
               Double_t b2= (Double_t) ((TH1 *) (fluctuated2->GetBackground()->At(channel)))->GetBinContent(bin);
               if ((s > 0) && (b2 > 0))
                  lrs[i] += LogLikelihood(s, b, b2, rand) - s - b + b2;
               else if ((s > 0) && (b2 == 0))
                  lrs[i] += 20 * rand - s;
               // b hypothesis
               rate = (Double_t) ((TH1 *) (fluctuated->GetBackground()->At(channel)))->GetBinContent(bin);
               rand = myrandom->Poisson(rate);
               tsb[i] += rand * fgTable->At((channel * maxbins) + bin);
               if ((s2 > 0) && (b > 0))
                  lrb[i] += LogLikelihood(s2, b2, b, rand) - s2 - b2 + b;
               // BUGFIX: this background-free branch previously tested and
               // subtracted s (first fluctuation set) instead of s2, which was
               // inconsistent with the guarded branch just above and with the
               // symmetric lrs computation using s/b2.
               else if ((s2 > 0) && (b == 0))
                  lrb[i] += 20 * rand - s2;
            }
         }
      }
      // cap the exponent at 710 to avoid overflow in Exp (DBL_MAX ~ e^709.78)
      lrs[i] = TMath::Exp(lrs[i] < 710 ? lrs[i] : 710);
      lrb[i] = TMath::Exp(lrb[i] < 710 ? lrb[i] : 710);
   }
   delete tmp_src;
   delete tmp_src2;
   // lrs and lrb are the LR's (no logs) = prob(s+b)/prob(b) for
   // that choice of s and b within syst. errors in the ensemble. These are
   // the MC experiment weights for relating the s+b and b PDF's of the unsmeared
   // test statistic (in which case one can use another test statistic if one likes).

   // Now produce the output object.
   // The final quantities are computed on-demand from the arrays tss, tsb, lrs and lrb.
   result->SetTSS(tss);
   result->SetTSB(tsb);
   result->SetLRS(lrs);
   result->SetLRB(lrb);
   if (!generator)
      delete myrandom;
   return result;
}
|
234 |
|
235 |
bool TLimit::Fluctuate(TLimitDataSource * input, TLimitDataSource * output,
                       bool init, TRandom * generator, bool stat)
{
   // Fluctuate the signal and background histograms of "input" into
   // "output", using the systematic errors declared in the data source and,
   // when stat is true, the per-bin statistical errors as well.
   // Returns 0 when there is nothing to fluctuate (the caller should then
   // use the input unchanged) and 1 when output has been filled.
   //
   // initialisation: create a sorted list of all the names of systematics
   if (init) {
      // create a "map" with the systematics names
      // NOTE(review): the TIterators created by MakeIterator() below are
      // never deleted -- small leak on every init call; confirm intended.
      TIterator *errornames = input->GetErrorNames()->MakeIterator();
      TObjArray *listofnames = 0;
      delete fgSystNames;
      fgSystNames = new TOrdCollection();
      // collect every distinct error name across all channels
      while ((listofnames = ((TObjArray *) errornames->Next()))) {
         TObjString *name = 0;
         TIterator *loniter = listofnames->MakeIterator();
         while ((name = (TObjString *) (loniter->Next())))
            if ((fgSystNames->IndexOf(name)) < 0)
               fgSystNames->AddLast(name);
      }
      // sorted so BinarySearch can be used when indexing the toss array below
      fgSystNames->Sort();
   }
   // if the output is not given, create it from the input
   // NOTE(review): "output" is passed by value, so a clone created here never
   // reaches the caller and is not freed on the early-return path below;
   // confirm all callers pass a non-null output (they do within this file).
   if (!output)
      output = (TLimitDataSource*)(input->Clone());
   // if there are no systematics, just returns the input as "fluctuated" output
   if ((fgSystNames->GetSize() <= 0)&&(!stat)) {
      return 0;
   }
   // if there are only stat, just fluctuate stats.
   if (fgSystNames->GetSize() <= 0) {
      output->SetOwner();
      for (Int_t channel = 0; channel <= input->GetSignal()->GetLast(); channel++) {
         // signal: each bin gets a Gaussian fluctuation with sigma = bin error
         TH1 *newsignal = (TH1*)(output->GetSignal()->At(channel));
         TH1 *oldsignal = (TH1*)(input->GetSignal()->At(channel));
         if(stat)
            for(int i=1; i<=newsignal->GetNbinsX(); i++) {
               newsignal->SetBinContent(i,oldsignal->GetBinContent(i)+generator->Gaus(0,oldsignal->GetBinError(i)));
            }
         // detach from any file so the clone is purely in-memory
         newsignal->SetDirectory(0);
         // background: same treatment as the signal
         TH1 *newbackground = (TH1*)(output->GetBackground()->At(channel));
         TH1 *oldbackground = (TH1*)(input->GetBackground()->At(channel));
         if(stat)
            for(int i=1; i<=newbackground->GetNbinsX(); i++)
               newbackground->SetBinContent(i,oldbackground->GetBinContent(i)+generator->Gaus(0,oldbackground->GetBinError(i)));
         newbackground->SetDirectory(0);
      }
      return 1;
   }
   // Find a choice for the random variation and
   // re-toss all random numbers if any background or signal
   // goes negative. (background = 0 is bad too, so put a little protection
   // around it -- must have at least 10% of the bg estimate).
   Bool_t retoss = kTRUE;
   // per-channel accumulated relative variation for signal and background
   Double_t *serrf = new Double_t[(input->GetSignal()->GetLast()) + 1];
   Double_t *berrf = new Double_t[(input->GetSignal()->GetLast()) + 1];
   do {
      // one standard-normal toss per systematics source, shared by all
      // channels so correlated errors move together
      Double_t *toss = new Double_t[fgSystNames->GetSize()];
      for (Int_t i = 0; i < fgSystNames->GetSize(); i++)
         toss[i] = generator->Gaus(0, 1);
      retoss = kFALSE;
      for (Int_t channel = 0;
           channel <= input->GetSignal()->GetLast();
           channel++) {
         serrf[channel] = 0;
         berrf[channel] = 0;
         // sum each error source's relative size, weighted by its toss;
         // the toss index is found by name via BinarySearch on the sorted list
         for (Int_t bin = 0;
              bin <((TVectorD *) (input->GetErrorOnSignal()->At(channel)))->GetNrows();
              bin++) {
            serrf[channel] += ((TVectorD *) (input->GetErrorOnSignal()->At(channel)))->operator[](bin) *
                toss[fgSystNames->BinarySearch((TObjString*) (((TObjArray *) (input->GetErrorNames()->At(channel)))->At(bin)))];
            berrf[channel] += ((TVectorD *) (input->GetErrorOnBackground()->At(channel)))->operator[](bin) *
                toss[fgSystNames->BinarySearch((TObjString*) (((TObjArray *) (input->GetErrorNames()->At(channel)))->At(bin)))];
         }
         // reject: signal scale may not go below 0, background must keep
         // at least 10% of its estimate
         if ((serrf[channel] < -1.0) || (berrf[channel] < -0.9)) {
            retoss = kTRUE;
            // keep scanning remaining channels; the whole toss set is
            // regenerated on the next do-while iteration anyway
            continue;
         }
      }
      delete[]toss;
   } while (retoss);
   // adjust the fluctuated signal and background counts with a legal set
   // of random fluctuations above.
   output->SetOwner();
   for (Int_t channel = 0; channel <= input->GetSignal()->GetLast();
        channel++) {
      TH1 *newsignal = (TH1*)(output->GetSignal()->At(channel));
      TH1 *oldsignal = (TH1*)(input->GetSignal()->At(channel));
      if(stat)
         // statistical fluctuation of each bin, applied before the overall
         // systematic scaling below
         for(int i=1; i<=newsignal->GetNbinsX(); i++)
            newsignal->SetBinContent(i,oldsignal->GetBinContent(i)+generator->Gaus(0,oldsignal->GetBinError(i)));
      else
         // no stat fluctuation: just copy the input bin contents
         for(int i=1; i<=newsignal->GetNbinsX(); i++)
            newsignal->SetBinContent(i,oldsignal->GetBinContent(i));
      // apply the accepted overall systematic variation for this channel
      newsignal->Scale(1 + serrf[channel]);
      newsignal->SetDirectory(0);
      TH1 *newbackground = (TH1*)(output->GetBackground()->At(channel));
      TH1 *oldbackground = (TH1*)(input->GetBackground()->At(channel));
      if(stat)
         for(int i=1; i<=newbackground->GetNbinsX(); i++)
            newbackground->SetBinContent(i,oldbackground->GetBinContent(i)+generator->Gaus(0,oldbackground->GetBinError(i)));
      else
         for(int i=1; i<=newbackground->GetNbinsX(); i++)
            newbackground->SetBinContent(i,oldbackground->GetBinContent(i));
      newbackground->Scale(1 + berrf[channel]);
      newbackground->SetDirectory(0);
   }
   delete[] serrf;
   delete[] berrf;
   return 1;
}
|
343 |
|
344 |
TConfidenceLevel *TLimit::ComputeLimit(TH1* s, TH1* b, TH1* d,
                                       Int_t nmc, bool stat,
                                       TRandom * generator)
{
   // Compute the limit for a single channel given by the signal, background
   // and data histograms. Convenience wrapper: builds a temporary
   // TLimitDataSource and delegates to the main ComputeLimit overload.

   TLimitDataSource* source = new TLimitDataSource(s, b, d);
   TConfidenceLevel* limit = ComputeLimit(source, nmc, stat, generator);
   delete source;
   return limit;
}
|
355 |
|
356 |
TConfidenceLevel *TLimit::ComputeLimit(TH1* s, TH1* b, TH1* d,
                                       TVectorD* se, TVectorD* be, TObjArray* l,
                                       Int_t nmc, bool stat,
                                       TRandom * generator)
{
   // Compute the limit for a single channel with systematics: signal,
   // background and data histograms plus the error vectors (se, be) and the
   // list of error names (l). Delegates to the main ComputeLimit overload
   // through a temporary TLimitDataSource.

   TLimitDataSource* source = new TLimitDataSource(s, b, d, se, be, l);
   TConfidenceLevel* limit = ComputeLimit(source, nmc, stat, generator);
   delete source;
   return limit;
}
|
368 |
|
369 |
TConfidenceLevel *TLimit::ComputeLimit(Double_t s, Double_t b, Int_t d,
                                       Int_t nmc,
                                       bool stat,
                                       TRandom * generator)
{
   // Compute the limit for a counting experiment: the expected signal,
   // expected background and observed count are packed into temporary
   // single-bin histograms and handed to the histogram-based overload.

   TH1D* signalHist     = new TH1D("__sh", "__sh", 1, 0, 2);
   TH1D* backgroundHist = new TH1D("__bh", "__bh", 1, 0, 2);
   TH1D* dataHist       = new TH1D("__dh", "__dh", 1, 0, 2);
   signalHist->Fill(1, s);
   backgroundHist->Fill(1, b);
   dataHist->Fill(1, d);
   TLimitDataSource* source = new TLimitDataSource(signalHist, backgroundHist, dataHist);
   TConfidenceLevel* limit = ComputeLimit(source, nmc, stat, generator);
   // clean up all the temporaries before handing back the result
   delete source;
   delete dataHist;
   delete backgroundHist;
   delete signalHist;
   return limit;
}
|
390 |
|
391 |
TConfidenceLevel *TLimit::ComputeLimit(Double_t s, Double_t b, Int_t d,
                                       TVectorD* se, TVectorD* be, TObjArray* l,
                                       Int_t nmc,
                                       bool stat,
                                       TRandom * generator)
{
   // Compute the limit for a counting experiment with systematics: the
   // three numbers are packed into temporary single-bin histograms and,
   // together with the error vectors (se, be) and error names (l), handed
   // to the histogram-based overload.

   TH1D* signalHist     = new TH1D("__sh", "__sh", 1, 0, 2);
   TH1D* backgroundHist = new TH1D("__bh", "__bh", 1, 0, 2);
   TH1D* dataHist       = new TH1D("__dh", "__dh", 1, 0, 2);
   signalHist->Fill(1, s);
   backgroundHist->Fill(1, b);
   dataHist->Fill(1, d);
   TLimitDataSource* source = new TLimitDataSource(signalHist, backgroundHist, dataHist, se, be, l);
   TConfidenceLevel* limit = ComputeLimit(source, nmc, stat, generator);
   // clean up all the temporaries before handing back the result
   delete source;
   delete dataHist;
   delete backgroundHist;
   delete signalHist;
   return limit;
}
|
413 |
|
414 |
Double_t TLimit::LogLikelihood(Double_t s, Double_t b, Double_t b2, Double_t d)
{
   // Compute LogLikelihood (static function).
   // Returns d * log((s+b)/b2); every call site in this file guards its
   // arguments so that b2 > 0 and s + b > 0.

   const Double_t ratio = (s + b) / b2;
   return d * TMath::Log(ratio);
}
|