// @(#)root/hist:$Name: $:$Id: TLimit.cc,v 1.3 2010/11/26 13:30:05 auterman Exp $
// Author: Christophe.Delaere@cern.ch   21/08/2002

///////////////////////////////////////////////////////////////////////////
//
// TLimit
//
// Class to compute 95% CL limits
//
// adapted from the mclimit code from Tom Junk (CLs method)
// see http://root.cern.ch/root/doc/TomJunk.pdf
// see http://cern.ch/thomasj/searchlimits/ecl.html
// see: Tom Junk, NIM A434, p. 435-443, 1999
//
// see also the following interesting references:
// Alex Read, "Presentation of search results: the CLs technique"
// Journal of Physics G: Nucl. Part. Phys. 28 2693-2704 (2002).
// http://www.iop.org/EJ/abstract/0954-3899/28/10/313/
//
// A nice article is also available in the CERN yellow report with the proceedings
// of the 2000 CERN workshop on confidence intervals.
//
// Alex Read, "Modified Frequentist Analysis of Search Results (The CLs Method)"
// CERN 2000-005 (30 May 2000)
//
///////////////////////////////////////////////////////////////////////////

#include "TLimit.h"
#include "TArrayD.h"
#include "TOrdCollection.h"
#include "TConfidenceLevel.h"
#include "TLimitDataSource.h"
#include "TRandom3.h"
#include "TH1.h"
#include "TObjArray.h"
#include "TMath.h"
#include "TIterator.h"
#include "TObjString.h"
#include "TClassTable.h"
#include "Riostream.h"

ClassImp(TLimit)

TArrayD *TLimit::fgTable = new TArrayD(0);
TOrdCollection *TLimit::fgSystNames = new TOrdCollection();

TConfidenceLevel *TLimit::ComputeLimit(TLimitDataSource * data,
                                       Int_t nmc, bool stat,
                                       TRandom * generator,
                                       Double_t(*statistic) (Double_t,
                                                             Double_t,
                                                             Double_t))
{
|
55 |
// class TLimit
|
56 |
// ------------
|
57 |
// Algorithm to compute 95% C.L. limits using the Likelihood ratio
|
58 |
// semi-bayesian method.
|
59 |
// It takes signal, background and data histograms wrapped in a
|
60 |
// TLimitDataSource as input and runs a set of Monte Carlo experiments in
|
61 |
// order to compute the limits. If needed, inputs are fluctuated according
|
62 |
// to systematics. The output is a TConfidenceLevel.
|
63 |
//
|
64 |
// class TLimitDataSource
|
65 |
// ----------------------
|
66 |
//
|
67 |
// Takes the signal, background and data histograms as well as different
|
68 |
// systematics sources to form the TLimit input.
|
69 |
//
|
70 |
// class TConfidenceLevel
|
71 |
// ----------------------
|
72 |
//
|
73 |
// Final result of the TLimit algorithm. It is created just after the
|
74 |
// time-consuming part and can be stored in a TFile for further processing.
|
75 |
// It contains light methods to return CLs, CLb and other interesting
|
76 |
// quantities.
|
77 |
//
|
78 |
// The actual algorithm...
|
79 |
// From an input (TLimitDataSource) it produces an output TConfidenceLevel.
|
80 |
// For this, nmc Monte Carlo experiments are performed.
|
81 |
// As usual, the larger this number, the longer the compute time,
|
82 |
// but the better the result.
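   //
   // For reference (standard definitions from the CLs papers cited above, not
   // additional functionality of this class): with Q the likelihood ratio
   // L(data|s+b)/L(data|b), the confidence levels returned by TConfidenceLevel
   // are CLsb = P(Q <= Qobs | s+b), CLb = P(Q <= Qobs | b), and the modified
   // frequentist quantity CLs = CLsb / CLb; a signal hypothesis is excluded at
   // 95% C.L. when CLs < 0.05.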
   //Begin_Html
   /*
   <FONT SIZE=+0>
   <p>Supposing that there is a plotfile.root file containing 3 histograms
   (signal, background and data), you can imagine doing things like:</p>
   <p>
   <BLOCKQUOTE><PRE>
    TFile* infile=new TFile("plotfile.root","READ");
    infile->cd();
    TH1D* sh=(TH1D*)infile->Get("signal");
    TH1D* bh=(TH1D*)infile->Get("background");
    TH1D* dh=(TH1D*)infile->Get("data");
    TLimitDataSource* mydatasource = new TLimitDataSource(sh,bh,dh);
    TConfidenceLevel *myconfidence = TLimit::ComputeLimit(mydatasource,50000);
    cout << "  CLs    : " << myconfidence->CLs()  << endl;
    cout << "  CLsb   : " << myconfidence->CLsb() << endl;
    cout << "  CLb    : " << myconfidence->CLb()  << endl;
    cout << "&lt; CLs &gt;  : " << myconfidence->GetExpectedCLs_b()  << endl;
    cout << "&lt; CLsb &gt; : " << myconfidence->GetExpectedCLsb_b() << endl;
    cout << "&lt; CLb &gt;  : " << myconfidence->GetExpectedCLb_b()  << endl;
    delete myconfidence;
    delete mydatasource;
    infile->Close();
   </PRE></BLOCKQUOTE></p>
   <p></p>
   <p>More information can be found on
   <a HREF="http://cern.ch/aleph-proj-alphapp/doc/tlimit.html">this</a> page.</p>
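   <p>(Illustrative sketch, not part of the original documentation.) The remaining
   arguments of ComputeLimit, defined below, let you propagate the statistical
   errors of the input histograms, supply a specific random generator and plug in
   a user-defined test statistic (here, for illustration, a d*ln(1+s/b)
   log-likelihood ratio):</p>
   <p>
   <BLOCKQUOTE><PRE>
    Double_t mystat(Double_t s, Double_t b, Double_t d) { return d*TMath::Log(1+s/b); }
    TRandom3 rndm(12345);
    // reusing the mydatasource created in the example above
    TConfidenceLevel *myconfidence =
       TLimit::ComputeLimit(mydatasource, 50000, true, &rndm, &mystat);
   </PRE></BLOCKQUOTE></p>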
   </FONT>
   */
   //End_Html

   // The final object returned...
   TConfidenceLevel *result = new TConfidenceLevel(nmc);
   // The random generator used...
   TRandom *myrandom = generator ? generator : new TRandom3;
   // Compute some total quantities on all the channels
   Int_t nbins = 0;
   Int_t maxbins = 0;
   Double_t nsig = 0;
   Double_t nbg = 0;
   Int_t ncand = 0;
   Int_t i;
   for (i = 0; i <= data->GetSignal()->GetLast(); i++) {
      nbins += ((TH1D *) (data->GetSignal()->At(i)))->GetNbinsX();
      maxbins = ((TH1D *) (data->GetSignal()->At(i)))->GetNbinsX() > maxbins ?
                ((TH1D *) (data->GetSignal()->At(i)))->GetNbinsX() + 1 : maxbins;
      nsig += ((TH1D *) (data->GetSignal()->At(i)))->Integral();
      nbg += ((TH1D *) (data->GetBackground()->At(i)))->Integral();
      ncand += (Int_t) ((TH1D *) (data->GetCandidates()->At(i)))->Integral();
   }
   result->SetBtot(nbg);
   result->SetStot(nsig);
   result->SetDtot(ncand);
   Double_t buffer = 0;
   fgTable->Set(maxbins * (data->GetSignal()->GetLast() + 1));
   for (Int_t channel = 0; channel <= data->GetSignal()->GetLast(); channel++)
      for (Int_t bin = 0;
           bin <= ((TH1D *) (data->GetSignal()->At(channel)))->GetNbinsX();
           bin++) {
         Double_t s = (Double_t) ((TH1D *) (data->GetSignal()->At(channel)))->GetBinContent(bin);
         Double_t b = (Double_t) ((TH1D *) (data->GetBackground()->At(channel)))->GetBinContent(bin);
         Double_t d = (Double_t) ((TH1D *) (data->GetCandidates()->At(channel)))->GetBinContent(bin);
         // Compute the value of the "-2lnQ" for the actual data
         if ((b == 0) && (s > 0)) {
            //cout << "WARNING: Ignoring bin " << bin << " of channel "
            //     << channel << " which has s=" << s << " but b=" << b << endl;
            //cout << "         Maybe the MC statistic has to be improved..." << endl;
         }
         if ((s > 0) && (b > 0))
            buffer += statistic(s, b, d);
         // precompute the log(1+s/b)'s in an array to speed up computation
         // background-free bins are set to have a maximum t.s. value
         // for protection (corresponding to s/b of about 5E8)
         if ((s > 0) && (b > 0))
            fgTable->AddAt(statistic(s, b, 1), (channel * maxbins) + bin);
         else if ((s > 0) && (b == 0))
            fgTable->AddAt(20, (channel * maxbins) + bin);
      }
   result->SetTSD(buffer);
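   // Note (added for clarity): assuming the default statistic TLimit::LogLikelihood
   // declared in TLimit.h, i.e. d*ln(1+s/b), buffer holds the sum over bins of
   // d*ln(1+s/b) for the data, and the actual -2lnQ is recovered later as
   // 2*(Stot - buffer) when the results are prepared (see the analogous comment
   // for the MC experiments below).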
   // accumulate MC experiments.  Hold the test statistic function fixed, but
   // fluctuate s and b within syst. errors for computing probabilities of
   // having that outcome.  (Alex Read's prescription -- errors are on the ensemble,
   // not on the observed test statistic.  This technique does not split outcomes.)
   // keep the tstats as sum log(1+s/b). convert to -2lnQ when preparing the results
   // (reason -- like to keep the < signs right)
   Double_t *tss = new Double_t[nmc];
   Double_t *tsb = new Double_t[nmc];
   Double_t *lrs = new Double_t[nmc];
   Double_t *lrb = new Double_t[nmc];
   Double_t *sig = new Double_t[nmc];
   Double_t *bgd = new Double_t[nmc];
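   // (added for clarity) For each pseudo-experiment i:
   //  - tss[i]/tsb[i] accumulate the test statistic of Poisson-fluctuated counts
   //    under the s+b and b-only hypotheses respectively,
   //  - lrs[i]/lrb[i] become the likelihood-ratio weights exp(sum[statistic(s,b,n)-s]),
   //  - sig[i]/bgd[i] store the fluctuated total signal and background yields.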
   for (i = 0; i < nmc; i++) {
      tss[i] = 0;
      tsb[i] = 0;
      lrs[i] = 0;
      lrb[i] = 0;
      sig[i] = 0;
      bgd[i] = 0;
      // fluctuate signal and background
      TLimitDataSource *fluctuated = Fluctuate(data, !i, myrandom, stat);
      for (Int_t channel = 0;
           channel <= fluctuated->GetSignal()->GetLast(); channel++) {
         for (Int_t bin = 0;
              bin <= ((TH1D *) (fluctuated->GetSignal()->At(channel)))->GetNbinsX();
              bin++) {
            if ((Double_t) ((TH1D *) (fluctuated->GetSignal()->At(channel)))->GetBinContent(bin) != 0) {
               // s+b hypothesis
               Double_t rate = (Double_t) ((TH1D *) (fluctuated->GetSignal()->At(channel)))->GetBinContent(bin) +
                               (Double_t) ((TH1D *) (fluctuated->GetBackground()->At(channel)))->GetBinContent(bin);
               Double_t rand = myrandom->Poisson(rate);
               tss[i] += rand * fgTable->At((channel * maxbins) + bin);
               Double_t s = (Double_t) ((TH1D *) (fluctuated->GetSignal()->At(channel)))->GetBinContent(bin);
               Double_t b = (Double_t) ((TH1D *) (fluctuated->GetBackground()->At(channel)))->GetBinContent(bin);
               //std::cout << "ch=" << channel << ", bin=" << bin << ", s=" << s << ", b=" << b << std::endl;
               sig[i] += s;
               bgd[i] += b;
               if ((s > 0) && (b > 0))
                  lrs[i] += statistic(s, b, rand) - s;
               else if ((s > 0) && (b == 0))
                  lrs[i] += 20 * rand - s;
               // b hypothesis
               rate = (Double_t) ((TH1D *) (fluctuated->GetBackground()->At(channel)))->GetBinContent(bin);
               rand = myrandom->Poisson(rate);
               tsb[i] += rand * fgTable->At((channel * maxbins) + bin);
               if ((s > 0) && (b > 0))
                  lrb[i] += statistic(s, b, rand) - s;
               else if ((s > 0) && (b == 0))
                  lrb[i] += 20 * rand - s;
            }
         }
      }
      lrs[i] = TMath::Exp(lrs[i]);
      lrb[i] = TMath::Exp(lrb[i]);
      if (data != fluctuated)
         delete fluctuated;
   }
   // lrs and lrb are the LR's (no logs) = prob(s+b)/prob(b) for
   // that choice of s and b within syst. errors in the ensemble.  These are
   // the MC experiment weights for relating the s+b and b PDF's of the unsmeared
   // test statistic (in which case one can use another test statistic if one likes).

   result->SetSig(sig);
   result->SetBgd(bgd);

   // Now produce the output object.
   // The final quantities are computed on demand from the arrays tss, tsb, lrs and lrb.
   result->SetTSS(tss);
   result->SetTSB(tsb);
   result->SetLRS(lrs);
   result->SetLRB(lrb);
   if (!generator)
      delete myrandom;
   return result;
}

TLimitDataSource *TLimit::Fluctuate(TLimitDataSource * input, bool init,
                                    TRandom * generator, bool stat)
{
   // initialisation: create a sorted list of all the names of systematics
   if (init) {
      // create a "map" with the systematics names
      TIterator *errornames = input->GetErrorNames()->MakeIterator();
      TObjArray *listofnames = 0;
      while ((listofnames = ((TObjArray *) errornames->Next()))) {
         TObjString *name = NULL;
         TIterator *loniter = listofnames->MakeIterator();
         while ((name = (TObjString *) (loniter->Next())))
            if ((fgSystNames->IndexOf(name)) < 0)
               fgSystNames->AddLast(name);
         delete loniter;
      }
      delete errornames;
      fgSystNames->Sort();
   }
   // if there are no systematics, just return the input as the "fluctuated" output
   if ((fgSystNames->GetSize() <= 0) && (!stat))
      return input;
   // if there are only statistical errors, just fluctuate the statistics.
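   // (added for clarity) Statistical fluctuation means each bin of the signal and
   // background histograms is smeared by a Gaussian of width equal to its bin error;
   // the candidate (data) histogram is copied unchanged.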
   if (fgSystNames->GetSize() <= 0) {
      TLimitDataSource *result = new TLimitDataSource();
      result->SetOwner();
      for (Int_t channel = 0; channel <= input->GetSignal()->GetLast(); channel++) {
         TH1D *newsignal = (TH1D *) (input->GetSignal()->At(channel))->Clone();
         if (stat)
            for (int i = 1; i <= newsignal->GetNbinsX(); i++) {
               newsignal->SetBinContent(i, newsignal->GetBinContent(i) + generator->Gaus(0, newsignal->GetBinError(i)));
            }
         newsignal->SetDirectory(0);
         TH1D *newbackground = (TH1D *) (input->GetBackground()->At(channel))->Clone();
         if (stat)
            for (int i = 1; i <= newbackground->GetNbinsX(); i++)
               newbackground->SetBinContent(i, newbackground->GetBinContent(i) + generator->Gaus(0, newbackground->GetBinError(i)));
         newbackground->SetDirectory(0);
         TH1D *newcandidates = new TH1D(*(TH1D *) (input->GetCandidates()->At(channel)));
         newcandidates->SetDirectory(0);
         result->AddChannel(newsignal, newbackground, newcandidates);
      }
      return result;
   }
   // Find a choice for the random variation and
   // re-toss all random numbers if any background or signal
   // goes negative.  (background = 0 is bad too, so put a little protection
   // around it -- must have at least 10% of the bg estimate).
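   // (added for clarity) One Gaussian toss is drawn per systematic name; the same
   // toss is reused for every channel and bin associated with that name, so a given
   // systematic error is treated as fully correlated across channels.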
   bool retoss = kTRUE;
   Double_t *serrf = NULL;
   Double_t *berrf = NULL;
   do {
      Double_t *toss = new Double_t[fgSystNames->GetSize()];
      for (Int_t i = 0; i < fgSystNames->GetSize(); i++)
         toss[i] = generator->Gaus(0, 1);
      retoss = kFALSE;
      // release buffers from a previous toss before reallocating
      delete[] serrf;
      delete[] berrf;
      serrf = new Double_t[(input->GetSignal()->GetLast()) + 1];
      berrf = new Double_t[(input->GetSignal()->GetLast()) + 1];
      for (Int_t channel = 0;
           channel <= input->GetSignal()->GetLast();
           channel++) {
         serrf[channel] = 0;
         berrf[channel] = 0;
         bool AsymmetricErrors = (((TH1D *) (input->GetErrorOnSignalNeg()->At(channel)))->GetNbinsX() > 0);
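         // (added for clarity) When a negative-side error histogram is filled
         // (AsymmetricErrors), downward tosses (negative Gaussian values) use the
         // negative-side relative errors; otherwise the errors are treated as
         // symmetric. The accumulated serrf/berrf are the overall relative shifts
         // applied to the whole channel via Scale(1+...) further below.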
         for (Int_t bin = 0;
              bin <= ((TH1D *) (input->GetErrorOnSignal()->At(channel)))->GetNbinsX();
              bin++) {

            Double_t s_toss = toss[fgSystNames->BinarySearch((TObjString *) (((TObjArray *) (input->GetErrorNames()->At(channel)))->At(bin)))];
            Double_t b_toss = toss[fgSystNames->BinarySearch((TObjString *) (((TObjArray *) (input->GetErrorNames()->At(channel)))->At(bin)))];
            Double_t s_scale = ((TH1D *) (input->GetErrorOnSignal()->At(channel)))->GetBinContent(bin);
            Double_t b_scale = ((TH1D *) (input->GetErrorOnBackground()->At(channel)))->GetBinContent(bin);

            if (AsymmetricErrors && s_toss < 0) {
               s_scale = ((TH1D *) (input->GetErrorOnSignalNeg()->At(channel)))->GetBinContent(bin);
            }
            if (AsymmetricErrors && b_toss < 0) {
               b_scale = ((TH1D *) (input->GetErrorOnBackgroundNeg()->At(channel)))->GetBinContent(bin);
            }

            serrf[channel] += s_scale * s_toss;
            berrf[channel] += b_scale * b_toss;
         }
         if ((serrf[channel] < -1.0) || (berrf[channel] < -0.9)) {
            retoss = kTRUE;
            continue;
         }
      }
      delete[] toss;
   } while (retoss);
   // adjust the fluctuated signal and background counts with a legal set
   // of random fluctuations above.
   TLimitDataSource *result = new TLimitDataSource();
   result->SetOwner();
   for (Int_t channel = 0; channel <= input->GetSignal()->GetLast();
        channel++) {
      TH1D *newsignal = new TH1D(*(TH1D *) (input->GetSignal()->At(channel)));
      if (stat)
         for (int i = 1; i <= newsignal->GetNbinsX(); i++) {
            newsignal->SetBinContent(i, newsignal->GetBinContent(i) + generator->Gaus(0, newsignal->GetBinError(i)));
         }
      newsignal->Scale(1 + serrf[channel]);
      newsignal->SetDirectory(0);
      TH1D *newbackground = new TH1D(*(TH1D *) (input->GetBackground()->At(channel)));
      if (stat)
         for (int i = 1; i <= newbackground->GetNbinsX(); i++)
            newbackground->SetBinContent(i, newbackground->GetBinContent(i) + generator->Gaus(0, newbackground->GetBinError(i)));
      newbackground->Scale(1 + berrf[channel]);
      newbackground->SetDirectory(0);
      TH1D *newcandidates = new TH1D(*(TH1D *) (input->GetCandidates()->At(channel)));
      newcandidates->SetDirectory(0);
      result->AddChannel(newsignal, newbackground, newcandidates);
   }
   delete[] serrf;
   delete[] berrf;
   return result;
}