1 |
// Standard tutorial macro for performing an inverted hypothesis test
|
2 |
//
|
3 |
// This macro will perform a scan of tehe p-values for computing the limit
|
4 |
//
|
5 |
|
6 |
#include <iostream>
|
7 |
|
8 |
#include "StandardHypoTestInvDemo.h"
|
9 |
|
10 |
|
11 |
using namespace RooFit;
|
12 |
using namespace RooStats;
|
13 |
|
14 |
|
15 |
bool plotHypoTestResult = true;
|
16 |
bool useProof = true;
|
17 |
bool optimize = false;
|
18 |
bool writeResult = false;
|
19 |
int nworkers = 4;
|
20 |
bool rebuild = false;
|
21 |
int nToyToRebuild = 100;
|
22 |
|
23 |
|
24 |
|
25 |
|
26 |
|
27 |
void StandardHypoTestInvDemo(const char * fileName,
|
28 |
const char * wsName,
|
29 |
const char * modelSBName,
|
30 |
const char * modelBName,
|
31 |
const char * dataName,
|
32 |
int calculatorType,
|
33 |
int testStatType,
|
34 |
bool useCls,
|
35 |
int npoints,
|
36 |
double poimin,
|
37 |
double poimax,
|
38 |
int ntoys,
|
39 |
bool useNumberCounting,
|
40 |
const char * nuisPriorName)
|
41 |
{
|
42 |
/*
|
43 |
|
44 |
Other Parameter to pass in tutorial
|
45 |
apart from standard for filename, ws, modelconfig and data
|
46 |
|
47 |
type = 0 Freq calculator
|
48 |
type = 1 Hybrid
|
49 |
|
50 |
testStatType = 0 LEP
|
51 |
= 1 Tevatron
|
52 |
= 2 Profile Likelihood
|
53 |
= 3 Profile Likelihood one sided (i.e. = 0 if mu < mu_hat)
|
54 |
|
55 |
useCLs scan for CLs (otherwise for CLs+b)
|
56 |
|
57 |
npoints: number of points to scan , for autoscan set npoints = -1
|
58 |
|
59 |
poimin,poimax: min/max value to scan in case of fixed scans
|
60 |
(if min >= max, try to find automatically)
|
61 |
|
62 |
ntoys: number of toys to use
|
63 |
|
64 |
useNumberCounting: set to true when using number counting events
|
65 |
|
66 |
nuisPriorName: name of prior for the nnuisance. This is often expressed as constraint term in the global model
|
67 |
It is needed only when using the HybridCalculator (type=1)
|
68 |
If not given by default the prior pdf from ModelConfig is used.
|
69 |
|
70 |
extra options are available as global paramters of the macro. They are:
|
71 |
|
72 |
plotHypoTestResult plot result of tests at each point (TS distributions)
|
73 |
useProof = true;
|
74 |
writeResult = true;
|
75 |
nworkers = 4;
|
76 |
|
77 |
|
78 |
*/
|
79 |
|
80 |
if (fileName==0) {
|
81 |
fileName = "example_combined_GaussExample_model.root";
|
82 |
std::cout << "Use standard file generated with HistFactory :" << fileName << std::endl;
|
83 |
}
|
84 |
TFile * file = new TFile(fileName);
|
85 |
|
86 |
RooWorkspace * w = dynamic_cast<RooWorkspace*>( file->Get(wsName) );
|
87 |
HypoTestInverterResult * r = 0;
|
88 |
std::cout << w << "\t" << fileName << std::endl;
|
89 |
if (w != NULL) {
|
90 |
r = RunInverter(w, modelSBName, modelBName, dataName, calculatorType, testStatType, npoints, poimin, poimax,
|
91 |
ntoys, useCls, useNumberCounting, nuisPriorName );
|
92 |
if (!r) {
|
93 |
std::cerr << "Error running the HypoTestInverter - Exit " << std::endl;
|
94 |
return;
|
95 |
}
|
96 |
}
|
97 |
else
|
98 |
{
|
99 |
// case workspace is not present look for the inverter result
|
100 |
std::cout << "Reading an HypoTestInverterResult with name " << wsName << " from file " << fileName << std::endl;
|
101 |
r = dynamic_cast<HypoTestInverterResult*>( file->Get(wsName) ); //
|
102 |
if (!r) {
|
103 |
std::cerr << "File " << fileName << " does not contain a workspace or an HypoTestInverterResult - Exit "
|
104 |
<< std::endl;
|
105 |
file->ls();
|
106 |
return;
|
107 |
}
|
108 |
}
|
109 |
|
110 |
|
111 |
double upperLimit = r->UpperLimit();
|
112 |
double ulError = r->UpperLimitEstimatedError();
|
113 |
|
114 |
|
115 |
std::cout << "The computed upper limit is: " << upperLimit << " +/- " << ulError << std::endl;
|
116 |
|
117 |
const int nEntries = r->ArraySize();
|
118 |
|
119 |
|
120 |
const char * typeName = (calculatorType == 0) ? "Frequentist" : "Hybrid";
|
121 |
const char * resultName = (w) ? w->GetName() : r->GetName();
|
122 |
TString plotTitle = TString::Format("%s CL Scan for workspace %s",typeName,resultName);
|
123 |
HypoTestInverterPlot *plot = new HypoTestInverterPlot("HTI_Result_Plot",plotTitle,r);
|
124 |
plot->Draw("CLb 2CL"); // plot all and Clb
|
125 |
|
126 |
if (plotHypoTestResult) {
|
127 |
TCanvas * c2 = new TCanvas();
|
128 |
c2->Divide( 2, TMath::Ceil(nEntries/2));
|
129 |
for (int i=0; i<nEntries; i++) {
|
130 |
c2->cd(i+1);
|
131 |
SamplingDistPlot * pl = plot->MakeTestStatPlot(i);
|
132 |
pl->SetLogYaxis(true);
|
133 |
pl->Draw();
|
134 |
}
|
135 |
}
|
136 |
|
137 |
|
138 |
std::cout << " expected limit (median) " << r->GetExpectedUpperLimit(0) << std::endl;
|
139 |
std::cout << " expected limit (-1 sig) " << r->GetExpectedUpperLimit(-1) << std::endl;
|
140 |
std::cout << " expected limit (+1 sig) " << r->GetExpectedUpperLimit(1) << std::endl;
|
141 |
|
142 |
|
143 |
if (w != NULL && writeResult) {
|
144 |
|
145 |
// write to a file the results
|
146 |
const char * calcType = (calculatorType == 0) ? "Freq" : "Hybr";
|
147 |
const char * limitType = (useCls) ? "CLs" : "Cls+b";
|
148 |
const char * scanType = (npoints < 0) ? "auto" : "grid";
|
149 |
TString resultFileName = TString::Format("%s_%s_%s_ts%d_",calcType,limitType,scanType,testStatType);
|
150 |
resultFileName += fileName;
|
151 |
|
152 |
TFile * fileOut = new TFile(resultFileName,"RECREATE");
|
153 |
r->Write();
|
154 |
fileOut->Close();
|
155 |
}
|
156 |
|
157 |
}
|
158 |
|
159 |
|
160 |
// internal routine to run the inverter
|
161 |
HypoTestInverterResult * RunInverter(RooWorkspace * w, const char * modelSBName, const char * modelBName,
|
162 |
const char * dataName, int type, int testStatType,
|
163 |
int npoints, double poimin, double poimax,
|
164 |
int ntoys, bool useCls, bool useNumberCounting, const char * nuisPriorName )
|
165 |
{
|
166 |
|
167 |
std::cout << "Running HypoTestInverter on the workspace " << w->GetName() << std::endl;
|
168 |
|
169 |
w->Print();
|
170 |
|
171 |
|
172 |
RooAbsData * data = w->data(dataName);
|
173 |
if (!data) {
|
174 |
Error("StandardHypoTestDemo","Not existing data %s",dataName);
|
175 |
return 0;
|
176 |
}
|
177 |
else
|
178 |
std::cout << "Using data set " << dataName << std::endl;
|
179 |
|
180 |
|
181 |
// get models from WS
|
182 |
// get the modelConfig out of the file
|
183 |
ModelConfig* bModel = (ModelConfig*) w->obj(modelBName);
|
184 |
ModelConfig* sbModel = (ModelConfig*) w->obj(modelSBName);
|
185 |
|
186 |
if (!sbModel) {
|
187 |
Error("StandardHypoTestDemo","Not existing ModelConfig %s",modelSBName);
|
188 |
return 0;
|
189 |
}
|
190 |
// check the model
|
191 |
if (!sbModel->GetPdf()) {
|
192 |
Error("StandardHypoTestDemo","Model %s has no pdf ",modelSBName);
|
193 |
return 0;
|
194 |
}
|
195 |
if (!sbModel->GetParametersOfInterest()) {
|
196 |
Error("StandardHypoTestDemo","Model %s has no poi ",modelSBName);
|
197 |
return 0;
|
198 |
}
|
199 |
if (!sbModel->GetParametersOfInterest()) {
|
200 |
Error("StandardHypoTestInvDemo","Model %s has no poi ",modelSBName);
|
201 |
return 0;
|
202 |
}
|
203 |
if (!sbModel->GetSnapshot() ) {
|
204 |
Info("StandardHypoTestInvDemo","Model %s has no snapshot - make one using model poi",modelSBName);
|
205 |
sbModel->SetSnapshot( *sbModel->GetParametersOfInterest() );
|
206 |
}
|
207 |
|
208 |
|
209 |
if (!bModel || bModel == sbModel) {
|
210 |
Info("StandardHypoTestInvDemo","The background model %s does not exist",modelBName);
|
211 |
Info("StandardHypoTestInvDemo","Copy it from ModelConfig %s and set POI to zero",modelSBName);
|
212 |
bModel = (ModelConfig*) sbModel->Clone();
|
213 |
bModel->SetName(TString(modelSBName)+TString("_with_poi_0"));
|
214 |
RooRealVar * var = dynamic_cast<RooRealVar*>(bModel->GetParametersOfInterest()->first());
|
215 |
if (!var) return 0;
|
216 |
double oldval = var->getVal();
|
217 |
var->setVal(0);
|
218 |
bModel->SetSnapshot( RooArgSet(*var) );
|
219 |
var->setVal(oldval);
|
220 |
}
|
221 |
else {
|
222 |
if (!bModel->GetSnapshot() ) {
|
223 |
Info("StandardHypoTestInvDemo","Model %s has no snapshot - make one using model poi and 0 values ",modelBName);
|
224 |
RooRealVar * var = dynamic_cast<RooRealVar*>(bModel->GetParametersOfInterest()->first());
|
225 |
if (var) {
|
226 |
double oldval = var->getVal();
|
227 |
var->setVal(0);
|
228 |
bModel->SetSnapshot( RooArgSet(*var) );
|
229 |
var->setVal(oldval);
|
230 |
}
|
231 |
else {
|
232 |
Error("StandardHypoTestInvDemo","Model %s has no valid poi",modelBName);
|
233 |
return 0;
|
234 |
}
|
235 |
}
|
236 |
}
|
237 |
|
238 |
|
239 |
SimpleLikelihoodRatioTestStat slrts(*sbModel->GetPdf(),*bModel->GetPdf());
|
240 |
if (sbModel->GetSnapshot()) slrts.SetNullParameters(*sbModel->GetSnapshot());
|
241 |
if (bModel->GetSnapshot()) slrts.SetAltParameters(*bModel->GetSnapshot());
|
242 |
|
243 |
// ratio of profile likelihood - need to pass snapshot for the alt
|
244 |
RatioOfProfiledLikelihoodsTestStat
|
245 |
ropl(*sbModel->GetPdf(), *bModel->GetPdf(), bModel->GetSnapshot());
|
246 |
ropl.SetSubtractMLE(false);
|
247 |
|
248 |
ProfileLikelihoodTestStat profll(*sbModel->GetPdf());
|
249 |
if (testStatType == 3) profll.SetOneSided(1);
|
250 |
if (optimize) profll.SetReuseNLL(true);
|
251 |
|
252 |
TestStatistic * testStat = &slrts;
|
253 |
if (testStatType == 1) testStat = &ropl;
|
254 |
if (testStatType == 2 || testStatType == 3) testStat = &profll;
|
255 |
|
256 |
|
257 |
HypoTestCalculatorGeneric * hc = 0;
|
258 |
if (type == 0) hc = new FrequentistCalculator(*data, *bModel, *sbModel);
|
259 |
else hc = new HybridCalculator(*data, *bModel, *sbModel);
|
260 |
|
261 |
ToyMCSampler *toymcs = (ToyMCSampler*)hc->GetTestStatSampler();
|
262 |
if (useNumberCounting) toymcs->SetNEventsPerToy(1);
|
263 |
toymcs->SetTestStatistic(testStat);
|
264 |
if (optimize) toymcs->SetUseMultiGen(true);
|
265 |
|
266 |
|
267 |
if (type == 1) {
|
268 |
HybridCalculator *hhc = (HybridCalculator*) hc;
|
269 |
hhc->SetToys(ntoys,ntoys);
|
270 |
|
271 |
// check for nuisance prior pdf in case of nuisance parameters
|
272 |
if (bModel->GetNuisanceParameters() || sbModel->GetNuisanceParameters() ) {
|
273 |
RooAbsPdf * nuisPdf = 0;
|
274 |
if (nuisPriorName) nuisPdf = w->pdf(nuisPriorName);
|
275 |
// use prior defined first in bModel (then in SbModel)
|
276 |
if (!nuisPdf) {
|
277 |
Info("StandardHypoTestInvDemo","No nuisance pdf given for the HybridCalculator - try to use the prior pdf from the model");
|
278 |
nuisPdf = (bModel->GetPriorPdf() ) ? bModel->GetPriorPdf() : sbModel->GetPriorPdf();
|
279 |
}
|
280 |
if (!nuisPdf) {
|
281 |
Error("StandardHypoTestInvDemo","Cannnot run Hybrid calculator because no prior on the nuisance parameter is specified");
|
282 |
return 0;
|
283 |
}
|
284 |
const RooArgSet * nuisParams = (bModel->GetNuisanceParameters() ) ? bModel->GetNuisanceParameters() : sbModel->GetNuisanceParameters();
|
285 |
RooArgSet * np = nuisPdf->getObservables(*nuisParams);
|
286 |
if (np->getSize() == 0) {
|
287 |
Warning("StandardHypoTestInvDemo","Prior nuisance does not depend on nuisance parameters. They will be smeared in their full range");
|
288 |
}
|
289 |
delete np;
|
290 |
hhc->ForcePriorNuisanceAlt(*nuisPdf);
|
291 |
hhc->ForcePriorNuisanceNull(*nuisPdf);
|
292 |
}
|
293 |
}
|
294 |
else
|
295 |
((FrequentistCalculator*) hc)->SetToys(ntoys,ntoys);
|
296 |
|
297 |
// Get the result
|
298 |
RooMsgService::instance().getStream(1).removeTopic(RooFit::NumIntegration);
|
299 |
|
300 |
|
301 |
TStopwatch tw; tw.Start();
|
302 |
const RooArgSet * poiSet = sbModel->GetParametersOfInterest();
|
303 |
RooRealVar *poi = (RooRealVar*)poiSet->first();
|
304 |
|
305 |
// fit the data first
|
306 |
sbModel->GetPdf()->fitTo(*data);
|
307 |
double poihat = poi->getVal();
|
308 |
|
309 |
|
310 |
HypoTestInverter calc(*hc);
|
311 |
calc.SetConfidenceLevel(0.95);
|
312 |
|
313 |
calc.UseCLs(useCls);
|
314 |
calc.SetVerbose(true);
|
315 |
|
316 |
// can speed up using proof-lite
|
317 |
if (useProof && nworkers > 1) {
|
318 |
ProofConfig pc(*w, nworkers, "", kFALSE);
|
319 |
toymcs->SetProofConfig(&pc); // enable proof
|
320 |
}
|
321 |
|
322 |
|
323 |
if (npoints > 0) {
|
324 |
if (poimin >= poimax) {
|
325 |
// if no min/max given scan between MLE and +4 sigma
|
326 |
poimin = int(poihat);
|
327 |
poimax = int(poihat + 4 * poi->getError());
|
328 |
}
|
329 |
std::cout << "Doing a fixed scan in interval : " << poimin << " , " << poimax << std::endl;
|
330 |
//calc.SetFixedScan(1,6,6);
|
331 |
calc.SetFixedScan(npoints,poimin,poimax);
|
332 |
}
|
333 |
else {
|
334 |
//poi->setMax(10*int( (poihat+ 10 *poi->getError() )/10 ) );
|
335 |
std::cout << "Doing an automatic scan in interval : " << poi->getMin() << " , " << poi->getMax() << std::endl;
|
336 |
}
|
337 |
|
338 |
HypoTestInverterResult * r = calc.GetInterval();
|
339 |
|
340 |
if (rebuild) {
|
341 |
calc.SetCloseProof(1);
|
342 |
SamplingDistribution * limDist = calc.GetUpperLimitDistribution(true,nToyToRebuild);
|
343 |
if (limDist) {
|
344 |
std::cout << "expected up limit " << limDist->InverseCDF(0.5) << " +/- "
|
345 |
<< limDist->InverseCDF(0.16) << " "
|
346 |
<< limDist->InverseCDF(0.84) << "\n";
|
347 |
}
|
348 |
else
|
349 |
std::cout << "ERROR : failed to re-build distributions " << std::endl;
|
350 |
}
|
351 |
|
352 |
//update r to a new re-freshed copied
|
353 |
r = calc.GetInterval();
|
354 |
|
355 |
delete hc;
|
356 |
|
357 |
return r;
|
358 |
}
|
359 |
|
360 |
void ReadResult(const char * fileName, const char * resultName="", bool useCLs=true) {
|
361 |
// read a previous stored result from a file given the result name
|
362 |
|
363 |
StandardHypoTestInvDemo(fileName, resultName,"","","",0,0,useCLs);
|
364 |
}
|
365 |
|