1 |
+ |
/* -*- mode: c++ -*- */ |
2 |
|
// Standard tutorial macro for performing an inverted hypothesis test |
3 |
|
// |
4 |
|
// This macro will perform a scan of tehe p-values for computing the limit |
8 |
|
|
9 |
|
#include "StandardHypoTestInvDemo.h" |
10 |
|
|
11 |
+ |
#include "TFile.h" |
12 |
+ |
#include "RooWorkspace.h" |
13 |
+ |
#include "RooAbsPdf.h" |
14 |
+ |
#include "RooRealVar.h" |
15 |
+ |
#include "RooDataSet.h" |
16 |
+ |
#include "RooStats/ModelConfig.h" |
17 |
+ |
#include "TGraphErrors.h" |
18 |
+ |
#include "TGraphAsymmErrors.h" |
19 |
+ |
#include "TCanvas.h" |
20 |
+ |
#include "TLine.h" |
21 |
+ |
#include "TROOT.h" |
22 |
+ |
|
23 |
+ |
#include "RooStats/AsymptoticCalculator.h" |
24 |
+ |
#include "RooStats/HybridCalculator.h" |
25 |
+ |
#include "RooStats/FrequentistCalculator.h" |
26 |
+ |
#include "RooStats/ToyMCSampler.h" |
27 |
+ |
#include "RooStats/HypoTestPlot.h" |
28 |
+ |
|
29 |
+ |
#include "RooStats/NumEventsTestStat.h" |
30 |
+ |
#include "RooStats/ProfileLikelihoodTestStat.h" |
31 |
+ |
#include "RooStats/SimpleLikelihoodRatioTestStat.h" |
32 |
+ |
#include "RooStats/RatioOfProfiledLikelihoodsTestStat.h" |
33 |
+ |
#include "RooStats/MaxLikelihoodEstimateTestStat.h" |
34 |
+ |
|
35 |
+ |
#include "RooStats/HypoTestInverter.h" |
36 |
+ |
#include "RooStats/HypoTestInverterResult.h" |
37 |
+ |
#include "RooStats/HypoTestInverterPlot.h" |
38 |
|
|
39 |
|
using namespace RooFit; |
40 |
|
using namespace RooStats; |
41 |
|
|
42 |
|
|
43 |
< |
bool plotHypoTestResult = true; |
44 |
< |
bool useProof = true; |
45 |
< |
bool optimize = false; |
46 |
< |
bool writeResult = false; |
47 |
< |
int nworkers = 4; |
48 |
< |
bool rebuild = false; |
49 |
< |
int nToyToRebuild = 100; |
43 |
> |
bool plotHypoTestResult = true; // plot test statistic result at each point |
44 |
> |
bool writeResult = true; // write HypoTestInverterResult in a file |
45 |
> |
bool optimize = true; // optmize evaluation of test statistic |
46 |
> |
bool useVectorStore = true; // convert data to use new roofit data store |
47 |
> |
bool generateBinned = false; // generate binned data sets |
48 |
> |
bool noSystematics = false; // force all systematics to be off (i.e. set all nuisance parameters as constat |
49 |
> |
// to their nominal values) |
50 |
> |
double nToysRatio = 2; // ratio Ntoys S+b/ntoysB |
51 |
> |
double maxPOI = -1; // max value used of POI (in case of auto scan) |
52 |
> |
bool useProof = false; // use Proof Light when using toys (for freq or hybrid) |
53 |
> |
int nworkers = 4; // number of worker for Proof |
54 |
> |
bool rebuild = false; // re-do extra toys for computing expected limits and rebuild test stat |
55 |
> |
// distributions (N.B this requires much more CPU (factor is equivalent to nToyToRebuild) |
56 |
> |
int nToyToRebuild = 100; // number of toys used to rebuild |
57 |
> |
|
58 |
> |
std::string massValue = ""; // extra string to tag output file of result |
59 |
> |
std::string minimizerType = ""; // minimizer type (default is what is in ROOT::Math::MinimizerOptions::DefaultMinimizerType() |
60 |
> |
int printLevel = 0; // print level for debugging PL test statistics and calculators |
61 |
|
|
62 |
|
|
63 |
|
|
64 |
+ |
// internal class to run the inverter and more |
65 |
+ |
|
66 |
+ |
|
67 |
+ |
RooStats::HypoTestInvTool::HypoTestInvTool() : mPlotHypoTestResult(true), |
68 |
+ |
mWriteResult(false), |
69 |
+ |
mOptimize(true), |
70 |
+ |
mUseVectorStore(true), |
71 |
+ |
mGenerateBinned(false), |
72 |
+ |
mUseProof(false), |
73 |
+ |
mRebuild(false), |
74 |
+ |
mNWorkers(4), |
75 |
+ |
mNToyToRebuild(100), |
76 |
+ |
mPrintLevel(0), |
77 |
+ |
mNToysRatio(2), |
78 |
+ |
mMaxPoi(-1), |
79 |
+ |
mMassValue(""), |
80 |
+ |
mMinimizerType(""){ |
81 |
+ |
} |
82 |
|
|
83 |
|
|
27 |
– |
void StandardHypoTestInvDemo(const char * fileName, |
28 |
– |
const char * wsName, |
29 |
– |
const char * modelSBName, |
30 |
– |
const char * modelBName, |
31 |
– |
const char * dataName, |
32 |
– |
int calculatorType, |
33 |
– |
int testStatType, |
34 |
– |
bool useCls, |
35 |
– |
int npoints, |
36 |
– |
double poimin, |
37 |
– |
double poimax, |
38 |
– |
int ntoys, |
39 |
– |
bool useNumberCounting, |
40 |
– |
const char * nuisPriorName) |
41 |
– |
{ |
42 |
– |
/* |
84 |
|
|
85 |
< |
Other Parameter to pass in tutorial |
86 |
< |
apart from standard for filename, ws, modelconfig and data |
85 |
> |
void |
86 |
> |
RooStats::HypoTestInvTool::SetParameter(const char * name, bool value){ |
87 |
> |
// |
88 |
> |
// set boolean parameters |
89 |
> |
// |
90 |
> |
|
91 |
> |
std::string s_name(name); |
92 |
> |
|
93 |
> |
if (s_name.find("PlotHypoTestResult") != std::string::npos) mPlotHypoTestResult = value; |
94 |
> |
if (s_name.find("WriteResult") != std::string::npos) mWriteResult = value; |
95 |
> |
if (s_name.find("Optimize") != std::string::npos) mOptimize = value; |
96 |
> |
if (s_name.find("UseVectorStore") != std::string::npos) mUseVectorStore = value; |
97 |
> |
if (s_name.find("GenerateBinned") != std::string::npos) mGenerateBinned = value; |
98 |
> |
if (s_name.find("UseProof") != std::string::npos) mUseProof = value; |
99 |
> |
if (s_name.find("Rebuild") != std::string::npos) mRebuild = value; |
100 |
> |
|
101 |
> |
return; |
102 |
> |
} |
103 |
> |
|
104 |
> |
|
105 |
> |
|
106 |
> |
void |
107 |
> |
RooStats::HypoTestInvTool::SetParameter(const char * name, int value){ |
108 |
> |
// |
109 |
> |
// set integer parameters |
110 |
> |
// |
111 |
> |
|
112 |
> |
std::string s_name(name); |
113 |
> |
|
114 |
> |
if (s_name.find("NWorkers") != std::string::npos) mNWorkers = value; |
115 |
> |
if (s_name.find("NToyToRebuild") != std::string::npos) mNToyToRebuild = value; |
116 |
> |
if (s_name.find("PrintLevel") != std::string::npos) mPrintLevel = value; |
117 |
> |
|
118 |
> |
return; |
119 |
> |
} |
120 |
> |
|
121 |
> |
|
122 |
> |
|
123 |
> |
void |
124 |
> |
RooStats::HypoTestInvTool::SetParameter(const char * name, double value){ |
125 |
> |
// |
126 |
> |
// set double precision parameters |
127 |
> |
// |
128 |
> |
|
129 |
> |
std::string s_name(name); |
130 |
> |
|
131 |
> |
if (s_name.find("NToysRatio") != std::string::npos) mNToysRatio = value; |
132 |
> |
if (s_name.find("MaxPoi") != std::string::npos) mMaxPoi = value; |
133 |
> |
|
134 |
> |
return; |
135 |
> |
} |
136 |
> |
|
137 |
> |
|
138 |
> |
|
139 |
> |
void |
140 |
> |
RooStats::HypoTestInvTool::SetParameter(const char * name, const char * value){ |
141 |
> |
// |
142 |
> |
// set string parameters |
143 |
> |
// |
144 |
> |
|
145 |
> |
std::string s_name(name); |
146 |
> |
|
147 |
> |
if (s_name.find("MassValue") != std::string::npos) mMassValue.assign(value); |
148 |
> |
if (s_name.find("MinimizerType") != std::string::npos) mMinimizerType.assign(value); |
149 |
> |
|
150 |
> |
return; |
151 |
> |
} |
152 |
> |
|
153 |
> |
|
154 |
> |
|
155 |
> |
void |
156 |
> |
StandardHypoTestInvDemo(const char * infile = 0, |
157 |
> |
const char * wsName = "combined", |
158 |
> |
const char * modelSBName = "ModelConfig", |
159 |
> |
const char * modelBName = "", |
160 |
> |
const char * dataName = "obsData", |
161 |
> |
int calculatorType = 0, |
162 |
> |
int testStatType = 0, |
163 |
> |
bool useCLs = true , |
164 |
> |
int npoints = 6, |
165 |
> |
double poimin = 0, |
166 |
> |
double poimax = 5, |
167 |
> |
int ntoys=1000, |
168 |
> |
bool useNumberCounting = false, |
169 |
> |
const char * nuisPriorName = 0){ |
170 |
> |
/* |
171 |
|
|
172 |
< |
type = 0 Freq calculator |
173 |
< |
type = 1 Hybrid |
172 |
> |
Other Parameter to pass in tutorial |
173 |
> |
apart from standard for filename, ws, modelconfig and data |
174 |
|
|
175 |
< |
testStatType = 0 LEP |
176 |
< |
= 1 Tevatron |
177 |
< |
= 2 Profile Likelihood |
53 |
< |
= 3 Profile Likelihood one sided (i.e. = 0 if mu < mu_hat) |
175 |
> |
type = 0 Freq calculator |
176 |
> |
type = 1 Hybrid calculator |
177 |
> |
type = 2 Asymptotic calculator |
178 |
|
|
179 |
< |
useCLs scan for CLs (otherwise for CLs+b) |
179 |
> |
testStatType = 0 LEP |
180 |
> |
= 1 Tevatron |
181 |
> |
= 2 Profile Likelihood |
182 |
> |
= 3 Profile Likelihood one sided (i.e. = 0 if mu < mu_hat) |
183 |
|
|
184 |
< |
npoints: number of points to scan , for autoscan set npoints = -1 |
184 |
> |
useCLs scan for CLs (otherwise for CLs+b) |
185 |
|
|
186 |
< |
poimin,poimax: min/max value to scan in case of fixed scans |
60 |
< |
(if min >= max, try to find automatically) |
186 |
> |
npoints: number of points to scan , for autoscan set npoints = -1 |
187 |
|
|
188 |
< |
ntoys: number of toys to use |
188 |
> |
poimin,poimax: min/max value to scan in case of fixed scans |
189 |
> |
(if min >= max, try to find automatically) |
190 |
|
|
191 |
< |
useNumberCounting: set to true when using number counting events |
191 |
> |
ntoys: number of toys to use |
192 |
|
|
193 |
< |
nuisPriorName: name of prior for the nnuisance. This is often expressed as constraint term in the global model |
67 |
< |
It is needed only when using the HybridCalculator (type=1) |
68 |
< |
If not given by default the prior pdf from ModelConfig is used. |
193 |
> |
useNumberCounting: set to true when using number counting events |
194 |
|
|
195 |
< |
extra options are available as global paramters of the macro. They are: |
195 |
> |
nuisPriorName: name of prior for the nnuisance. This is often expressed as constraint term in the global model |
196 |
> |
It is needed only when using the HybridCalculator (type=1) |
197 |
> |
If not given by default the prior pdf from ModelConfig is used. |
198 |
|
|
199 |
< |
plotHypoTestResult plot result of tests at each point (TS distributions) |
73 |
< |
useProof = true; |
74 |
< |
writeResult = true; |
75 |
< |
nworkers = 4; |
199 |
> |
extra options are available as global paramwters of the macro. They major ones are: |
200 |
|
|
201 |
+ |
plotHypoTestResult plot result of tests at each point (TS distributions) (defauly is true) |
202 |
+ |
useProof use Proof (default is true) |
203 |
+ |
writeResult write result of scan (default is true) |
204 |
+ |
rebuild rebuild scan for expected limits (require extra toys) (default is false) |
205 |
+ |
generateBinned generate binned data sets for toys (default is false) - be careful not to activate with |
206 |
+ |
a too large (>=3) number of observables |
207 |
+ |
nToyRatio ratio of S+B/B toys (default is 2) |
208 |
+ |
|
209 |
|
|
210 |
< |
*/ |
210 |
> |
*/ |
211 |
|
|
212 |
< |
if (fileName==0) { |
213 |
< |
fileName = "example_combined_GaussExample_model.root"; |
214 |
< |
std::cout << "Use standard file generated with HistFactory :" << fileName << std::endl; |
212 |
> |
|
213 |
> |
|
214 |
> |
TString fileName(infile); |
215 |
> |
if (fileName.IsNull()) { |
216 |
> |
fileName = "results/example_combined_GaussExample_model.root"; |
217 |
> |
std::cout << "Use standard file generated with HistFactory : " << fileName << std::endl; |
218 |
> |
} |
219 |
> |
|
220 |
> |
// open file and check if input file exists |
221 |
> |
TFile * file = TFile::Open(fileName); |
222 |
> |
|
223 |
> |
// if input file was specified but not found, quit |
224 |
> |
if(!file && !TString(infile).IsNull()){ |
225 |
> |
std::cout <<"file " << fileName << " not found" << std::endl; |
226 |
> |
return; |
227 |
> |
} |
228 |
> |
|
229 |
> |
// if default file not found, try to create it |
230 |
> |
if(!file ){ |
231 |
> |
// Normally this would be run on the command line |
232 |
> |
std::cout <<"will run standard hist2workspace example"<<std::endl; |
233 |
> |
gROOT->ProcessLine(".! prepareHistFactory ."); |
234 |
> |
gROOT->ProcessLine(".! hist2workspace config/example.xml"); |
235 |
> |
std::cout <<"\n\n---------------------"<<std::endl; |
236 |
> |
std::cout <<"Done creating example input"<<std::endl; |
237 |
> |
std::cout <<"---------------------\n\n"<<std::endl; |
238 |
> |
|
239 |
> |
// now try to access the file again |
240 |
> |
file = TFile::Open(fileName); |
241 |
> |
|
242 |
|
} |
243 |
< |
TFile * file = new TFile(fileName); |
243 |
> |
|
244 |
> |
if(!file){ |
245 |
> |
// if it is still not there, then we can't continue |
246 |
> |
std::cout << "Not able to run hist2workspace to create example input" <<std::endl; |
247 |
> |
return; |
248 |
> |
} |
249 |
> |
|
250 |
> |
|
251 |
> |
|
252 |
> |
HypoTestInvTool calc; |
253 |
> |
|
254 |
> |
// set parameters |
255 |
> |
calc.SetParameter("PlotHypoTestResult", plotHypoTestResult); |
256 |
> |
calc.SetParameter("WriteResult", writeResult); |
257 |
> |
calc.SetParameter("Optimize", optimize); |
258 |
> |
calc.SetParameter("UseVectorStore", useVectorStore); |
259 |
> |
calc.SetParameter("GenerateBinned", generateBinned); |
260 |
> |
calc.SetParameter("NToysRatio", nToysRatio); |
261 |
> |
calc.SetParameter("MaxPOI", maxPOI); |
262 |
> |
calc.SetParameter("UseProof", useProof); |
263 |
> |
calc.SetParameter("Nworkers", nworkers); |
264 |
> |
calc.SetParameter("Rebuild", rebuild); |
265 |
> |
calc.SetParameter("NToyToRebuild", nToyToRebuild); |
266 |
> |
calc.SetParameter("MassValue", massValue.c_str()); |
267 |
> |
calc.SetParameter("MinimizerType", minimizerType.c_str()); |
268 |
> |
calc.SetParameter("PrintLevel", printLevel); |
269 |
> |
|
270 |
|
|
271 |
|
RooWorkspace * w = dynamic_cast<RooWorkspace*>( file->Get(wsName) ); |
272 |
< |
HypoTestInverterResult * r = 0; |
272 |
> |
HypoTestInverterResult * r = 0; |
273 |
|
std::cout << w << "\t" << fileName << std::endl; |
274 |
|
if (w != NULL) { |
275 |
< |
r = RunInverter(w, modelSBName, modelBName, dataName, calculatorType, testStatType, npoints, poimin, poimax, |
276 |
< |
ntoys, useCls, useNumberCounting, nuisPriorName ); |
275 |
> |
r = calc.RunInverter(w, modelSBName, modelBName, |
276 |
> |
dataName, calculatorType, testStatType, useCLs, |
277 |
> |
npoints, poimin, poimax, |
278 |
> |
ntoys, useNumberCounting, nuisPriorName ); |
279 |
|
if (!r) { |
280 |
|
std::cerr << "Error running the HypoTestInverter - Exit " << std::endl; |
281 |
|
return; |
282 |
|
} |
283 |
|
} |
284 |
< |
else |
98 |
< |
{ |
284 |
> |
else { |
285 |
|
// case workspace is not present look for the inverter result |
286 |
|
std::cout << "Reading an HypoTestInverterResult with name " << wsName << " from file " << fileName << std::endl; |
287 |
|
r = dynamic_cast<HypoTestInverterResult*>( file->Get(wsName) ); // |
291 |
|
file->ls(); |
292 |
|
return; |
293 |
|
} |
294 |
< |
} |
295 |
< |
|
296 |
< |
|
297 |
< |
double upperLimit = r->UpperLimit(); |
298 |
< |
double ulError = r->UpperLimitEstimatedError(); |
299 |
< |
|
114 |
< |
|
115 |
< |
std::cout << "The computed upper limit is: " << upperLimit << " +/- " << ulError << std::endl; |
116 |
< |
|
117 |
< |
const int nEntries = r->ArraySize(); |
118 |
< |
|
294 |
> |
} |
295 |
> |
|
296 |
> |
calc.AnalyzeResult( r, calculatorType, testStatType, useCLs, npoints, infile ); |
297 |
> |
|
298 |
> |
return; |
299 |
> |
} |
300 |
|
|
120 |
– |
const char * typeName = (calculatorType == 0) ? "Frequentist" : "Hybrid"; |
121 |
– |
const char * resultName = (w) ? w->GetName() : r->GetName(); |
122 |
– |
TString plotTitle = TString::Format("%s CL Scan for workspace %s",typeName,resultName); |
123 |
– |
HypoTestInverterPlot *plot = new HypoTestInverterPlot("HTI_Result_Plot",plotTitle,r); |
124 |
– |
plot->Draw("CLb 2CL"); // plot all and Clb |
301 |
|
|
126 |
– |
if (plotHypoTestResult) { |
127 |
– |
TCanvas * c2 = new TCanvas(); |
128 |
– |
c2->Divide( 2, TMath::Ceil(nEntries/2)); |
129 |
– |
for (int i=0; i<nEntries; i++) { |
130 |
– |
c2->cd(i+1); |
131 |
– |
SamplingDistPlot * pl = plot->MakeTestStatPlot(i); |
132 |
– |
pl->SetLogYaxis(true); |
133 |
– |
pl->Draw(); |
134 |
– |
} |
135 |
– |
} |
302 |
|
|
303 |
+ |
void |
304 |
+ |
RooStats::HypoTestInvTool::AnalyzeResult( HypoTestInverterResult * r, |
305 |
+ |
int calculatorType, |
306 |
+ |
int testStatType, |
307 |
+ |
bool useCLs, |
308 |
+ |
int npoints, |
309 |
+ |
const char * fileNameBase ){ |
310 |
|
|
311 |
< |
std::cout << " expected limit (median) " << r->GetExpectedUpperLimit(0) << std::endl; |
311 |
> |
// analyize result produced by the inverter, optionally save it in a file |
312 |
> |
|
313 |
> |
double upperLimit = r->UpperLimit(); |
314 |
> |
double ulError = r->UpperLimitEstimatedError(); |
315 |
> |
|
316 |
> |
std::cout << "The computed upper limit is: " << upperLimit << " +/- " << ulError << std::endl; |
317 |
> |
|
318 |
> |
// compute expected limit |
319 |
> |
std::cout << " expected limit (median) " << r->GetExpectedUpperLimit(0) << std::endl; |
320 |
|
std::cout << " expected limit (-1 sig) " << r->GetExpectedUpperLimit(-1) << std::endl; |
321 |
|
std::cout << " expected limit (+1 sig) " << r->GetExpectedUpperLimit(1) << std::endl; |
322 |
< |
|
323 |
< |
|
324 |
< |
if (w != NULL && writeResult) { |
325 |
< |
|
322 |
> |
std::cout << " expected limit (-2 sig) " << r->GetExpectedUpperLimit(-2) << std::endl; |
323 |
> |
std::cout << " expected limit (+2 sig) " << r->GetExpectedUpperLimit(2) << std::endl; |
324 |
> |
|
325 |
> |
|
326 |
> |
// write result in a file |
327 |
> |
if (r != NULL && mWriteResult) { |
328 |
> |
|
329 |
|
// write to a file the results |
330 |
< |
const char * calcType = (calculatorType == 0) ? "Freq" : "Hybr"; |
331 |
< |
const char * limitType = (useCls) ? "CLs" : "Cls+b"; |
330 |
> |
const char * calcType = (calculatorType == 0) ? "Freq" : (calculatorType == 1) ? "Hybr" : "Asym"; |
331 |
> |
const char * limitType = (useCLs) ? "CLs" : "Cls+b"; |
332 |
|
const char * scanType = (npoints < 0) ? "auto" : "grid"; |
333 |
|
TString resultFileName = TString::Format("%s_%s_%s_ts%d_",calcType,limitType,scanType,testStatType); |
334 |
< |
resultFileName += fileName; |
335 |
< |
|
334 |
> |
//strip the / from the filename |
335 |
> |
if (mMassValue.size()>0) { |
336 |
> |
resultFileName += mMassValue.c_str(); |
337 |
> |
resultFileName += "_"; |
338 |
> |
} |
339 |
> |
|
340 |
> |
TString name = fileNameBase; |
341 |
> |
name.Replace(0, name.Last('/')+1, ""); |
342 |
> |
resultFileName += name; |
343 |
> |
|
344 |
|
TFile * fileOut = new TFile(resultFileName,"RECREATE"); |
345 |
|
r->Write(); |
346 |
|
fileOut->Close(); |
347 |
|
} |
348 |
< |
|
348 |
> |
|
349 |
> |
|
350 |
> |
// plot the result ( p values vs scan points) |
351 |
> |
std::string typeName = ""; |
352 |
> |
if (calculatorType == 0 ) |
353 |
> |
typeName = "Frequentist"; |
354 |
> |
if (calculatorType == 1 ) |
355 |
> |
typeName = "Hybrid"; |
356 |
> |
else if (calculatorType == 2 ) { |
357 |
> |
typeName = "Asymptotic"; |
358 |
> |
mPlotHypoTestResult = false; |
359 |
> |
} |
360 |
> |
|
361 |
> |
const char * resultName = r->GetName(); |
362 |
> |
TString plotTitle = TString::Format("%s CL Scan for workspace %s",typeName.c_str(),resultName); |
363 |
> |
HypoTestInverterPlot *plot = new HypoTestInverterPlot("HTI_Result_Plot",plotTitle,r); |
364 |
> |
plot->Draw("CLb 2CL"); // plot all and Clb |
365 |
> |
|
366 |
> |
const int nEntries = r->ArraySize(); |
367 |
> |
|
368 |
> |
// plot test statistics distributions for the two hypothesis |
369 |
> |
if (mPlotHypoTestResult) { |
370 |
> |
TCanvas * c2 = new TCanvas(); |
371 |
> |
if (nEntries > 1) { |
372 |
> |
int ny = TMath::CeilNint( sqrt(nEntries) ); |
373 |
> |
int nx = TMath::CeilNint(double(nEntries)/ny); |
374 |
> |
c2->Divide( nx,ny); |
375 |
> |
} |
376 |
> |
for (int i=0; i<nEntries; i++) { |
377 |
> |
if (nEntries > 1) c2->cd(i+1); |
378 |
> |
SamplingDistPlot * pl = plot->MakeTestStatPlot(i); |
379 |
> |
pl->SetLogYaxis(true); |
380 |
> |
pl->Draw(); |
381 |
> |
} |
382 |
> |
} |
383 |
|
} |
384 |
|
|
385 |
|
|
386 |
+ |
|
387 |
|
// internal routine to run the inverter |
388 |
< |
HypoTestInverterResult * RunInverter(RooWorkspace * w, const char * modelSBName, const char * modelBName, |
389 |
< |
const char * dataName, int type, int testStatType, |
390 |
< |
int npoints, double poimin, double poimax, |
391 |
< |
int ntoys, bool useCls, bool useNumberCounting, const char * nuisPriorName ) |
392 |
< |
{ |
388 |
> |
HypoTestInverterResult * |
389 |
> |
RooStats::HypoTestInvTool::RunInverter(RooWorkspace * w, |
390 |
> |
const char * modelSBName, const char * modelBName, |
391 |
> |
const char * dataName, int type, int testStatType, |
392 |
> |
bool useCLs, int npoints, double poimin, double poimax, |
393 |
> |
int ntoys, |
394 |
> |
bool useNumberCounting, |
395 |
> |
const char * nuisPriorName ){ |
396 |
|
|
397 |
|
std::cout << "Running HypoTestInverter on the workspace " << w->GetName() << std::endl; |
398 |
< |
|
398 |
> |
|
399 |
|
w->Print(); |
400 |
< |
|
401 |
< |
|
400 |
> |
|
401 |
> |
|
402 |
|
RooAbsData * data = w->data(dataName); |
403 |
|
if (!data) { |
404 |
|
Error("StandardHypoTestDemo","Not existing data %s",dataName); |
406 |
|
} |
407 |
|
else |
408 |
|
std::cout << "Using data set " << dataName << std::endl; |
409 |
< |
|
410 |
< |
|
409 |
> |
|
410 |
> |
if (mUseVectorStore) { |
411 |
> |
RooAbsData::defaultStorageType = RooAbsData::Vector; |
412 |
> |
data->convertToVectorStore() ; |
413 |
> |
} |
414 |
> |
|
415 |
> |
|
416 |
|
// get models from WS |
417 |
|
// get the modelConfig out of the file |
418 |
|
ModelConfig* bModel = (ModelConfig*) w->obj(modelBName); |
419 |
|
ModelConfig* sbModel = (ModelConfig*) w->obj(modelSBName); |
420 |
< |
|
420 |
> |
|
421 |
|
if (!sbModel) { |
422 |
|
Error("StandardHypoTestDemo","Not existing ModelConfig %s",modelSBName); |
423 |
|
return 0; |
431 |
|
Error("StandardHypoTestDemo","Model %s has no poi ",modelSBName); |
432 |
|
return 0; |
433 |
|
} |
434 |
< |
if (!sbModel->GetParametersOfInterest()) { |
435 |
< |
Error("StandardHypoTestInvDemo","Model %s has no poi ",modelSBName); |
434 |
> |
if (!sbModel->GetObservables()) { |
435 |
> |
Error("StandardHypoTestInvDemo","Model %s has no observables ",modelSBName); |
436 |
|
return 0; |
437 |
|
} |
438 |
|
if (!sbModel->GetSnapshot() ) { |
439 |
|
Info("StandardHypoTestInvDemo","Model %s has no snapshot - make one using model poi",modelSBName); |
440 |
|
sbModel->SetSnapshot( *sbModel->GetParametersOfInterest() ); |
441 |
|
} |
442 |
< |
|
443 |
< |
|
442 |
> |
|
443 |
> |
// case of no systematics |
444 |
> |
// remove nuisance parameters from model |
445 |
> |
if (noSystematics) { |
446 |
> |
const RooArgSet * nuisPar = sbModel->GetNuisanceParameters(); |
447 |
> |
if (nuisPar && nuisPar->getSize() > 0) { |
448 |
> |
std::cout << "StandardHypoTestInvDemo" << " - Switch off all systematics by setting them constant to their initial values" << std::endl; |
449 |
> |
RooStats::SetAllConstant(*nuisPar); |
450 |
> |
} |
451 |
> |
if (bModel) { |
452 |
> |
const RooArgSet * bnuisPar = bModel->GetNuisanceParameters(); |
453 |
> |
if (bnuisPar) |
454 |
> |
RooStats::SetAllConstant(*bnuisPar); |
455 |
> |
} |
456 |
> |
} |
457 |
> |
|
458 |
|
if (!bModel || bModel == sbModel) { |
459 |
|
Info("StandardHypoTestInvDemo","The background model %s does not exist",modelBName); |
460 |
|
Info("StandardHypoTestInvDemo","Copy it from ModelConfig %s and set POI to zero",modelSBName); |
483 |
|
} |
484 |
|
} |
485 |
|
} |
486 |
< |
|
487 |
< |
|
486 |
> |
|
487 |
> |
// run first a data fit |
488 |
> |
|
489 |
> |
const RooArgSet * poiSet = sbModel->GetParametersOfInterest(); |
490 |
> |
RooRealVar *poi = (RooRealVar*)poiSet->first(); |
491 |
> |
|
492 |
> |
std::cout << "StandardHypoTestInvDemo : POI initial value: " << poi->GetName() << " = " << poi->getVal() << std::endl; |
493 |
> |
|
494 |
> |
// fit the data first (need to use constraint ) |
495 |
> |
Info( "StandardHypoTestInvDemo"," Doing a first fit to the observed data "); |
496 |
> |
if (minimizerType.size()==0) minimizerType = ROOT::Math::MinimizerOptions::DefaultMinimizerType(); |
497 |
> |
else |
498 |
> |
ROOT::Math::MinimizerOptions::SetDefaultMinimizer(minimizerType.c_str()); |
499 |
> |
Info("StandardHypoTestInvDemo","Using %s as minimizer for computing the test statistic", |
500 |
> |
ROOT::Math::MinimizerOptions::DefaultMinimizerType().c_str() ); |
501 |
> |
RooArgSet constrainParams; |
502 |
> |
if (sbModel->GetNuisanceParameters() ) constrainParams.add(*sbModel->GetNuisanceParameters()); |
503 |
> |
RooStats::RemoveConstantParameters(&constrainParams); |
504 |
> |
TStopwatch tw; |
505 |
> |
tw.Start(); |
506 |
> |
RooFitResult * fitres = sbModel->GetPdf()->fitTo(*data,InitialHesse(false), Hesse(false), |
507 |
> |
Minimizer(minimizerType.c_str(),"Migrad"), Strategy(0), PrintLevel(mPrintLevel+1), Constrain(constrainParams), Save(true) ); |
508 |
> |
if (fitres->status() != 0) { |
509 |
> |
Warning("StandardHypoTestInvDemo","Fit to the model failed - try with strategy 1 and perform first an Hesse computation"); |
510 |
> |
fitres = sbModel->GetPdf()->fitTo(*data,InitialHesse(true), Hesse(false),Minimizer(minimizerType.c_str(),"Migrad"), Strategy(1), PrintLevel(mPrintLevel+1), Constrain(constrainParams), Save(true) ); |
511 |
> |
} |
512 |
> |
if (fitres->status() != 0) |
513 |
> |
Warning("StandardHypoTestInvDemo"," Fit still failed - continue anyway....."); |
514 |
> |
|
515 |
> |
|
516 |
> |
double poihat = poi->getVal(); |
517 |
> |
std::cout << "StandardHypoTestInvDemo - Best Fit value : " << poi->GetName() << " = " |
518 |
> |
<< poihat << " +/- " << poi->getError() << std::endl; |
519 |
> |
std::cout << "Time for fitting : "; tw.Print(); |
520 |
> |
|
521 |
> |
//save best fit value in the poi snapshot |
522 |
> |
sbModel->SetSnapshot(*sbModel->GetParametersOfInterest()); |
523 |
> |
std::cout << "StandardHypoTestInvo: snapshot of S+B Model " << sbModel->GetName() |
524 |
> |
<< " is set to the best fit value" << std::endl; |
525 |
> |
|
526 |
> |
// build test statistics and hypotest calculators for running the inverter |
527 |
> |
|
528 |
|
SimpleLikelihoodRatioTestStat slrts(*sbModel->GetPdf(),*bModel->GetPdf()); |
529 |
+ |
|
530 |
|
if (sbModel->GetSnapshot()) slrts.SetNullParameters(*sbModel->GetSnapshot()); |
531 |
|
if (bModel->GetSnapshot()) slrts.SetAltParameters(*bModel->GetSnapshot()); |
532 |
< |
|
532 |
> |
|
533 |
|
// ratio of profile likelihood - need to pass snapshot for the alt |
534 |
|
RatioOfProfiledLikelihoodsTestStat |
535 |
|
ropl(*sbModel->GetPdf(), *bModel->GetPdf(), bModel->GetSnapshot()); |
536 |
|
ropl.SetSubtractMLE(false); |
537 |
< |
|
537 |
> |
ropl.SetPrintLevel(mPrintLevel); |
538 |
> |
ropl.SetMinimizer(minimizerType.c_str()); |
539 |
> |
|
540 |
|
ProfileLikelihoodTestStat profll(*sbModel->GetPdf()); |
541 |
|
if (testStatType == 3) profll.SetOneSided(1); |
542 |
< |
if (optimize) profll.SetReuseNLL(true); |
542 |
> |
profll.SetMinimizer(minimizerType.c_str()); |
543 |
> |
profll.SetPrintLevel(mPrintLevel); |
544 |
|
|
545 |
< |
TestStatistic * testStat = &slrts; |
546 |
< |
if (testStatType == 1) testStat = &ropl; |
547 |
< |
if (testStatType == 2 || testStatType == 3) testStat = &profll; |
545 |
> |
profll.SetReuseNLL(mOptimize); |
546 |
> |
slrts.SetReuseNLL(mOptimize); |
547 |
> |
ropl.SetReuseNLL(mOptimize); |
548 |
> |
|
549 |
> |
if (mOptimize) { |
550 |
> |
profll.SetStrategy(0); |
551 |
> |
ropl.SetStrategy(0); |
552 |
> |
} |
553 |
|
|
554 |
< |
|
554 |
> |
if (mMaxPoi > 0) poi->setMax(mMaxPoi); // increase limit |
555 |
> |
|
556 |
> |
MaxLikelihoodEstimateTestStat maxll(*sbModel->GetPdf(),*poi); |
557 |
> |
|
558 |
> |
// create the HypoTest calculator class |
559 |
|
HypoTestCalculatorGeneric * hc = 0; |
560 |
|
if (type == 0) hc = new FrequentistCalculator(*data, *bModel, *sbModel); |
561 |
< |
else hc = new HybridCalculator(*data, *bModel, *sbModel); |
562 |
< |
|
561 |
> |
else if (type == 1) hc = new HybridCalculator(*data, *bModel, *sbModel); |
562 |
> |
else if (type == 2) hc = new AsymptoticCalculator(*data, *bModel, *sbModel); |
563 |
> |
else { |
564 |
> |
Error("StandardHypoTestInvDemo","Invalid - calculator type = %d supported values are only :\n\t\t\t 0 (Frequentist) , 1 (Hybrid) , 2 (Asymptotic) ",type); |
565 |
> |
return 0; |
566 |
> |
} |
567 |
> |
|
568 |
> |
// set the test statistic |
569 |
> |
TestStatistic * testStat = 0; |
570 |
> |
if (testStatType == 0) testStat = &slrts; |
571 |
> |
if (testStatType == 1) testStat = &ropl; |
572 |
> |
if (testStatType == 2 || testStatType == 3) testStat = &profll; |
573 |
> |
if (testStatType == 4) testStat = &maxll; |
574 |
> |
if (testStat == 0) { |
575 |
> |
Error("StandardHypoTestInvDemo","Invalid - test statistic type = %d supported values are only :\n\t\t\t 0 (SLR) , 1 (Tevatron) , 2 (PLR), 3 (PLR1), 4(MLE)",testStatType); |
576 |
> |
return 0; |
577 |
> |
} |
578 |
> |
|
579 |
> |
|
580 |
|
ToyMCSampler *toymcs = (ToyMCSampler*)hc->GetTestStatSampler(); |
581 |
< |
if (useNumberCounting) toymcs->SetNEventsPerToy(1); |
582 |
< |
toymcs->SetTestStatistic(testStat); |
583 |
< |
if (optimize) toymcs->SetUseMultiGen(true); |
584 |
< |
|
585 |
< |
|
581 |
> |
if (toymcs) { |
582 |
> |
if (useNumberCounting) toymcs->SetNEventsPerToy(1); |
583 |
> |
toymcs->SetTestStatistic(testStat); |
584 |
> |
|
585 |
> |
if (data->isWeighted() && !mGenerateBinned) { |
586 |
> |
Info("StandardHypoTestInvDemo","Data set is weighted, nentries = %d and sum of weights = %8.1f but toy generation is unbinned - it would be faster to set mGenerateBinned to true\n",data->numEntries(), data->sumEntries()); |
587 |
> |
} |
588 |
> |
toymcs->SetGenerateBinned(mGenerateBinned); |
589 |
> |
|
590 |
> |
toymcs->SetUseMultiGen(mOptimize); |
591 |
> |
|
592 |
> |
if (mGenerateBinned && sbModel->GetObservables()->getSize() > 2) { |
593 |
> |
Warning("StandardHypoTestInvDemo","generate binned is activated but the number of ovservable is %d. Too much memory could be needed for allocating all the bins",sbModel->GetObservables()->getSize() ); |
594 |
> |
} |
595 |
> |
|
596 |
> |
} |
597 |
> |
|
598 |
> |
|
599 |
|
if (type == 1) { |
600 |
< |
HybridCalculator *hhc = (HybridCalculator*) hc; |
601 |
< |
hhc->SetToys(ntoys,ntoys); |
602 |
< |
|
600 |
> |
HybridCalculator *hhc = dynamic_cast<HybridCalculator*> (hc); |
601 |
> |
assert(hhc); |
602 |
> |
|
603 |
> |
hhc->SetToys(ntoys,ntoys/mNToysRatio); // can use less ntoys for b hypothesis |
604 |
> |
|
605 |
> |
// remove global observables from ModelConfig (this is probably not needed anymore in 5.32) |
606 |
> |
bModel->SetGlobalObservables(RooArgSet() ); |
607 |
> |
sbModel->SetGlobalObservables(RooArgSet() ); |
608 |
> |
|
609 |
> |
|
610 |
|
// check for nuisance prior pdf in case of nuisance parameters |
611 |
|
if (bModel->GetNuisanceParameters() || sbModel->GetNuisanceParameters() ) { |
612 |
|
RooAbsPdf * nuisPdf = 0; |
613 |
|
if (nuisPriorName) nuisPdf = w->pdf(nuisPriorName); |
614 |
|
// use prior defined first in bModel (then in SbModel) |
615 |
|
if (!nuisPdf) { |
616 |
< |
Info("StandardHypoTestInvDemo","No nuisance pdf given for the HybridCalculator - try to use the prior pdf from the model"); |
617 |
< |
nuisPdf = (bModel->GetPriorPdf() ) ? bModel->GetPriorPdf() : sbModel->GetPriorPdf(); |
618 |
< |
} |
619 |
< |
if (!nuisPdf) { |
620 |
< |
Error("StandardHypoTestInvDemo","Cannnot run Hybrid calculator because no prior on the nuisance parameter is specified"); |
621 |
< |
return 0; |
616 |
> |
Info("StandardHypoTestInvDemo","No nuisance pdf given for the HybridCalculator - try to deduce pdf from the model"); |
617 |
> |
if (bModel->GetPdf() && bModel->GetObservables() ) |
618 |
> |
nuisPdf = RooStats::MakeNuisancePdf(*bModel,"nuisancePdf_bmodel"); |
619 |
> |
else |
620 |
> |
nuisPdf = RooStats::MakeNuisancePdf(*sbModel,"nuisancePdf_sbmodel"); |
621 |
> |
} |
622 |
> |
if (!nuisPdf ) { |
623 |
> |
if (bModel->GetPriorPdf()) { |
624 |
> |
nuisPdf = bModel->GetPriorPdf(); |
625 |
> |
Info("StandardHypoTestInvDemo","No nuisance pdf given - try to use %s that is defined as a prior pdf in the B model",nuisPdf->GetName()); |
626 |
> |
} |
627 |
> |
else { |
628 |
> |
Error("StandardHypoTestInvDemo","Cannnot run Hybrid calculator because no prior on the nuisance parameter is specified or can be derived"); |
629 |
> |
return 0; |
630 |
> |
} |
631 |
|
} |
632 |
+ |
assert(nuisPdf); |
633 |
+ |
Info("StandardHypoTestInvDemo","Using as nuisance Pdf ... " ); |
634 |
+ |
nuisPdf->Print(); |
635 |
+ |
|
636 |
|
const RooArgSet * nuisParams = (bModel->GetNuisanceParameters() ) ? bModel->GetNuisanceParameters() : sbModel->GetNuisanceParameters(); |
637 |
|
RooArgSet * np = nuisPdf->getObservables(*nuisParams); |
638 |
|
if (np->getSize() == 0) { |
639 |
|
Warning("StandardHypoTestInvDemo","Prior nuisance does not depend on nuisance parameters. They will be smeared in their full range"); |
640 |
|
} |
641 |
|
delete np; |
642 |
+ |
|
643 |
|
hhc->ForcePriorNuisanceAlt(*nuisPdf); |
644 |
|
hhc->ForcePriorNuisanceNull(*nuisPdf); |
645 |
+ |
|
646 |
+ |
|
647 |
|
} |
648 |
|
} |
649 |
< |
else |
650 |
< |
((FrequentistCalculator*) hc)->SetToys(ntoys,ntoys); |
651 |
< |
|
649 |
> |
else if (type == 2) { |
650 |
> |
((AsymptoticCalculator*) hc)->SetOneSided(true); |
651 |
> |
// ((AsymptoticCalculator*) hc)->SetQTilde(true); // not needed should be done automatically now |
652 |
> |
((AsymptoticCalculator*) hc)->SetPrintLevel(mPrintLevel+1); |
653 |
> |
} |
654 |
> |
else if (type != 2) |
655 |
> |
((FrequentistCalculator*) hc)->SetToys(ntoys,ntoys/mNToysRatio); |
656 |
> |
|
657 |
|
// Get the result |
658 |
|
RooMsgService::instance().getStream(1).removeTopic(RooFit::NumIntegration); |
659 |
< |
|
660 |
< |
|
661 |
< |
TStopwatch tw; tw.Start(); |
302 |
< |
const RooArgSet * poiSet = sbModel->GetParametersOfInterest(); |
303 |
< |
RooRealVar *poi = (RooRealVar*)poiSet->first(); |
304 |
< |
|
305 |
< |
// fit the data first |
306 |
< |
sbModel->GetPdf()->fitTo(*data); |
307 |
< |
double poihat = poi->getVal(); |
308 |
< |
|
309 |
< |
|
659 |
> |
|
660 |
> |
|
661 |
> |
|
662 |
|
HypoTestInverter calc(*hc); |
663 |
|
calc.SetConfidenceLevel(0.95); |
664 |
< |
|
665 |
< |
calc.UseCLs(useCls); |
664 |
> |
|
665 |
> |
|
666 |
> |
calc.UseCLs(useCLs); |
667 |
|
calc.SetVerbose(true); |
668 |
< |
|
668 |
> |
|
669 |
|
// can speed up using proof-lite |
670 |
< |
if (useProof && nworkers > 1) { |
671 |
< |
ProofConfig pc(*w, nworkers, "", kFALSE); |
670 |
> |
if (mUseProof && mNWorkers > 1) { |
671 |
> |
ProofConfig pc(*w, mNWorkers, "", kFALSE); |
672 |
|
toymcs->SetProofConfig(&pc); // enable proof |
673 |
|
} |
674 |
< |
|
675 |
< |
|
674 |
> |
|
675 |
> |
|
676 |
|
if (npoints > 0) { |
677 |
< |
if (poimin >= poimax) { |
677 |
> |
if (poimin > poimax) { |
678 |
|
// if no min/max given scan between MLE and +4 sigma |
679 |
|
poimin = int(poihat); |
680 |
|
poimax = int(poihat + 4 * poi->getError()); |
681 |
|
} |
682 |
|
std::cout << "Doing a fixed scan in interval : " << poimin << " , " << poimax << std::endl; |
330 |
– |
//calc.SetFixedScan(1,6,6); |
683 |
|
calc.SetFixedScan(npoints,poimin,poimax); |
684 |
|
} |
685 |
|
else { |
686 |
|
//poi->setMax(10*int( (poihat+ 10 *poi->getError() )/10 ) ); |
687 |
|
std::cout << "Doing an automatic scan in interval : " << poi->getMin() << " , " << poi->getMax() << std::endl; |
688 |
|
} |
689 |
< |
|
689 |
> |
|
690 |
> |
tw.Start(); |
691 |
|
HypoTestInverterResult * r = calc.GetInterval(); |
692 |
< |
|
693 |
< |
if (rebuild) { |
692 |
> |
std::cout << "Time to perform limit scan \n"; |
693 |
> |
tw.Print(); |
694 |
> |
|
695 |
> |
if (mRebuild) { |
696 |
|
calc.SetCloseProof(1); |
697 |
< |
SamplingDistribution * limDist = calc.GetUpperLimitDistribution(true,nToyToRebuild); |
697 |
> |
tw.Start(); |
698 |
> |
SamplingDistribution * limDist = calc.GetUpperLimitDistribution(true,mNToyToRebuild); |
699 |
> |
std::cout << "Time to rebuild distributions " << std::endl; |
700 |
> |
tw.Print(); |
701 |
> |
|
702 |
|
if (limDist) { |
703 |
|
std::cout << "expected up limit " << limDist->InverseCDF(0.5) << " +/- " |
704 |
|
<< limDist->InverseCDF(0.16) << " " |
705 |
|
<< limDist->InverseCDF(0.84) << "\n"; |
706 |
+ |
|
707 |
+ |
//update r to a new updated result object containing the rebuilt expected p-values distributions |
708 |
+ |
// (it will not recompute the expected limit) |
709 |
+ |
if (r) delete r; // need to delete previous object since GetInterval will return a cloned copy |
710 |
+ |
r = calc.GetInterval(); |
711 |
+ |
|
712 |
|
} |
713 |
|
else |
714 |
|
std::cout << "ERROR : failed to re-build distributions " << std::endl; |
715 |
|
} |
716 |
+ |
|
717 |
+ |
return r; |
718 |
+ |
} |
719 |
|
|
352 |
– |
//update r to a new re-freshed copied |
353 |
– |
r = calc.GetInterval(); |
354 |
– |
|
355 |
– |
delete hc; |
720 |
|
|
357 |
– |
return r; |
358 |
– |
} |
721 |
|
|
722 |
|
void ReadResult(const char * fileName, const char * resultName="", bool useCLs=true) { |
723 |
|
// read a previous stored result from a file given the result name |
725 |
|
StandardHypoTestInvDemo(fileName, resultName,"","","",0,0,useCLs); |
726 |
|
} |
727 |
|
|
728 |
+ |
|
729 |
+ |
#ifdef USE_AS_MAIN |
730 |
+ |
int main() { |
731 |
+ |
StandardHypoTestInvDemo(); |
732 |
+ |
} |
733 |
+ |
#endif |
734 |
+ |
|
735 |
+ |
|
736 |
+ |
|
737 |
+ |
|