1 |
/**********************************************************************************
|
2 |
* Project : TMVA - a Root-integrated toolkit for multivariate data analysis *
|
3 |
* Package : TMVA *
|
4 |
 * Executable: TMVAClassificationApplication                                      *
|
5 |
* *
|
6 |
* This macro provides a simple example on how to use the trained classifiers *
|
7 |
* within an analysis module *
|
8 |
**********************************************************************************/
|
9 |
|
10 |
#include <cstdlib>
|
11 |
#include <vector>
|
12 |
#include <iostream>
|
13 |
#include <map>
|
14 |
#include <string>
|
15 |
|
16 |
#include "TFile.h"
|
17 |
#include "TTree.h"
|
18 |
#include "TString.h"
|
19 |
#include "TSystem.h"
|
20 |
#include "TROOT.h"
|
21 |
#include "TStopwatch.h"
|
22 |
#include "TChain.h"
|
23 |
#include "TChainElement.h"
|
24 |
|
25 |
#include "Math/LorentzVector.h"
|
26 |
#include "Math/VectorUtil.h"
|
27 |
|
28 |
#include "TMVAGui.C"
|
29 |
|
30 |
#if not defined(__CINT__) || defined(__MAKECINT__)
|
31 |
#include "TMVA/Tools.h"
|
32 |
#include "TMVA/Reader.h"
|
33 |
#include "TMVA/MethodCuts.h"
|
34 |
#endif
|
35 |
|
36 |
using namespace TMVA;
|
37 |
typedef ROOT::Math::LorentzVector<ROOT::Math::PxPyPzE4D<double> > LorentzVector;
|
38 |
|
39 |
//--------------------------------------------------------------------
|
40 |
|
41 |
//--------------------------------------------------------------------
// Fill histogram h1 with (value, weight), redirecting out-of-range
// values into the first/last *visible* bin instead of the invisible
// under/overflow bins, so that no entries are lost when the histogram
// is drawn or integrated over its visible range.
//
//   h1     : histogram to fill (must be non-null)
//   value  : quantity to fill
//   weight : event weight (defaults to 1)
//--------------------------------------------------------------------

void fillUnderOverFlow(TH1F *h1, float value, float weight = 1)
{
  float min = h1->GetXaxis()->GetXmin();
  float max = h1->GetXaxis()->GetXmax();

  // NOTE: TH1::Fill puts value == max into the overflow bin (the upper
  // axis edge is exclusive), so we must use >= here — with the original
  // strict '>' a value exactly at the axis maximum was still lost to
  // the overflow bin.
  if (value >= max) value = h1->GetBinCenter(h1->GetNbinsX());
  if (value <  min) value = h1->GetBinCenter(1);

  h1->Fill(value, weight);
}
|
51 |
|
52 |
//--------------------------------------------------------------------
|
53 |
|
54 |
void evaluateMVA_smurf( int mH , TString myMethodList = "" )
|
55 |
{
|
56 |
#ifdef __CINT__
|
57 |
gROOT->ProcessLine( ".O0" ); // turn off optimization in CINT
|
58 |
#endif
|
59 |
|
60 |
//--------------------------------------------------------------------
|
61 |
// path to weights dir (this is where MVA training info is stored)
|
62 |
// output root file will be stored at [path]/output
|
63 |
//--------------------------------------------------------------------
|
64 |
|
65 |
TString path = Form("/smurf/benhoob/MVA/SmurfTraining/hww%i_ww/",mH);
|
66 |
|
67 |
cout << "Looking for weights dir at " << path << endl;
|
68 |
|
69 |
//-----------------------------------
|
70 |
// select samples to run over
|
71 |
//-----------------------------------
|
72 |
|
73 |
const char* babyPath = "/smurf/benhoob/MVA/SmurfBabies/tas-2020";
|
74 |
|
75 |
cout << "Looking for smurf babies at " << babyPath << endl;
|
76 |
|
77 |
vector<char*> samples;
|
78 |
samples.push_back("ww");
|
79 |
samples.push_back("ggww");
|
80 |
samples.push_back("wz");
|
81 |
samples.push_back("zz");
|
82 |
samples.push_back("ttbar");
|
83 |
samples.push_back("wjets");
|
84 |
samples.push_back("dyee");
|
85 |
samples.push_back("dymm");
|
86 |
samples.push_back("dytt");
|
87 |
samples.push_back("tw");
|
88 |
samples.push_back("hww130");
|
89 |
samples.push_back("hww160");
|
90 |
samples.push_back("hww200");
|
91 |
samples.push_back("hww250");
|
92 |
|
93 |
//--------------------------------------------------------------------------------
|
94 |
// IMPORTANT: set the following variables to the same set used for MVA training!!!
|
95 |
//--------------------------------------------------------------------------------
|
96 |
|
97 |
std::map<std::string,int> mvaVar;
|
98 |
mvaVar[ "lep1pt" ] = 1; //pt of leading lepton
|
99 |
mvaVar[ "lep2pt" ] = 1; //pt of sub-leading lepton
|
100 |
mvaVar[ "dPhi" ] = 1; //delta phi btw leptons
|
101 |
mvaVar[ "dilmass" ] = 1; //dilepton mass
|
102 |
mvaVar[ "type" ] = 0; //dilepton flavor type
|
103 |
mvaVar[ "pmet" ] = 1; //projected met
|
104 |
mvaVar[ "met" ] = 0; //met
|
105 |
mvaVar[ "mt" ] = 1; //tranvserse higgs mass
|
106 |
mvaVar[ "mt1" ] = 1; //transverse mass of leading lepton and met
|
107 |
mvaVar[ "mt2" ] = 1; //transverse mass of sub-leading lepton and met
|
108 |
mvaVar[ "dPhiLep1MET" ] = 1; //delta phi btw leading lepton and met
|
109 |
mvaVar[ "dPhiLep2MET" ] = 0; //delta phi btw leading sub-lepton and met
|
110 |
|
111 |
//---------------------------------------------------------------
|
112 |
// specifies the selection applied to events in the training
|
113 |
//---------------------------------------------------------------
|
114 |
|
115 |
float dilmass_cut = 10000;
|
116 |
|
117 |
if ( mH == 130 ) dilmass_cut = 90.0;
|
118 |
else if( mH == 160 ) dilmass_cut = 100.0;
|
119 |
else if( mH == 200 ) dilmass_cut = 130.0;
|
120 |
else if( mH == 250 ) dilmass_cut = 250.0;
|
121 |
else{
|
122 |
std::cout << "Error, unrecognized higgs mass " << mH << " GeV, quitting" << std::endl;
|
123 |
exit(0);
|
124 |
}
|
125 |
|
126 |
cout << "Using dilepton mass < " << dilmass_cut << endl;
|
127 |
|
128 |
|
129 |
|
130 |
|
131 |
//----------------------------------------------------------------------------------
|
132 |
// DON'T MODIFY ANYTHING BELOW THIS LINE UNLESS YOU HAVE HOOBERMAN'S PERMISSION!!!
|
133 |
//----------------------------------------------------------------------------------
|
134 |
|
135 |
|
136 |
|
137 |
|
138 |
|
139 |
|
140 |
// This loads the library
|
141 |
TMVA::Tools::Instance();
|
142 |
|
143 |
// Default MVA methods to be trained + tested
|
144 |
std::map<std::string,int> Use;
|
145 |
|
146 |
// --- Cut optimisation
|
147 |
Use["Cuts"] = 1;
|
148 |
Use["CutsD"] = 1;
|
149 |
Use["CutsPCA"] = 0;
|
150 |
Use["CutsGA"] = 0;
|
151 |
Use["CutsSA"] = 0;
|
152 |
//
|
153 |
// --- 1-dimensional likelihood ("naive Bayes estimator")
|
154 |
Use["Likelihood"] = 1;
|
155 |
Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings)
|
156 |
Use["LikelihoodPCA"] = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
|
157 |
Use["LikelihoodKDE"] = 0;
|
158 |
Use["LikelihoodMIX"] = 0;
|
159 |
//
|
160 |
// --- Mutidimensional likelihood and Nearest-Neighbour methods
|
161 |
Use["PDERS"] = 1;
|
162 |
Use["PDERSD"] = 0;
|
163 |
Use["PDERSPCA"] = 0;
|
164 |
Use["PDEFoam"] = 1;
|
165 |
Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting
|
166 |
Use["KNN"] = 1; // k-nearest neighbour method
|
167 |
//
|
168 |
// --- Linear Discriminant Analysis
|
169 |
Use["LD"] = 1; // Linear Discriminant identical to Fisher
|
170 |
Use["Fisher"] = 0;
|
171 |
Use["FisherG"] = 0;
|
172 |
Use["BoostedFisher"] = 0; // uses generalised MVA method boosting
|
173 |
Use["HMatrix"] = 0;
|
174 |
//
|
175 |
// --- Function Discriminant analysis
|
176 |
Use["FDA_GA"] = 1; // minimisation of user-defined function using Genetics Algorithm
|
177 |
Use["FDA_SA"] = 0;
|
178 |
Use["FDA_MC"] = 0;
|
179 |
Use["FDA_MT"] = 0;
|
180 |
Use["FDA_GAMT"] = 0;
|
181 |
Use["FDA_MCMT"] = 0;
|
182 |
//
|
183 |
// --- Neural Networks (all are feed-forward Multilayer Perceptrons)
|
184 |
Use["MLP"] = 0; // Recommended ANN
|
185 |
Use["MLPBFGS"] = 0; // Recommended ANN with optional training method
|
186 |
Use["MLPBNN"] = 1; // Recommended ANN with BFGS training method and bayesian regulator
|
187 |
Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH
|
188 |
Use["TMlpANN"] = 0; // ROOT's own ANN
|
189 |
//
|
190 |
// --- Support Vector Machine
|
191 |
Use["SVM"] = 1;
|
192 |
//
|
193 |
// --- Boosted Decision Trees
|
194 |
Use["BDT"] = 1; // uses Adaptive Boost
|
195 |
Use["BDTG"] = 0; // uses Gradient Boost
|
196 |
Use["BDTB"] = 0; // uses Bagging
|
197 |
Use["BDTD"] = 0; // decorrelation + Adaptive Boost
|
198 |
//
|
199 |
// --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
|
200 |
Use["RuleFit"] = 1;
|
201 |
// ---------------------------------------------------------------
|
202 |
Use["Plugin"] = 0;
|
203 |
Use["Category"] = 0;
|
204 |
Use["SVM_Gauss"] = 0;
|
205 |
Use["SVM_Poly"] = 0;
|
206 |
Use["SVM_Lin"] = 0;
|
207 |
|
208 |
std::cout << std::endl;
|
209 |
std::cout << "==> Start TMVAClassificationApplication" << std::endl;
|
210 |
|
211 |
// Select methods (don't look at this code - not of interest)
|
212 |
if (myMethodList != "") {
|
213 |
for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
|
214 |
|
215 |
std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' );
|
216 |
for (UInt_t i=0; i<mlist.size(); i++) {
|
217 |
std::string regMethod(mlist[i]);
|
218 |
|
219 |
if (Use.find(regMethod) == Use.end()) {
|
220 |
std::cout << "Method \"" << regMethod
|
221 |
<< "\" not known in TMVA under this name. Choose among the following:" << std::endl;
|
222 |
for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) {
|
223 |
std::cout << it->first << " ";
|
224 |
}
|
225 |
std::cout << std::endl;
|
226 |
return;
|
227 |
}
|
228 |
Use[regMethod] = 1;
|
229 |
}
|
230 |
}
|
231 |
|
232 |
// --------------------------------------------------------------------------------------------------
|
233 |
|
234 |
const unsigned int nsamples = samples.size();
|
235 |
|
236 |
for( unsigned int i = 0 ; i < nsamples ; ++i ){
|
237 |
|
238 |
// --- Create the Reader object
|
239 |
|
240 |
TMVA::Reader *reader = new TMVA::Reader( "!Color:!Silent" );
|
241 |
|
242 |
// Create a set of variables and declare them to the reader
|
243 |
// - the variable names MUST corresponds in name and type to those given in the weight file(s) used
|
244 |
// Float_t var1, var2;
|
245 |
// Float_t var3, var4;
|
246 |
// reader->AddVariable( "myvar1 := var1+var2", &var1 );
|
247 |
// reader->AddVariable( "myvar2 := var1-var2", &var2 );
|
248 |
// reader->AddVariable( "var3", &var3 );
|
249 |
// reader->AddVariable( "var4", &var4 );
|
250 |
|
251 |
Float_t lep1pt;
|
252 |
Float_t lep2pt;
|
253 |
Float_t dPhi;
|
254 |
Float_t dilmass;
|
255 |
Float_t type;
|
256 |
Float_t pmet;
|
257 |
Float_t met;
|
258 |
Float_t mt;
|
259 |
Float_t mt1;
|
260 |
Float_t mt2;
|
261 |
Float_t dPhiLep1MET;
|
262 |
Float_t dPhiLep2MET;
|
263 |
|
264 |
if (mvaVar["lep1pt"]) reader->AddVariable( "lep1pt", &lep1pt );
|
265 |
if (mvaVar["lep2pt"]) reader->AddVariable( "lep2pt", &lep2pt );
|
266 |
if (mvaVar["dPhi"]) reader->AddVariable( "dPhi", &dPhi );
|
267 |
if (mvaVar["dilmass"]) reader->AddVariable( "dilmass", &dilmass );
|
268 |
if (mvaVar["type"]) reader->AddVariable( "type", &type );
|
269 |
if (mvaVar["pmet"]) reader->AddVariable( "pmet", &pmet );
|
270 |
if (mvaVar["met"]) reader->AddVariable( "met", &met );
|
271 |
if (mvaVar["mt"]) reader->AddVariable( "mt", &mt );
|
272 |
if (mvaVar["mt1"]) reader->AddVariable( "mt1", &mt1 );
|
273 |
if (mvaVar["mt2"]) reader->AddVariable( "mt2", &mt2 );
|
274 |
if (mvaVar["dPhiLep1MET"]) reader->AddVariable( "dPhiLep1MET", &dPhiLep1MET );
|
275 |
if (mvaVar["dPhiLep2MET"]) reader->AddVariable( "dPhiLep2MET", &dPhiLep2MET );
|
276 |
|
277 |
// Spectator variables declared in the training have to be added to the reader, too
|
278 |
// Float_t spec1,spec2;
|
279 |
// reader->AddSpectator( "spec1 := var1*2", &spec1 );
|
280 |
// reader->AddSpectator( "spec2 := var1*3", &spec2 );
|
281 |
|
282 |
Float_t Category_cat1, Category_cat2, Category_cat3;
|
283 |
if (Use["Category"]){
|
284 |
// Add artificial spectators for distinguishing categories
|
285 |
// reader->AddSpectator( "Category_cat1 := var3<=0", &Category_cat1 );
|
286 |
// reader->AddSpectator( "Category_cat2 := (var3>0)&&(var4<0)", &Category_cat2 );
|
287 |
// reader->AddSpectator( "Category_cat3 := (var3>0)&&(var4>=0)", &Category_cat3 );
|
288 |
}
|
289 |
|
290 |
// --- Book the MVA methods
|
291 |
|
292 |
//--------------------------------------------------------------------------------------
|
293 |
// tell evaluateMVA_smurf where to find the weights dir, which contains the trained MVA's.
|
294 |
// In this example, the weights dir is located at [path]/[dir]
|
295 |
// and the output root file is written to [path]/[output]
|
296 |
//--------------------------------------------------------------------------------------
|
297 |
|
298 |
TString dir = path + "weights/";
|
299 |
TString outdir = path + "output/";
|
300 |
TString prefix = "TMVAClassification";
|
301 |
|
302 |
// Book method(s)
|
303 |
for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) {
|
304 |
if (it->second) {
|
305 |
TString methodName = TString(it->first) + TString(" method");
|
306 |
TString weightfile = dir + prefix + TString("_") + TString(it->first) + TString(".weights.xml");
|
307 |
reader->BookMVA( methodName, weightfile );
|
308 |
}
|
309 |
}
|
310 |
|
311 |
// Book output histograms
|
312 |
UInt_t nbin = 1000;
|
313 |
TH1F *histLk(0), *histLkD(0), *histLkPCA(0), *histLkKDE(0), *histLkMIX(0), *histPD(0), *histPDD(0);
|
314 |
TH1F *histPDPCA(0), *histPDEFoam(0), *histPDEFoamErr(0), *histPDEFoamSig(0), *histKNN(0), *histHm(0);
|
315 |
TH1F *histFi(0), *histFiG(0), *histFiB(0), *histLD(0), *histNn(0),*histNnbfgs(0),*histNnbnn(0);
|
316 |
TH1F *histNnC(0), *histNnT(0), *histBdt(0), *histBdtG(0), *histBdtD(0), *histRf(0), *histSVMG(0);
|
317 |
TH1F *histSVMP(0), *histSVML(0), *histFDAMT(0), *histFDAGA(0), *histCat(0), *histPBdt(0);
|
318 |
|
319 |
if (Use["Likelihood"]) histLk = new TH1F( "MVA_Likelihood", "MVA_Likelihood", nbin, -1, 1 );
|
320 |
if (Use["LikelihoodD"]) histLkD = new TH1F( "MVA_LikelihoodD", "MVA_LikelihoodD", nbin, -1, 0.9999 );
|
321 |
if (Use["LikelihoodPCA"]) histLkPCA = new TH1F( "MVA_LikelihoodPCA", "MVA_LikelihoodPCA", nbin, -1, 1 );
|
322 |
if (Use["LikelihoodKDE"]) histLkKDE = new TH1F( "MVA_LikelihoodKDE", "MVA_LikelihoodKDE", nbin, -0.00001, 0.99999 );
|
323 |
if (Use["LikelihoodMIX"]) histLkMIX = new TH1F( "MVA_LikelihoodMIX", "MVA_LikelihoodMIX", nbin, 0, 1 );
|
324 |
if (Use["PDERS"]) histPD = new TH1F( "MVA_PDERS", "MVA_PDERS", nbin, 0, 1 );
|
325 |
if (Use["PDERSD"]) histPDD = new TH1F( "MVA_PDERSD", "MVA_PDERSD", nbin, 0, 1 );
|
326 |
if (Use["PDERSPCA"]) histPDPCA = new TH1F( "MVA_PDERSPCA", "MVA_PDERSPCA", nbin, 0, 1 );
|
327 |
if (Use["KNN"]) histKNN = new TH1F( "MVA_KNN", "MVA_KNN", nbin, 0, 1 );
|
328 |
if (Use["HMatrix"]) histHm = new TH1F( "MVA_HMatrix", "MVA_HMatrix", nbin, -0.95, 1.55 );
|
329 |
if (Use["Fisher"]) histFi = new TH1F( "MVA_Fisher", "MVA_Fisher", nbin, -4, 4 );
|
330 |
if (Use["FisherG"]) histFiG = new TH1F( "MVA_FisherG", "MVA_FisherG", nbin, -1, 1 );
|
331 |
if (Use["BoostedFisher"]) histFiB = new TH1F( "MVA_BoostedFisher", "MVA_BoostedFisher", nbin, -2, 2 );
|
332 |
if (Use["LD"]) histLD = new TH1F( "MVA_LD", "MVA_LD", nbin, -2, 2 );
|
333 |
if (Use["MLP"]) histNn = new TH1F( "MVA_MLP", "MVA_MLP", nbin, -1.25, 1.5 );
|
334 |
if (Use["MLPBFGS"]) histNnbfgs = new TH1F( "MVA_MLPBFGS", "MVA_MLPBFGS", nbin, -1.25, 1.5 );
|
335 |
if (Use["MLPBNN"]) histNnbnn = new TH1F( "MVA_MLPBNN", "MVA_MLPBNN", nbin, -1.25, 1.5 );
|
336 |
if (Use["CFMlpANN"]) histNnC = new TH1F( "MVA_CFMlpANN", "MVA_CFMlpANN", nbin, 0, 1 );
|
337 |
if (Use["TMlpANN"]) histNnT = new TH1F( "MVA_TMlpANN", "MVA_TMlpANN", nbin, -1.3, 1.3 );
|
338 |
if (Use["BDT"]) histBdt = new TH1F( "MVA_BDT", "MVA_BDT", nbin, -1. , 1. );
|
339 |
if (Use["BDTD"]) histBdtD = new TH1F( "MVA_BDTD", "MVA_BDTD", nbin, -0.8, 0.8 );
|
340 |
if (Use["BDTG"]) histBdtG = new TH1F( "MVA_BDTG", "MVA_BDTG", nbin, -1.0, 1.0 );
|
341 |
if (Use["RuleFit"]) histRf = new TH1F( "MVA_RuleFit", "MVA_RuleFit", nbin, -2.0, 2.0 );
|
342 |
if (Use["SVM_Gauss"]) histSVMG = new TH1F( "MVA_SVM_Gauss", "MVA_SVM_Gauss", nbin, 0.0, 1.0 );
|
343 |
if (Use["SVM_Poly"]) histSVMP = new TH1F( "MVA_SVM_Poly", "MVA_SVM_Poly", nbin, 0.0, 1.0 );
|
344 |
if (Use["SVM_Lin"]) histSVML = new TH1F( "MVA_SVM_Lin", "MVA_SVM_Lin", nbin, 0.0, 1.0 );
|
345 |
if (Use["FDA_MT"]) histFDAMT = new TH1F( "MVA_FDA_MT", "MVA_FDA_MT", nbin, -2.0, 3.0 );
|
346 |
if (Use["FDA_GA"]) histFDAGA = new TH1F( "MVA_FDA_GA", "MVA_FDA_GA", nbin, -2.0, 3.0 );
|
347 |
if (Use["Category"]) histCat = new TH1F( "MVA_Category", "MVA_Category", nbin, -2., 2. );
|
348 |
if (Use["Plugin"]) histPBdt = new TH1F( "MVA_PBDT", "MVA_BDT", nbin, -0.8, 0.8 );
|
349 |
|
350 |
if (Use["Likelihood"]) histLk ->Sumw2();
|
351 |
if (Use["LikelihoodD"]) histLkD ->Sumw2();
|
352 |
if (Use["LikelihoodPCA"]) histLkPCA ->Sumw2();
|
353 |
if (Use["LikelihoodKDE"]) histLkKDE ->Sumw2();
|
354 |
if (Use["LikelihoodMIX"]) histLkMIX ->Sumw2();
|
355 |
if (Use["PDERS"]) histPD ->Sumw2();
|
356 |
if (Use["PDERSD"]) histPDD ->Sumw2();
|
357 |
if (Use["PDERSPCA"]) histPDPCA ->Sumw2();
|
358 |
if (Use["KNN"]) histKNN ->Sumw2();
|
359 |
if (Use["HMatrix"]) histHm ->Sumw2();
|
360 |
if (Use["Fisher"]) histFi ->Sumw2();
|
361 |
if (Use["FisherG"]) histFiG ->Sumw2();
|
362 |
if (Use["BoostedFisher"]) histFiB ->Sumw2();
|
363 |
if (Use["LD"]) histLD ->Sumw2();
|
364 |
if (Use["MLP"]) histNn ->Sumw2();
|
365 |
if (Use["MLPBFGS"]) histNnbfgs ->Sumw2();
|
366 |
if (Use["MLPBNN"]) histNnbnn ->Sumw2();
|
367 |
if (Use["CFMlpANN"]) histNnC ->Sumw2();
|
368 |
if (Use["TMlpANN"]) histNnT ->Sumw2();
|
369 |
if (Use["BDT"]) histBdt ->Sumw2();
|
370 |
if (Use["BDTD"]) histBdtD ->Sumw2();
|
371 |
if (Use["BDTG"]) histBdtG ->Sumw2();
|
372 |
if (Use["RuleFit"]) histRf ->Sumw2();
|
373 |
if (Use["SVM_Gauss"]) histSVMG ->Sumw2();
|
374 |
if (Use["SVM_Poly"]) histSVMP ->Sumw2();
|
375 |
if (Use["SVM_Lin"]) histSVML ->Sumw2();
|
376 |
if (Use["FDA_MT"]) histFDAMT ->Sumw2();
|
377 |
if (Use["FDA_GA"]) histFDAGA ->Sumw2();
|
378 |
if (Use["Category"]) histCat ->Sumw2();
|
379 |
if (Use["Plugin"]) histPBdt ->Sumw2();
|
380 |
|
381 |
// PDEFoam also returns per-event error, fill in histogram, and also fill significance
|
382 |
if (Use["PDEFoam"]) {
|
383 |
histPDEFoam = new TH1F( "MVA_PDEFoam", "MVA_PDEFoam", nbin, 0, 1 );
|
384 |
histPDEFoamErr = new TH1F( "MVA_PDEFoamErr", "MVA_PDEFoam error", nbin, 0, 1 );
|
385 |
histPDEFoamSig = new TH1F( "MVA_PDEFoamSig", "MVA_PDEFoam significance", nbin, 0, 10 );
|
386 |
}
|
387 |
|
388 |
// Book example histogram for probability (the other methods are done similarly)
|
389 |
TH1F *probHistFi(0), *rarityHistFi(0);
|
390 |
if (Use["Fisher"]) {
|
391 |
probHistFi = new TH1F( "MVA_Fisher_Proba", "MVA_Fisher_Proba", nbin, 0, 1 );
|
392 |
rarityHistFi = new TH1F( "MVA_Fisher_Rarity", "MVA_Fisher_Rarity", nbin, 0, 1 );
|
393 |
}
|
394 |
|
395 |
// Prepare input tree (this must be replaced by your data source)
|
396 |
// in this example, there is a toy tree with signal and one with background events
|
397 |
// we'll later on use only the "signal" events for the test in this example.
|
398 |
//
|
399 |
|
400 |
TChain *ch = new TChain("tree");
|
401 |
ch -> Add( Form("%s/%s.root" , babyPath,samples.at(i)) );
|
402 |
|
403 |
TFile *f = TFile::Open( Form("%s/%s.root" , babyPath , samples.at(i)) );
|
404 |
assert(f);
|
405 |
TTree* t = (TTree*)f->Get("tree");
|
406 |
assert(t);
|
407 |
t->SetBranchStatus("*", 1);
|
408 |
|
409 |
const char* mydir = outdir;
|
410 |
TFile *out = TFile::Open( Form("%s/%s.root" , mydir, samples.at(i) ) ,"RECREATE" );
|
411 |
TTree *clone;
|
412 |
clone = t->CloneTree(-1, "fast");
|
413 |
|
414 |
Float_t bdt;
|
415 |
Float_t nn;
|
416 |
Float_t fisher;
|
417 |
Int_t test;
|
418 |
|
419 |
TBranch* br_bdt = 0;
|
420 |
TBranch* br_nn = 0;
|
421 |
TBranch* br_fisher = 0;
|
422 |
TBranch* br_test = clone->Branch(Form("test_hww%i_ww",mH) , &test , Form("test_hww%i_ww/I" , mH) );
|
423 |
|
424 |
if(Use["BDT"]) br_bdt = clone->Branch(Form("bdt_hww%i_ww" ,mH) , &bdt , Form("bdt_hww%i_ww/F" ,mH) );
|
425 |
if(Use["MLPBNN"]) br_nn = clone->Branch(Form("nn_hww%i_ww" ,mH) , &nn , Form("nn_hww%i_ww/F" ,mH) );
|
426 |
if(Use["Fisher"]) br_fisher = clone->Branch(Form("fisher_hww%i_ww" ,mH) , &fisher , Form("fisher_hww%i_ww/F" ,mH) );
|
427 |
|
428 |
if(Use["BDT"]) br_bdt -> SetTitle(Form("BDT Output H%i" , mH));
|
429 |
if(Use["MLPBNN"]) br_nn -> SetTitle(Form("MLPBNN Output H%i" , mH));
|
430 |
if(Use["Fisher"]) br_fisher -> SetTitle(Form("Fisher Output H%i" , mH));
|
431 |
|
432 |
//clone->SetAlias("bdt", "bdt");
|
433 |
//clone->SetAlias("nn", "nn");
|
434 |
|
435 |
// --- Event loop
|
436 |
|
437 |
// Prepare the event tree
|
438 |
// - here the variable names have to corresponds to your tree
|
439 |
// - you can use the same variables as above which is slightly faster,
|
440 |
// but of course you can use different ones and copy the values inside the event loop
|
441 |
//
|
442 |
|
443 |
TTree *theTree = (TTree*) ch;
|
444 |
|
445 |
std::cout << "--- Using input files: -------------------" << std::endl;
|
446 |
|
447 |
TObjArray *listOfFiles = ch->GetListOfFiles();
|
448 |
TIter fileIter(listOfFiles);
|
449 |
TChainElement* currentFile = 0;
|
450 |
|
451 |
while((currentFile = (TChainElement*)fileIter.Next())) {
|
452 |
std::cout << currentFile->GetTitle() << std::endl;
|
453 |
}
|
454 |
|
455 |
UInt_t evtype_;
|
456 |
UInt_t event_;
|
457 |
Float_t scale1fb_;
|
458 |
LorentzVector* lep1_ = 0;
|
459 |
LorentzVector* lep2_ = 0;
|
460 |
Float_t dPhi_;
|
461 |
LorentzVector* dilep_ = 0;
|
462 |
UInt_t type_;
|
463 |
Float_t pmet_;
|
464 |
Float_t met_;
|
465 |
Float_t mt_;
|
466 |
Float_t mt1_;
|
467 |
Float_t mt2_;
|
468 |
Float_t dPhiLep1MET_;
|
469 |
Float_t dPhiLep2MET_;
|
470 |
|
471 |
theTree->SetBranchAddress( "evtype" , &evtype_ );
|
472 |
theTree->SetBranchAddress( "event" , &event_ );
|
473 |
theTree->SetBranchAddress( "scale1fb" , &scale1fb_ );
|
474 |
theTree->SetBranchAddress( "lep1" , &lep1_ );
|
475 |
theTree->SetBranchAddress( "lep2" , &lep2_ );
|
476 |
theTree->SetBranchAddress( "dPhi" , &dPhi_ );
|
477 |
theTree->SetBranchAddress( "dilep" , &dilep_ );
|
478 |
theTree->SetBranchAddress( "type" , &type_ );
|
479 |
theTree->SetBranchAddress( "pmet" , &pmet_ );
|
480 |
theTree->SetBranchAddress( "met" , &met_ );
|
481 |
theTree->SetBranchAddress( "mt" , &mt_ );
|
482 |
theTree->SetBranchAddress( "mt1" , &mt1_ );
|
483 |
theTree->SetBranchAddress( "mt2" , &mt2_ );
|
484 |
theTree->SetBranchAddress( "dPhiLep1MET" , &dPhiLep1MET_ );
|
485 |
//theTree->SetBranchAddress( "dPhiLep2MET" , &dPhiLep2MET_ );
|
486 |
|
487 |
// Efficiency calculator for cut method
|
488 |
Int_t nSelCutsGA = 0;
|
489 |
Double_t effS = 0.7;
|
490 |
|
491 |
std::vector<Float_t> vecVar(4); // vector for EvaluateMVA tests
|
492 |
|
493 |
std::cout << "--- Processing: " << theTree->GetEntries() << " events" << std::endl;
|
494 |
TStopwatch sw;
|
495 |
sw.Start();
|
496 |
|
497 |
int npass = 0;
|
498 |
float yield = 0.;
|
499 |
|
500 |
for (Long64_t ievt=0; ievt<theTree->GetEntries();ievt++) {
|
501 |
|
502 |
if (ievt%1000 == 0) std::cout << "--- ... Processing event: " << ievt << std::endl;
|
503 |
|
504 |
theTree->GetEntry(ievt);
|
505 |
|
506 |
//--------------------------------------------------------
|
507 |
// important: here we associate branches to MVA variables
|
508 |
//--------------------------------------------------------
|
509 |
|
510 |
lep1pt = lep1_->pt();
|
511 |
lep2pt = lep2_->pt();
|
512 |
dPhi = dPhi_;
|
513 |
dilmass = dilep_->mass();
|
514 |
type = type_;
|
515 |
pmet = pmet_;
|
516 |
met = met_;
|
517 |
mt = mt_;
|
518 |
mt1 = mt1_;
|
519 |
mt2 = mt2_;
|
520 |
dPhiLep1MET = dPhiLep1MET_;
|
521 |
//dPhiLep2MET = dPhiLep2MET_;
|
522 |
|
523 |
npass++;
|
524 |
yield+=scale1fb_;
|
525 |
|
526 |
// --- Return the MVA outputs and fill into histograms
|
527 |
|
528 |
|
529 |
if (Use["BDT"]){
|
530 |
bdt = reader->EvaluateMVA( "BDT method" );
|
531 |
br_bdt->Fill();
|
532 |
}
|
533 |
if (Use["MLPBNN"]){
|
534 |
nn = reader->EvaluateMVA( "MLPBNN method" );
|
535 |
br_nn->Fill();
|
536 |
}
|
537 |
if (Use["Fisher"]){
|
538 |
fisher = reader->EvaluateMVA( "Fisher method" );
|
539 |
br_fisher->Fill();
|
540 |
}
|
541 |
|
542 |
|
543 |
test = 0;
|
544 |
if( evtype_ == 0 && dilep_->mass() < dilmass_cut && event_ %2 == 1 ) test = 1;
|
545 |
br_test->Fill();
|
546 |
|
547 |
|
548 |
if (Use["CutsGA"]) {
|
549 |
// Cuts is a special case: give the desired signal efficienciy
|
550 |
Bool_t passed = reader->EvaluateMVA( "CutsGA method", effS );
|
551 |
if (passed) nSelCutsGA++;
|
552 |
}
|
553 |
|
554 |
if (Use["Likelihood" ]) histLk ->Fill( reader->EvaluateMVA( "Likelihood method" ) , scale1fb_);
|
555 |
if (Use["LikelihoodD" ]) histLkD ->Fill( reader->EvaluateMVA( "LikelihoodD method" ) , scale1fb_);
|
556 |
if (Use["LikelihoodPCA"]) histLkPCA ->Fill( reader->EvaluateMVA( "LikelihoodPCA method" ) , scale1fb_);
|
557 |
if (Use["LikelihoodKDE"]) histLkKDE ->Fill( reader->EvaluateMVA( "LikelihoodKDE method" ) , scale1fb_);
|
558 |
if (Use["LikelihoodMIX"]) histLkMIX ->Fill( reader->EvaluateMVA( "LikelihoodMIX method" ) , scale1fb_);
|
559 |
if (Use["PDERS" ]) histPD ->Fill( reader->EvaluateMVA( "PDERS method" ) , scale1fb_);
|
560 |
if (Use["PDERSD" ]) histPDD ->Fill( reader->EvaluateMVA( "PDERSD method" ) , scale1fb_);
|
561 |
if (Use["PDERSPCA" ]) histPDPCA ->Fill( reader->EvaluateMVA( "PDERSPCA method" ) , scale1fb_);
|
562 |
if (Use["KNN" ]) histKNN ->Fill( reader->EvaluateMVA( "KNN method" ) , scale1fb_);
|
563 |
if (Use["HMatrix" ]) histHm ->Fill( reader->EvaluateMVA( "HMatrix method" ) , scale1fb_);
|
564 |
if (Use["Fisher" ]) histFi ->Fill( reader->EvaluateMVA( "Fisher method" ) , scale1fb_);
|
565 |
if (Use["FisherG" ]) histFiG ->Fill( reader->EvaluateMVA( "FisherG method" ) , scale1fb_);
|
566 |
if (Use["BoostedFisher"]) histFiB ->Fill( reader->EvaluateMVA( "BoostedFisher method" ) , scale1fb_);
|
567 |
if (Use["LD" ]) histLD ->Fill( reader->EvaluateMVA( "LD method" ) , scale1fb_);
|
568 |
if (Use["MLP" ]) histNn ->Fill( reader->EvaluateMVA( "MLP method" ) , scale1fb_);
|
569 |
if (Use["MLPBFGS" ]) histNnbfgs ->Fill( reader->EvaluateMVA( "MLPBFGS method" ) , scale1fb_);
|
570 |
if (Use["MLPBNN" ]) histNnbnn ->Fill( reader->EvaluateMVA( "MLPBNN method" ) , scale1fb_);
|
571 |
if (Use["CFMlpANN" ]) histNnC ->Fill( reader->EvaluateMVA( "CFMlpANN method" ) , scale1fb_);
|
572 |
if (Use["TMlpANN" ]) histNnT ->Fill( reader->EvaluateMVA( "TMlpANN method" ) , scale1fb_);
|
573 |
if (Use["BDT" ]) histBdt ->Fill( reader->EvaluateMVA( "BDT method" ) , scale1fb_);
|
574 |
if (Use["BDTD" ]) histBdtD ->Fill( reader->EvaluateMVA( "BDTD method" ) , scale1fb_);
|
575 |
if (Use["BDTG" ]) histBdtG ->Fill( reader->EvaluateMVA( "BDTG method" ) , scale1fb_);
|
576 |
if (Use["RuleFit" ]) histRf ->Fill( reader->EvaluateMVA( "RuleFit method" ) , scale1fb_);
|
577 |
if (Use["SVM_Gauss" ]) histSVMG ->Fill( reader->EvaluateMVA( "SVM_Gauss method" ) , scale1fb_);
|
578 |
if (Use["SVM_Poly" ]) histSVMP ->Fill( reader->EvaluateMVA( "SVM_Poly method" ) , scale1fb_);
|
579 |
if (Use["SVM_Lin" ]) histSVML ->Fill( reader->EvaluateMVA( "SVM_Lin method" ) , scale1fb_);
|
580 |
if (Use["FDA_MT" ]) histFDAMT ->Fill( reader->EvaluateMVA( "FDA_MT method" ) , scale1fb_);
|
581 |
if (Use["FDA_GA" ]) histFDAGA ->Fill( reader->EvaluateMVA( "FDA_GA method" ) , scale1fb_);
|
582 |
if (Use["Category" ]) histCat ->Fill( reader->EvaluateMVA( "Category method" ) , scale1fb_);
|
583 |
if (Use["Plugin" ]) histPBdt ->Fill( reader->EvaluateMVA( "P_BDT method" ) , scale1fb_);
|
584 |
|
585 |
// Retrieve also per-event error
|
586 |
if (Use["PDEFoam"]) {
|
587 |
Double_t val = reader->EvaluateMVA( "PDEFoam method" );
|
588 |
Double_t err = reader->GetMVAError();
|
589 |
histPDEFoam ->Fill( val );
|
590 |
histPDEFoamErr->Fill( err );
|
591 |
if (err>1.e-50) histPDEFoamSig->Fill( val/err , scale1fb_);
|
592 |
}
|
593 |
|
594 |
// Retrieve probability instead of MVA output
|
595 |
if (Use["Fisher"]) {
|
596 |
probHistFi ->Fill( reader->GetProba ( "Fisher method" ) , scale1fb_ );
|
597 |
rarityHistFi->Fill( reader->GetRarity( "Fisher method" ) , scale1fb_ );
|
598 |
}
|
599 |
}
|
600 |
|
601 |
std::cout << npass << " events passing selection, yield " << yield << std::endl;
|
602 |
|
603 |
// Get elapsed time
|
604 |
sw.Stop();
|
605 |
std::cout << "--- End of event loop: "; sw.Print();
|
606 |
|
607 |
// Get efficiency for cuts classifier
|
608 |
if (Use["CutsGA"]) std::cout << "--- Efficiency for CutsGA method: " << double(nSelCutsGA)/theTree->GetEntries()
|
609 |
<< " (for a required signal efficiency of " << effS << ")" << std::endl;
|
610 |
|
611 |
if (Use["CutsGA"]) {
|
612 |
|
613 |
// test: retrieve cuts for particular signal efficiency
|
614 |
// CINT ignores dynamic_casts so we have to use a cuts-secific Reader function to acces the pointer
|
615 |
TMVA::MethodCuts* mcuts = reader->FindCutsMVA( "CutsGA method" ) ;
|
616 |
|
617 |
if (mcuts) {
|
618 |
std::vector<Double_t> cutsMin;
|
619 |
std::vector<Double_t> cutsMax;
|
620 |
mcuts->GetCuts( 0.7, cutsMin, cutsMax );
|
621 |
std::cout << "--- -------------------------------------------------------------" << std::endl;
|
622 |
std::cout << "--- Retrieve cut values for signal efficiency of 0.7 from Reader" << std::endl;
|
623 |
for (UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
|
624 |
std::cout << "... Cut: "
|
625 |
<< cutsMin[ivar]
|
626 |
<< " < \""
|
627 |
<< mcuts->GetInputVar(ivar)
|
628 |
<< "\" <= "
|
629 |
<< cutsMax[ivar] << std::endl;
|
630 |
}
|
631 |
std::cout << "--- -------------------------------------------------------------" << std::endl;
|
632 |
}
|
633 |
}
|
634 |
|
635 |
// --- write output baby
|
636 |
|
637 |
clone->Write();
|
638 |
out->Close();
|
639 |
f->Close();
|
640 |
|
641 |
// --- Write histograms
|
642 |
cout << "dir " << dir << endl;
|
643 |
|
644 |
TFile *target = new TFile( Form("%s/%s_histos.root",mydir,samples.at(i) ) ,"RECREATE" );
|
645 |
cout << "Writing to file " << Form("%s/%s_histos.root",mydir,samples.at(i) ) << endl;
|
646 |
|
647 |
if (Use["Likelihood" ]) histLk ->Write();
|
648 |
if (Use["LikelihoodD" ]) histLkD ->Write();
|
649 |
if (Use["LikelihoodPCA"]) histLkPCA ->Write();
|
650 |
if (Use["LikelihoodKDE"]) histLkKDE ->Write();
|
651 |
if (Use["LikelihoodMIX"]) histLkMIX ->Write();
|
652 |
if (Use["PDERS" ]) histPD ->Write();
|
653 |
if (Use["PDERSD" ]) histPDD ->Write();
|
654 |
if (Use["PDERSPCA" ]) histPDPCA ->Write();
|
655 |
if (Use["KNN" ]) histKNN ->Write();
|
656 |
if (Use["HMatrix" ]) histHm ->Write();
|
657 |
if (Use["Fisher" ]) histFi ->Write();
|
658 |
if (Use["FisherG" ]) histFiG ->Write();
|
659 |
if (Use["BoostedFisher"]) histFiB ->Write();
|
660 |
if (Use["LD" ]) histLD ->Write();
|
661 |
if (Use["MLP" ]) histNn ->Write();
|
662 |
if (Use["MLPBFGS" ]) histNnbfgs ->Write();
|
663 |
if (Use["MLPBNN" ]) histNnbnn ->Write();
|
664 |
if (Use["CFMlpANN" ]) histNnC ->Write();
|
665 |
if (Use["TMlpANN" ]) histNnT ->Write();
|
666 |
if (Use["BDT" ]) histBdt ->Write();
|
667 |
if (Use["BDTD" ]) histBdtD ->Write();
|
668 |
if (Use["BDTG" ]) histBdtG ->Write();
|
669 |
if (Use["RuleFit" ]) histRf ->Write();
|
670 |
if (Use["SVM_Gauss" ]) histSVMG ->Write();
|
671 |
if (Use["SVM_Poly" ]) histSVMP ->Write();
|
672 |
if (Use["SVM_Lin" ]) histSVML ->Write();
|
673 |
if (Use["FDA_MT" ]) histFDAMT ->Write();
|
674 |
if (Use["FDA_GA" ]) histFDAGA ->Write();
|
675 |
if (Use["Category" ]) histCat ->Write();
|
676 |
if (Use["Plugin" ]) histPBdt ->Write();
|
677 |
|
678 |
// Write also error and significance histos
|
679 |
if (Use["PDEFoam"]) { histPDEFoam->Write(); histPDEFoamErr->Write(); histPDEFoamSig->Write(); }
|
680 |
|
681 |
// Write also probability hists
|
682 |
if (Use["Fisher"]) { if (probHistFi != 0) probHistFi->Write(); if (rarityHistFi != 0) rarityHistFi->Write(); }
|
683 |
target->Close();
|
684 |
|
685 |
delete reader;
|
686 |
|
687 |
std::cout << "==> TMVAClassificationApplication is done with sample " << samples.at(i) << endl << std::endl;
|
688 |
}
|
689 |
}
|
690 |
|
691 |
#include "TFile.h"
|
692 |
#include "TTree.h"
|
693 |
#include "TBranch.h"
|
694 |
#include "TRandom.h"
|
695 |
|
696 |
using namespace std;
|
697 |
|
698 |
|