1 |
benhoob |
1.4 |
// @(#)root/tmva $Id: TMVA_HWW.C,v 1.3 2011/02/16 11:04:35 benhoob Exp $
|
2 |
benhoob |
1.1 |
/**********************************************************************************
|
3 |
|
|
* Project : TMVA - a ROOT-integrated toolkit for multivariate data analysis *
|
4 |
|
|
* Package : TMVA *
|
5 |
|
|
* Root Macro: TMVAClassification *
|
6 |
|
|
* *
|
7 |
|
|
* This macro provides examples for the training and testing of the *
|
8 |
|
|
* TMVA classifiers. *
|
9 |
|
|
* *
|
10 |
|
|
* As input data is used a toy-MC sample consisting of four Gaussian-distributed *
|
11 |
|
|
* and linearly correlated input variables. *
|
12 |
|
|
* *
|
13 |
|
|
* The methods to be used can be switched on and off by means of booleans, or *
|
14 |
|
|
* via the prompt command, for example: *
|
15 |
|
|
* *
|
16 |
|
|
* root -l ./TMVAClassification.C\(\"Fisher,Likelihood\"\) *
|
17 |
|
|
* *
|
18 |
|
|
* (note that the backslashes are mandatory) *
|
19 |
|
|
* If no method given, a default set of classifiers is used. *
|
20 |
|
|
* *
|
21 |
|
|
* The output file "TMVA.root" can be analysed with the use of dedicated *
|
22 |
|
|
* macros (simply say: root -l <macro.C>), which can be conveniently *
|
23 |
|
|
* invoked through a GUI that will appear at the end of the run of this macro. *
|
24 |
|
|
* Launch the GUI via the command: *
|
25 |
|
|
* *
|
26 |
|
|
* root -l ./TMVAGui.C *
|
27 |
|
|
* *
|
28 |
|
|
**********************************************************************************/
|
29 |
|
|
|
30 |
|
|
#include <cstdlib>
|
31 |
|
|
#include <iostream>
|
32 |
|
|
#include <map>
|
33 |
|
|
#include <string>
|
34 |
|
|
|
35 |
|
|
#include "TChain.h"
|
36 |
|
|
#include "TFile.h"
|
37 |
|
|
#include "TTree.h"
|
38 |
|
|
#include "TString.h"
|
39 |
|
|
#include "TObjString.h"
|
40 |
|
|
#include "TSystem.h"
|
41 |
|
|
#include "TROOT.h"
|
42 |
|
|
|
43 |
|
|
#include "TMVAGui.C"
|
44 |
|
|
|
45 |
|
|
#if not defined(__CINT__) || defined(__MAKECINT__)
|
46 |
|
|
// needs to be included when makecint runs (ACLIC)
|
47 |
|
|
#include "TMVA/Factory.h"
|
48 |
|
|
#include "TMVA/Tools.h"
|
49 |
|
|
#endif
|
50 |
|
|
|
51 |
benhoob |
1.3 |
|
52 |
benhoob |
1.1 |
void TMVA_HWW( TString myMethodList = "" )
|
53 |
|
|
{
|
54 |
|
|
// The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
|
55 |
|
|
// if you use your private .rootrc, or run from a different directory, please copy the
|
56 |
|
|
// corresponding lines from .rootrc
|
57 |
|
|
|
58 |
|
|
// methods to be processed can be given as an argument; use format:
|
59 |
|
|
//
|
60 |
|
|
// mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
|
61 |
|
|
//
|
62 |
|
|
// if you like to use a method via the plugin mechanism, we recommend using
|
63 |
|
|
//
|
64 |
|
|
// mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
|
65 |
|
|
// (an example is given for using the BDT as plugin (see below),
|
66 |
|
|
// but of course the real application is when you write your own
|
67 |
|
|
// method based)
|
68 |
|
|
|
69 |
benhoob |
1.3 |
|
70 |
|
|
//-----------------------------------------------------
|
71 |
|
|
// define event selection (store in TCut sel)
|
72 |
|
|
//-----------------------------------------------------
|
73 |
|
|
|
74 |
|
|
TCut met_projpt = "((event_type<0.5||event_type>2.5)&met_projpt>35)|((event_type>0.5&&event_type<2.5)&met_projpt>20)";
|
75 |
|
|
TCut pt2020 = "lephard_pt > 20 && lepsoft_pt > 20";
|
76 |
|
|
TCut pt2010 = "lephard_pt > 20 && lepsoft_pt > 10";
|
77 |
|
|
TCut jetveto = "jets_num==0 && extralep_num==0 && lowptbtags_num==0 && softmu_num==0";
|
78 |
|
|
TCut mll12 = "dil_mass > 12.";
|
79 |
benhoob |
1.4 |
TCut mll90 = "dil_mass < 90.";
|
80 |
|
|
TCut mll100 = "dil_mass < 100.";
|
81 |
|
|
TCut mll130 = "dil_mass < 130.";
|
82 |
benhoob |
1.3 |
TCut tight_el_ID = "lepsoft_passTighterId==1";
|
83 |
|
|
TCut mutype = "event_type < 1.5";
|
84 |
|
|
TCut eltype = "event_type > 1.5";
|
85 |
benhoob |
1.4 |
TCut elsoft15 = "event_type < 1.5 || lepsoft_pt > 15.";
|
86 |
|
|
|
87 |
|
|
//------------
|
88 |
|
|
//Higgs130
|
89 |
|
|
//------------
|
90 |
|
|
|
91 |
|
|
//TCut sel = pt2010 + met_projpt + jetveto + mll12 + mll90 + tight_el_ID;
|
92 |
|
|
//int mH = 130;
|
93 |
|
|
|
94 |
|
|
//------------
|
95 |
|
|
//Higgs160
|
96 |
|
|
//------------
|
97 |
benhoob |
1.3 |
|
98 |
benhoob |
1.4 |
TCut sel = pt2020 + met_projpt + jetveto + mll12 + mll100;
|
99 |
|
|
int mH = 160;
|
100 |
|
|
|
101 |
|
|
//------------
|
102 |
|
|
//Higgs200
|
103 |
|
|
//------------
|
104 |
|
|
|
105 |
|
|
//TCut sel = pt2020 + met_projpt + jetveto + mll12 + mll130;
|
106 |
|
|
//int mH = 200;
|
107 |
|
|
|
108 |
benhoob |
1.3 |
//-----------------------------------------------------
|
109 |
|
|
// choose which variables to include in MVA training
|
110 |
|
|
//-----------------------------------------------------
|
111 |
|
|
|
112 |
|
|
std::map<std::string,int> mvaVar;
|
113 |
benhoob |
1.4 |
mvaVar[ "lephard_pt" ] = 1;
|
114 |
|
|
mvaVar[ "lepsoft_pt" ] = 1;
|
115 |
|
|
mvaVar[ "dil_dphi" ] = 1;
|
116 |
|
|
mvaVar[ "dil_mass" ] = 1;
|
117 |
|
|
mvaVar[ "event_type" ] = 0;
|
118 |
|
|
mvaVar[ "met_projpt" ] = 0;
|
119 |
|
|
mvaVar[ "met_pt" ] = 0;
|
120 |
|
|
mvaVar[ "mt_lephardmet" ] = 0;
|
121 |
|
|
mvaVar[ "mt_lepsoftmet" ] = 0;
|
122 |
|
|
mvaVar[ "mthiggs" ] = 0;
|
123 |
|
|
mvaVar[ "dphi_lephardmet" ] = 0;
|
124 |
|
|
mvaVar[ "dphi_lepsoftmet" ] = 0;
|
125 |
|
|
mvaVar[ "lepsoft_fbrem" ] = 0;
|
126 |
|
|
mvaVar[ "lepsoft_eOverPIn" ] = 0;
|
127 |
|
|
mvaVar[ "lepsoft_qdphi" ] = 0;
|
128 |
benhoob |
1.3 |
|
129 |
|
|
//---------------------------------
|
130 |
|
|
//choose bkg samples to include
|
131 |
|
|
//---------------------------------
|
132 |
|
|
|
133 |
|
|
const char* babyPath = "/tas/cerati/HtoWWmvaBabies/latest";
|
134 |
|
|
|
135 |
|
|
TChain *chbackground = new TChain("Events");
|
136 |
benhoob |
1.4 |
chbackground->Add(Form("%s/WWTo2L2Nu_PU_testFinal_baby.root",babyPath));
|
137 |
|
|
chbackground->Add(Form("%s/GluGluToWWTo4L_PU_testFinal_baby.root",babyPath));
|
138 |
|
|
// chbackground->Add(Form("%s/WZ_PU_testFinal_baby.root",babyPath));
|
139 |
|
|
// chbackground->Add(Form("%s/ZZ_PU_testFinal_baby.root",babyPath));
|
140 |
|
|
// chbackground->Add(Form("%s/TTJets_PU_testFinal_baby.root",babyPath));
|
141 |
|
|
// chbackground->Add(Form("%s/tW_PU_testFinal_baby.root",babyPath));
|
142 |
|
|
// chbackground->Add(Form("%s/WJetsToLNu_PU_testFinal_baby.root",babyPath));
|
143 |
|
|
// chbackground->Add(Form("%s/DYToMuMuM20_PU_testFinal_baby.root",babyPath) );
|
144 |
|
|
// chbackground->Add(Form("%s/DYToMuMuM10To20_PU_testFinal_baby.root",babyPath) );
|
145 |
|
|
// chbackground->Add(Form("%s/DYToEEM20_PU_testFinal_baby.root",babyPath) );
|
146 |
|
|
// chbackground->Add(Form("%s/DYToEEM10To20_PU_testFinal_baby.root",babyPath) );
|
147 |
|
|
// chbackground->Add(Form("%s/DYToTauTauM20_PU_testFinal_baby.root",babyPath) );
|
148 |
|
|
// chbackground->Add(Form("%s/DYToTauTauM10To20_PU_testFinal_baby.root",babyPath) );
|
149 |
|
|
|
150 |
|
|
// chbackground->Add(Form("%s/WJetsToLNu_PU_testFinal_baby_FO1.root",babyPath));
|
151 |
|
|
// chbackground->Add(Form("%s/WJetsToLNu_PU_testFinal_baby_FO2.root",babyPath));
|
152 |
|
|
// chbackground->Add(Form("%s/WJetsToLNu_PU_testFinal_baby_FO3.root",babyPath));
|
153 |
|
|
// chbackground->Add(Form("%s/WToLNu_FOv3_testFinal_baby.root",babyPath));
|
154 |
|
|
// chbackground->Add(Form("%s/WJetsToLNu_PU_testFinal_baby_FO4.root",babyPath));
|
155 |
|
|
|
156 |
|
|
//---------------------------------
|
157 |
|
|
//choose signal sample to include
|
158 |
|
|
//---------------------------------
|
159 |
benhoob |
1.3 |
|
160 |
benhoob |
1.4 |
TChain *chsignal = new TChain("Events");
|
161 |
benhoob |
1.3 |
|
162 |
benhoob |
1.4 |
if( mH == 130 ){
|
163 |
|
|
chsignal->Add(Form("%s/HToWWTo2L2NuM130_PU_testFinal_baby.root",babyPath));
|
164 |
|
|
chsignal->Add(Form("%s/HToWWToLNuTauNuM130_PU_testFinal_baby.root",babyPath));
|
165 |
|
|
chsignal->Add(Form("%s/HToWWTo2Tau2NuM130_PU_testFinal_baby.root",babyPath));
|
166 |
|
|
}
|
167 |
|
|
else if( mH == 160 ){
|
168 |
|
|
chsignal->Add(Form("%s/HToWWTo2L2NuM160_PU_testFinal_baby.root",babyPath));
|
169 |
|
|
chsignal->Add(Form("%s/HToWWToLNuTauNuM160_PU_testFinal_baby.root",babyPath));
|
170 |
|
|
chsignal->Add(Form("%s/HToWWTo2Tau2NuM160_PU_testFinal_baby.root",babyPath));
|
171 |
|
|
}
|
172 |
|
|
else if( mH == 200 ){
|
173 |
|
|
chsignal->Add(Form("%s/HToWWTo2L2NuM200_PU_testFinal_baby.root",babyPath));
|
174 |
|
|
chsignal->Add(Form("%s/HToWWToLNuTauNuM200_PU_testFinal_baby.root",babyPath));
|
175 |
|
|
chsignal->Add(Form("%s/HToWWTo2Tau2NuM200_PU_testFinal_baby.root",babyPath));
|
176 |
|
|
}
|
177 |
|
|
else{
|
178 |
|
|
std::cout << "Error, unrecognized higgs mass " << mH << " GeV, quitting" << std::endl;
|
179 |
|
|
exit(0);
|
180 |
|
|
}
|
181 |
|
|
|
182 |
|
|
//-----------------------------------------------------
|
183 |
|
|
// choose backgrounds to include for multiple outputs
|
184 |
|
|
//-----------------------------------------------------
|
185 |
|
|
|
186 |
|
|
bool doMultipleOutputs = false;
|
187 |
|
|
|
188 |
|
|
TChain *chww = new TChain("Events");
|
189 |
|
|
chww->Add(Form("%s/WWTo2L2Nu_PU_testFinal_baby.root",babyPath));
|
190 |
|
|
chww->Add(Form("%s/GluGluToWWTo4L_PU_testFinal_baby.root",babyPath));
|
191 |
|
|
|
192 |
|
|
TChain *chwjets = new TChain("Events");
|
193 |
|
|
chwjets->Add(Form("%s/WJetsToLNu_PU_testFinal_baby.root",babyPath));
|
194 |
|
|
|
195 |
|
|
TChain *chtt = new TChain("Events");
|
196 |
|
|
chtt->Add(Form("%s/TTJets_PU_testFinal_baby.root",babyPath));
|
197 |
|
|
|
198 |
|
|
std::map<std::string,int> includeBkg;
|
199 |
|
|
includeBkg["ww"] = 1;
|
200 |
|
|
includeBkg["wjets"] = 0;
|
201 |
|
|
includeBkg["tt"] = 0;
|
202 |
benhoob |
1.3 |
|
203 |
benhoob |
1.1 |
//---------------------------------------------------------------
|
204 |
|
|
// This loads the library
|
205 |
|
|
TMVA::Tools::Instance();
|
206 |
|
|
|
207 |
|
|
// Default MVA methods to be trained + tested
|
208 |
|
|
std::map<std::string,int> Use;
|
209 |
|
|
|
210 |
|
|
// --- Cut optimisation
|
211 |
|
|
Use["Cuts"] = 1;
|
212 |
|
|
Use["CutsD"] = 1;
|
213 |
|
|
Use["CutsPCA"] = 0;
|
214 |
|
|
Use["CutsGA"] = 0;
|
215 |
|
|
Use["CutsSA"] = 0;
|
216 |
|
|
//
|
217 |
|
|
// --- 1-dimensional likelihood ("naive Bayes estimator")
|
218 |
|
|
Use["Likelihood"] = 1;
|
219 |
|
|
Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings)
|
220 |
|
|
Use["LikelihoodPCA"] = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
|
221 |
|
|
Use["LikelihoodKDE"] = 0;
|
222 |
|
|
Use["LikelihoodMIX"] = 0;
|
223 |
|
|
//
|
224 |
|
|
// --- Multidimensional likelihood and Nearest-Neighbour methods
|
225 |
|
|
Use["PDERS"] = 1;
|
226 |
|
|
Use["PDERSD"] = 0;
|
227 |
|
|
Use["PDERSPCA"] = 0;
|
228 |
|
|
Use["PDEFoam"] = 1;
|
229 |
|
|
Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting
|
230 |
|
|
Use["KNN"] = 1; // k-nearest neighbour method
|
231 |
|
|
//
|
232 |
|
|
// --- Linear Discriminant Analysis
|
233 |
|
|
Use["LD"] = 1; // Linear Discriminant identical to Fisher
|
234 |
|
|
Use["Fisher"] = 0;
|
235 |
|
|
Use["FisherG"] = 0;
|
236 |
|
|
Use["BoostedFisher"] = 0; // uses generalised MVA method boosting
|
237 |
|
|
Use["HMatrix"] = 0;
|
238 |
|
|
//
|
239 |
|
|
// --- Function Discriminant analysis
|
240 |
|
|
Use["FDA_GA"] = 1; // minimisation of user-defined function using Genetics Algorithm
|
241 |
|
|
Use["FDA_SA"] = 0;
|
242 |
|
|
Use["FDA_MC"] = 0;
|
243 |
|
|
Use["FDA_MT"] = 0;
|
244 |
|
|
Use["FDA_GAMT"] = 0;
|
245 |
|
|
Use["FDA_MCMT"] = 0;
|
246 |
|
|
//
|
247 |
|
|
// --- Neural Networks (all are feed-forward Multilayer Perceptrons)
|
248 |
|
|
Use["MLP"] = 0; // Recommended ANN
|
249 |
|
|
Use["MLPBFGS"] = 0; // Recommended ANN with optional training method
|
250 |
|
|
Use["MLPBNN"] = 1; // Recommended ANN with BFGS training method and bayesian regulator
|
251 |
|
|
Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH
|
252 |
|
|
Use["TMlpANN"] = 0; // ROOT's own ANN
|
253 |
|
|
//
|
254 |
|
|
// --- Support Vector Machine
|
255 |
|
|
Use["SVM"] = 1;
|
256 |
|
|
//
|
257 |
|
|
// --- Boosted Decision Trees
|
258 |
|
|
Use["BDT"] = 1; // uses Adaptive Boost
|
259 |
|
|
Use["BDTG"] = 0; // uses Gradient Boost
|
260 |
|
|
Use["BDTB"] = 0; // uses Bagging
|
261 |
|
|
Use["BDTD"] = 0; // decorrelation + Adaptive Boost
|
262 |
|
|
//
|
263 |
|
|
// --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
|
264 |
|
|
Use["RuleFit"] = 1;
|
265 |
benhoob |
1.4 |
//
|
266 |
|
|
// --- multi-output MVA's
|
267 |
|
|
Use["multi_BDTG"] = 1;
|
268 |
|
|
Use["multi_MLP"] = 1;
|
269 |
|
|
Use["multi_FDA_GA"] = 0;
|
270 |
|
|
//
|
271 |
benhoob |
1.1 |
// ---------------------------------------------------------------
|
272 |
|
|
|
273 |
benhoob |
1.4 |
|
274 |
|
|
|
275 |
benhoob |
1.1 |
std::cout << std::endl;
|
276 |
|
|
std::cout << "==> Start TMVAClassification" << std::endl;
|
277 |
|
|
|
278 |
|
|
// Select methods (don't look at this code - not of interest)
|
279 |
|
|
if (myMethodList != "") {
|
280 |
|
|
for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
|
281 |
|
|
|
282 |
|
|
std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
|
283 |
|
|
for (UInt_t i=0; i<mlist.size(); i++) {
|
284 |
|
|
std::string regMethod(mlist[i]);
|
285 |
|
|
|
286 |
|
|
if (Use.find(regMethod) == Use.end()) {
|
287 |
|
|
std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
|
288 |
|
|
for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
|
289 |
|
|
std::cout << std::endl;
|
290 |
|
|
return;
|
291 |
|
|
}
|
292 |
|
|
Use[regMethod] = 1;
|
293 |
|
|
}
|
294 |
|
|
}
|
295 |
|
|
|
296 |
|
|
// --------------------------------------------------------------------------------------------------
|
297 |
|
|
|
298 |
|
|
// --- Here the preparation phase begins
|
299 |
|
|
|
300 |
|
|
// Create a ROOT output file where TMVA will store ntuples, histograms, etc.
|
301 |
benhoob |
1.2 |
TString outfileName( "TMVA_HWW.root" );
|
302 |
benhoob |
1.1 |
TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
|
303 |
|
|
|
304 |
benhoob |
1.4 |
TString multioutfileName( "TMVA_HWW_multi.root" );
|
305 |
|
|
TFile* multioutputFile;
|
306 |
|
|
if( doMultipleOutputs )
|
307 |
|
|
multioutputFile = TFile::Open( multioutfileName, "RECREATE" );
|
308 |
|
|
|
309 |
benhoob |
1.1 |
// Create the factory object. Later you can choose the methods
|
310 |
|
|
// whose performance you'd like to investigate. The factory is
|
311 |
|
|
// the only TMVA object you have to interact with
|
312 |
|
|
//
|
313 |
|
|
// The first argument is the base of the name of all the
|
314 |
|
|
// weightfiles in the directory weight/
|
315 |
|
|
//
|
316 |
|
|
// The second argument is the output file for the training results
|
317 |
|
|
// All TMVA output can be suppressed by removing the "!" (not) in
|
318 |
|
|
// front of the "Silent" argument in the option string
|
319 |
|
|
TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
|
320 |
|
|
"!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );
|
321 |
benhoob |
1.4 |
|
322 |
|
|
TMVA::Factory *multifactory;
|
323 |
|
|
if( doMultipleOutputs )
|
324 |
|
|
multifactory= new TMVA::Factory( "TMVAMulticlass", multioutputFile,
|
325 |
|
|
"!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" );
|
326 |
benhoob |
1.1 |
|
327 |
benhoob |
1.4 |
|
328 |
benhoob |
1.1 |
// If you wish to modify default settings
|
329 |
|
|
// (please check "src/Config.h" to see all available global options)
|
330 |
|
|
// (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
|
331 |
|
|
// (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";
|
332 |
|
|
|
333 |
|
|
// Define the input variables that shall be used for the MVA training
|
334 |
|
|
// note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
|
335 |
|
|
// [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
|
336 |
|
|
//factory->AddVariable( "myvar1 := var1+var2", 'F' );
|
337 |
|
|
//factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
|
338 |
|
|
//factory->AddVariable( "var3", "Variable 3", "units", 'F' );
|
339 |
|
|
//factory->AddVariable( "var4", "Variable 4", "units", 'F' );
|
340 |
|
|
|
341 |
benhoob |
1.2 |
//--------------------------------------------------------
|
342 |
|
|
// choose which variables to include in training
|
343 |
|
|
//--------------------------------------------------------
|
344 |
benhoob |
1.4 |
|
345 |
|
|
if (mvaVar["lephard_pt"]) factory->AddVariable( "lephard_pt", "1st lepton pt", "GeV", 'F' );
|
346 |
|
|
if (mvaVar["lepsoft_pt"]) factory->AddVariable( "lepsoft_pt", "2nd lepton pt", "GeV", 'F' );
|
347 |
|
|
if (mvaVar["dil_dphi"]) factory->AddVariable( "dil_dphi", "dphi(ll)", "", 'F' );
|
348 |
|
|
if (mvaVar["dil_mass"]) factory->AddVariable( "dil_mass", "M(ll)", "GeV", 'F' );
|
349 |
|
|
if (mvaVar["event_type"]) factory->AddVariable( "event_type", "Dil Flavor Type", "", 'F' );
|
350 |
|
|
if (mvaVar["met_projpt"]) factory->AddVariable( "met_projpt", "Proj. MET", "GeV", 'F' );
|
351 |
|
|
if (mvaVar["met_pt"]) factory->AddVariable( "met_pt", "MET", "GeV", 'F' );
|
352 |
|
|
if (mvaVar["mt_lephardmet"]) factory->AddVariable( "mt_lephardmet", "MT(lep1,MET)", "GeV", 'F' );
|
353 |
|
|
if (mvaVar["mt_lepsoftmet"]) factory->AddVariable( "mt_lepsoftmet", "MT(lep2,MET)", "GeV", 'F' );
|
354 |
|
|
if (mvaVar["mthiggs"]) factory->AddVariable( "mthiggs", "MT(Higgs)", "GeV", 'F' );
|
355 |
|
|
if (mvaVar["dphi_lephardmet"]) factory->AddVariable( "dphi_lephardmet", "dphi(lep1,MET)", "GeV", 'F' );
|
356 |
|
|
if (mvaVar["dphi_lepsoftmet"]) factory->AddVariable( "dphi_lepsoftmet", "dphi(lep2,MET)", "GeV", 'F' );
|
357 |
|
|
if (mvaVar["lepsoft_fbrem"]) factory->AddVariable( "lepsoft_fbrem", "2nd lepton f_{brem}", "", 'F' );
|
358 |
|
|
if (mvaVar["lepsoft_eOverPIn"]) factory->AddVariable( "lepsoft_eOverPIn", "2nd lepton E/p", "", 'F' );
|
359 |
|
|
if (mvaVar["lepsoft_qdphi"]) factory->AddVariable( "lepsoft_q * lepsoft_dPhiIn", "2nd lepton q#times#Delta#phi", "", 'F' );
|
360 |
|
|
|
361 |
|
|
if( doMultipleOutputs ){
|
362 |
|
|
if (mvaVar["lephard_pt"]) multifactory->AddVariable( "lephard_pt", "1st lepton pt", "GeV", 'F' );
|
363 |
|
|
if (mvaVar["lepsoft_pt"]) multifactory->AddVariable( "lepsoft_pt", "2nd lepton pt", "GeV", 'F' );
|
364 |
|
|
if (mvaVar["dil_dphi"]) multifactory->AddVariable( "dil_dphi", "dphi(ll)", "", 'F' );
|
365 |
|
|
if (mvaVar["dil_mass"]) multifactory->AddVariable( "dil_mass", "M(ll)", "GeV", 'F' );
|
366 |
|
|
if (mvaVar["event_type"]) multifactory->AddVariable( "event_type", "Dil Flavor Type", "", 'F' );
|
367 |
|
|
if (mvaVar["met_projpt"]) multifactory->AddVariable( "met_projpt", "Proj. MET", "GeV", 'F' );
|
368 |
|
|
if (mvaVar["met_pt"]) multifactory->AddVariable( "met_pt", "MET", "GeV", 'F' );
|
369 |
|
|
if (mvaVar["mt_lephardmet"]) multifactory->AddVariable( "mt_lephardmet", "MT(lep1,MET)", "GeV", 'F' );
|
370 |
|
|
if (mvaVar["mt_lepsoftmet"]) multifactory->AddVariable( "mt_lepsoftmet", "MT(lep2,MET)", "GeV", 'F' );
|
371 |
|
|
if (mvaVar["mthiggs"]) multifactory->AddVariable( "mthiggs", "MT(Higgs)", "GeV", 'F' );
|
372 |
|
|
if (mvaVar["dphi_lephardmet"]) multifactory->AddVariable( "dphi_lephardmet", "dphi(lep1,MET)", "GeV", 'F' );
|
373 |
|
|
if (mvaVar["dphi_lepsoftmet"]) multifactory->AddVariable( "dphi_lepsoftmet", "dphi(lep2,MET)", "GeV", 'F' );
|
374 |
|
|
if (mvaVar["lepsoft_fbrem"]) multifactory->AddVariable( "lepsoft_fbrem", "2nd lepton f_{brem}", "", 'F' );
|
375 |
|
|
if (mvaVar["lepsoft_eOverPIn"]) multifactory->AddVariable( "lepsoft_eOverPIn", "2nd lepton E/p", "", 'F' );
|
376 |
|
|
if (mvaVar["lepsoft_qdphi"]) multifactory->AddVariable( "lepsoft_q * lepsoft_dPhiIn", "2nd lepton q#times#Delta#phi", "", 'F' );
|
377 |
|
|
}
|
378 |
benhoob |
1.3 |
|
379 |
|
|
if (mvaVar["lephard_pt"]) cout << "Adding variable to MVA training: lephard_pt" << endl;
|
380 |
|
|
if (mvaVar["lepsoft_pt"]) cout << "Adding variable to MVA training: lepsoft_pt" << endl;
|
381 |
|
|
if (mvaVar["dil_dphi"]) cout << "Adding variable to MVA training: dil_dphi" << endl;
|
382 |
|
|
if (mvaVar["dil_mass"]) cout << "Adding variable to MVA training: dil_mass" << endl;
|
383 |
|
|
if (mvaVar["event_type"]) cout << "Adding variable to MVA training: event_type" << endl;
|
384 |
|
|
if (mvaVar["met_projpt"]) cout << "Adding variable to MVA training: met_projpt" << endl;
|
385 |
|
|
if (mvaVar["met_pt"]) cout << "Adding variable to MVA training: met_pt" << endl;
|
386 |
|
|
if (mvaVar["mt_lephardmet"]) cout << "Adding variable to MVA training: mt_lephardmet" << endl;
|
387 |
|
|
if (mvaVar["mt_lepsoftmet"]) cout << "Adding variable to MVA training: mt_lepsoftmet" << endl;
|
388 |
benhoob |
1.4 |
if (mvaVar["mthiggs"]) cout << "Adding variable to MVA training: mthiggs" << endl;
|
389 |
benhoob |
1.3 |
if (mvaVar["dphi_lephardmet"]) cout << "Adding variable to MVA training: dphi_lephardmet" << endl;
|
390 |
|
|
if (mvaVar["dphi_lepsoftmet"]) cout << "Adding variable to MVA training: dphi_lepsoftmet" << endl;
|
391 |
benhoob |
1.4 |
if (mvaVar["lepsoft_fbrem"]) cout << "Adding variable to MVA training: lepsoft_fbrem" << endl;
|
392 |
|
|
if (mvaVar["lepsoft_eOverPIn"]) cout << "Adding variable to MVA training: lepsoft_eOverPIn" << endl;
|
393 |
|
|
if (mvaVar["lepsoft_qdphi"]) cout << "Adding variable to MVA training: lepsoft_qdphi" << endl;
|
394 |
benhoob |
1.1 |
|
395 |
|
|
// You can add so-called "Spectator variables", which are not used in the MVA training,
|
396 |
|
|
// but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
|
397 |
|
|
// input variables, the response values of all trained MVAs, and the spectator variables
|
398 |
|
|
//factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' );
|
399 |
|
|
//factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' );
|
400 |
|
|
|
401 |
benhoob |
1.2 |
TTree *signal = (TTree*) chsignal;
|
402 |
|
|
TTree *background = (TTree*) chbackground;
|
403 |
benhoob |
1.4 |
|
404 |
benhoob |
1.2 |
std::cout << "--- TMVAClassification : Using bkg input files: -------------------" << std::endl;
|
405 |
|
|
|
406 |
|
|
TObjArray *listOfBkgFiles = chbackground->GetListOfFiles();
|
407 |
|
|
TIter bkgFileIter(listOfBkgFiles);
|
408 |
|
|
TChainElement* currentBkgFile = 0;
|
409 |
|
|
|
410 |
|
|
while((currentBkgFile = (TChainElement*)bkgFileIter.Next())) {
|
411 |
|
|
std::cout << currentBkgFile->GetTitle() << std::endl;
|
412 |
|
|
}
|
413 |
|
|
|
414 |
|
|
std::cout << "--- TMVAClassification : Using sig input files: -------------------" << std::endl;
|
415 |
benhoob |
1.4 |
|
416 |
benhoob |
1.2 |
TObjArray *listOfSigFiles = chsignal->GetListOfFiles();
|
417 |
|
|
TIter sigFileIter(listOfSigFiles);
|
418 |
|
|
TChainElement* currentSigFile = 0;
|
419 |
benhoob |
1.1 |
|
420 |
benhoob |
1.2 |
while((currentSigFile = (TChainElement*)sigFileIter.Next())) {
|
421 |
|
|
std::cout << currentSigFile->GetTitle() << std::endl;
|
422 |
|
|
}
|
423 |
benhoob |
1.1 |
|
424 |
|
|
// global event weights per tree (see below for setting event-wise weights)
|
425 |
benhoob |
1.2 |
Double_t signalWeight = 1.0;
|
426 |
|
|
Double_t backgroundWeight = 1.0;
|
427 |
benhoob |
1.1 |
|
428 |
|
|
// You can add an arbitrary number of signal or background trees
|
429 |
|
|
factory->AddSignalTree ( signal, signalWeight );
|
430 |
|
|
factory->AddBackgroundTree( background, backgroundWeight );
|
431 |
benhoob |
1.4 |
|
432 |
benhoob |
1.1 |
// To give different trees for training and testing, do as follows:
|
433 |
|
|
// factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
|
434 |
|
|
// factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" );
|
435 |
|
|
|
436 |
|
|
// Use the following code instead of the above two or four lines to add signal and background
|
437 |
|
|
// training and test events "by hand"
|
438 |
|
|
// NOTE that in this case one should not give expressions (such as "var1+var2") in the input
|
439 |
|
|
// variable definition, but simply compute the expression before adding the event
|
440 |
|
|
//
|
441 |
|
|
// // --- begin ----------------------------------------------------------
|
442 |
|
|
// std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
|
443 |
|
|
// Float_t treevars[4], weight;
|
444 |
|
|
//
|
445 |
|
|
// // Signal
|
446 |
|
|
// for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
|
447 |
|
|
// for (UInt_t i=0; i<signal->GetEntries(); i++) {
|
448 |
|
|
// signal->GetEntry(i);
|
449 |
|
|
// for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
|
450 |
|
|
// // add training and test events; here: first half is training, second is testing
|
451 |
|
|
// // note that the weight can also be event-wise
|
452 |
|
|
// if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight );
|
453 |
|
|
// else factory->AddSignalTestEvent ( vars, signalWeight );
|
454 |
|
|
// }
|
455 |
|
|
//
|
456 |
|
|
// // Background (has event weights)
|
457 |
|
|
// background->SetBranchAddress( "weight", &weight );
|
458 |
|
|
// for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
|
459 |
|
|
// for (UInt_t i=0; i<background->GetEntries(); i++) {
|
460 |
|
|
// background->GetEntry(i);
|
461 |
|
|
// for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
|
462 |
|
|
// // add training and test events; here: first half is training, second is testing
|
463 |
|
|
// // note that the weight can also be event-wise
|
464 |
|
|
// if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight );
|
465 |
|
|
// else factory->AddBackgroundTestEvent ( vars, backgroundWeight*weight );
|
466 |
|
|
// }
|
467 |
|
|
// --- end ------------------------------------------------------------
|
468 |
|
|
//
|
469 |
|
|
// --- end of tree registration
|
470 |
benhoob |
1.4 |
|
471 |
benhoob |
1.1 |
// Set individual event weights (the variables must exist in the original TTree)
|
472 |
benhoob |
1.4 |
factory->SetSignalWeightExpression ("event_scale1fb * lepsoft_fr");
|
473 |
|
|
factory->SetBackgroundWeightExpression("event_scale1fb * lepsoft_fr");
|
474 |
|
|
|
475 |
|
|
if( doMultipleOutputs ){
|
476 |
|
|
multifactory->AddTree(signal,"Signal");
|
477 |
|
|
multifactory->SetSignalWeightExpression ("event_scale1fb");
|
478 |
|
|
multifactory->SetBackgroundWeightExpression("event_scale1fb");
|
479 |
|
|
multifactory->SetWeightExpression("event_scale1fb");
|
480 |
|
|
|
481 |
|
|
if( includeBkg["ww"] ){
|
482 |
|
|
TTree* ww = (TTree*) chww;
|
483 |
|
|
multifactory->AddTree(ww,"WW");
|
484 |
|
|
cout << "Added WW to multi-MVA" << endl;
|
485 |
|
|
}
|
486 |
|
|
if( includeBkg["wjets"] ){
|
487 |
|
|
TTree* wjets = (TTree*) chwjets;
|
488 |
|
|
multifactory->AddTree(wjets,"WJets");
|
489 |
|
|
cout << "Added W+jets to multi-MVA" << endl;
|
490 |
|
|
}
|
491 |
|
|
if( includeBkg["tt"] ){
|
492 |
|
|
TTree* tt = (TTree*) chtt;
|
493 |
|
|
multifactory->AddTree(tt,"tt");
|
494 |
|
|
cout << "Added ttbar multi-MVA" << endl;
|
495 |
|
|
}
|
496 |
|
|
}
|
497 |
benhoob |
1.1 |
|
498 |
|
|
// Apply additional cuts on the signal and background samples (can be different)
|
499 |
benhoob |
1.2 |
TCut mycuts = sel; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
|
500 |
|
|
TCut mycutb = sel; // for example: TCut mycutb = "abs(var1)<0.5";
|
501 |
benhoob |
1.1 |
|
502 |
|
|
// Tell the factory how to use the training and testing events
|
503 |
|
|
//
|
504 |
|
|
// If no numbers of events are given, half of the events in the tree are used
|
505 |
|
|
// for training, and the other half for testing:
|
506 |
|
|
// factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
|
507 |
|
|
// To also specify the number of testing events, use:
|
508 |
|
|
// factory->PrepareTrainingAndTestTree( mycut,
|
509 |
|
|
// "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
|
510 |
benhoob |
1.4 |
|
511 |
benhoob |
1.2 |
//Use random splitting
|
512 |
benhoob |
1.1 |
factory->PrepareTrainingAndTestTree( mycuts, mycutb,
|
513 |
|
|
"nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
|
514 |
|
|
|
515 |
benhoob |
1.4 |
if( doMultipleOutputs ){
|
516 |
|
|
multifactory->PrepareTrainingAndTestTree( mycuts, mycutb,
|
517 |
|
|
"nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
|
518 |
|
|
}
|
519 |
|
|
|
520 |
benhoob |
1.2 |
//Use alternate splitting
|
521 |
|
|
//(this is preferable since it's easier to track which events were used for training, but the job crashes! need to fix this...)
|
522 |
|
|
//factory->PrepareTrainingAndTestTree( mycuts, mycutb,
|
523 |
|
|
// "nTrain_Signal=0:nTrain_Background=0:SplitMode=Alternate:NormMode=NumEvents:!V" );
|
524 |
|
|
|
525 |
benhoob |
1.1 |
// ---- Book MVA methods
|
526 |
|
|
//
|
527 |
|
|
// Please lookup the various method configuration options in the corresponding cxx files, eg:
|
528 |
|
|
// src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
|
529 |
|
|
// it is possible to preset ranges in the option string in which the cut optimisation should be done:
|
530 |
|
|
// "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable
|
531 |
|
|
|
532 |
|
|
// Cut optimisation
|
533 |
|
|
if (Use["Cuts"])
|
534 |
|
|
factory->BookMethod( TMVA::Types::kCuts, "Cuts",
|
535 |
|
|
"!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );
|
536 |
|
|
|
537 |
|
|
if (Use["CutsD"])
|
538 |
|
|
factory->BookMethod( TMVA::Types::kCuts, "CutsD",
|
539 |
|
|
"!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );
|
540 |
|
|
|
541 |
|
|
if (Use["CutsPCA"])
|
542 |
|
|
factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
|
543 |
|
|
"!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );
|
544 |
|
|
|
545 |
|
|
if (Use["CutsGA"])
|
546 |
|
|
factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
|
547 |
|
|
"H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );
|
548 |
|
|
|
549 |
|
|
if (Use["CutsSA"])
|
550 |
|
|
factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
|
551 |
|
|
"!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
|
552 |
|
|
|
553 |
|
|
// Likelihood ("naive Bayes estimator")
|
554 |
|
|
if (Use["Likelihood"])
|
555 |
|
|
factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
|
556 |
|
|
"H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );
|
557 |
|
|
|
558 |
|
|
// Decorrelated likelihood
|
559 |
|
|
if (Use["LikelihoodD"])
|
560 |
|
|
factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
|
561 |
|
|
"!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );
|
562 |
|
|
|
563 |
|
|
// PCA-transformed likelihood
|
564 |
|
|
if (Use["LikelihoodPCA"])
|
565 |
|
|
factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
|
566 |
|
|
"!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" );
|
567 |
|
|
|
568 |
|
|
// Use a kernel density estimator to approximate the PDFs
|
569 |
|
|
if (Use["LikelihoodKDE"])
|
570 |
|
|
factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
|
571 |
|
|
"!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" );
|
572 |
|
|
|
573 |
|
|
// Use a variable-dependent mix of splines and kernel density estimator
|
574 |
|
|
if (Use["LikelihoodMIX"])
|
575 |
|
|
factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
|
576 |
|
|
"!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" );
|
577 |
|
|
|
578 |
|
|
// Test the multi-dimensional probability density estimator
|
579 |
|
|
// here are the option strings for the MinMax and RMS methods, respectively:
|
580 |
|
|
// "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
|
581 |
|
|
// "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
|
582 |
|
|
if (Use["PDERS"])
|
583 |
|
|
factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
|
584 |
|
|
"!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );
|
585 |
|
|
|
586 |
|
|
if (Use["PDERSD"])
|
587 |
|
|
factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
|
588 |
|
|
"!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );
|
589 |
|
|
|
590 |
|
|
if (Use["PDERSPCA"])
|
591 |
|
|
factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
|
592 |
|
|
"!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );
|
593 |
|
|
|
594 |
|
|
// Multi-dimensional likelihood estimator using self-adapting phase-space binning
|
595 |
|
|
if (Use["PDEFoam"])
|
596 |
|
|
factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
|
597 |
|
|
"H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );
|
598 |
|
|
|
599 |
|
|
if (Use["PDEFoamBoost"])
|
600 |
|
|
factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
|
601 |
|
|
"!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );
|
602 |
|
|
|
603 |
|
|
// K-Nearest Neighbour classifier (KNN)
|
604 |
|
|
if (Use["KNN"])
|
605 |
|
|
factory->BookMethod( TMVA::Types::kKNN, "KNN",
|
606 |
|
|
"H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );
|
607 |
|
|
|
608 |
|
|
// H-Matrix (chi-squared) method
|
609 |
|
|
if (Use["HMatrix"])
|
610 |
|
|
factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" );
|
611 |
|
|
|
612 |
|
|
// Linear discriminant (same as Fisher discriminant)
|
613 |
|
|
if (Use["LD"])
|
614 |
|
|
factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
|
615 |
|
|
|
616 |
|
|
// Fisher discriminant (same as LD)
|
617 |
|
|
if (Use["Fisher"])
|
618 |
|
|
factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
|
619 |
|
|
|
620 |
|
|
// Fisher with Gauss-transformed input variables
|
621 |
|
|
if (Use["FisherG"])
|
622 |
|
|
factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );
|
623 |
|
|
|
624 |
|
|
// Composite classifier: ensemble (tree) of boosted Fisher classifiers
|
625 |
|
|
if (Use["BoostedFisher"])
|
626 |
|
|
factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher",
|
627 |
|
|
"H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" );
|
628 |
|
|
|
629 |
|
|
// Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
|
630 |
|
|
if (Use["FDA_MC"])
|
631 |
|
|
factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
|
632 |
|
|
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );
|
633 |
|
|
|
634 |
|
|
if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
|
635 |
|
|
factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
|
636 |
|
|
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );
|
637 |
|
|
|
638 |
|
|
if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
|
639 |
|
|
factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
|
640 |
|
|
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
|
641 |
|
|
|
642 |
|
|
if (Use["FDA_MT"])
|
643 |
|
|
factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
|
644 |
|
|
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );
|
645 |
|
|
|
646 |
|
|
if (Use["FDA_GAMT"])
|
647 |
|
|
factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
|
648 |
|
|
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );
|
649 |
|
|
|
650 |
|
|
if (Use["FDA_MCMT"])
|
651 |
|
|
factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
|
652 |
|
|
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );
|
653 |
|
|
|
654 |
|
|
// TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
|
655 |
|
|
if (Use["MLP"])
|
656 |
|
|
factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );
|
657 |
|
|
|
658 |
|
|
if (Use["MLPBFGS"])
|
659 |
|
|
factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );
|
660 |
|
|
|
661 |
|
|
if (Use["MLPBNN"])
|
662 |
|
|
factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators
|
663 |
|
|
|
664 |
|
|
// CF(Clermont-Ferrand)ANN
|
665 |
|
|
if (Use["CFMlpANN"])
|
666 |
|
|
factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:...
|
667 |
|
|
|
668 |
|
|
// Tmlp(Root)ANN
|
669 |
|
|
if (Use["TMlpANN"])
|
670 |
|
|
factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:...
|
671 |
|
|
|
672 |
|
|
// Support Vector Machine
|
673 |
|
|
if (Use["SVM"])
|
674 |
|
|
factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );
|
675 |
|
|
|
676 |
|
|
// Boosted Decision Trees
|
677 |
|
|
if (Use["BDTG"]) // Gradient Boost
|
678 |
|
|
factory->BookMethod( TMVA::Types::kBDT, "BDTG",
|
679 |
|
|
"!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" );
|
680 |
|
|
|
681 |
|
|
if (Use["BDT"]) // Adaptive Boost
|
682 |
|
|
factory->BookMethod( TMVA::Types::kBDT, "BDT",
|
683 |
|
|
"!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );
|
684 |
|
|
|
685 |
|
|
if (Use["BDTB"]) // Bagging
|
686 |
|
|
factory->BookMethod( TMVA::Types::kBDT, "BDTB",
|
687 |
|
|
"!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );
|
688 |
|
|
|
689 |
|
|
if (Use["BDTD"]) // Decorrelation + Adaptive Boost
|
690 |
|
|
factory->BookMethod( TMVA::Types::kBDT, "BDTD",
|
691 |
|
|
"!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );
|
692 |
|
|
|
693 |
|
|
// RuleFit -- TMVA implementation of Friedman's method
|
694 |
|
|
if (Use["RuleFit"])
|
695 |
|
|
factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
|
696 |
|
|
"H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );
|
697 |
|
|
|
698 |
benhoob |
1.4 |
if( doMultipleOutputs ){
|
699 |
|
|
if (Use["multi_BDTG"]) // gradient boosted decision trees
|
700 |
|
|
multifactory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:NNodesMax=8");
|
701 |
|
|
if (Use["multi_MLP"]) // neural network
|
702 |
|
|
multifactory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:NCycles=1000:HiddenLayers=N+5,5:TestRate=5:EstimatorType=MSE");
|
703 |
|
|
if (Use["multi_FDA_GA"]) // functional discriminant with GA minimizer
|
704 |
|
|
multifactory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );
|
705 |
|
|
}
|
706 |
|
|
|
707 |
benhoob |
1.1 |
// For an example of the category classifier usage, see: TMVAClassificationCategory
|
708 |
|
|
|
709 |
|
|
// --------------------------------------------------------------------------------------------------
|
710 |
|
|
|
711 |
|
|
// ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events
|
712 |
|
|
|
713 |
|
|
// factory->OptimizeAllMethods("SigEffAt001","Scan");
|
714 |
|
|
// factory->OptimizeAllMethods("ROCIntegral","GA");
|
715 |
|
|
|
716 |
|
|
// --------------------------------------------------------------------------------------------------
|
717 |
|
|
|
718 |
|
|
// ---- Now you can tell the factory to train, test, and evaluate the MVAs
|
719 |
benhoob |
1.4 |
|
720 |
benhoob |
1.1 |
// Train MVAs using the set of training events
|
721 |
|
|
factory->TrainAllMethods();
|
722 |
benhoob |
1.4 |
|
723 |
benhoob |
1.1 |
// ---- Evaluate all MVAs using the set of test events
|
724 |
|
|
factory->TestAllMethods();
|
725 |
benhoob |
1.4 |
|
726 |
benhoob |
1.1 |
// ----- Evaluate and compare performance of all configured MVAs
|
727 |
|
|
factory->EvaluateAllMethods();
|
728 |
benhoob |
1.4 |
|
729 |
|
|
if( doMultipleOutputs ){
|
730 |
|
|
// Train multi-MVAs using the set of training events
|
731 |
|
|
multifactory->TrainAllMethods();
|
732 |
|
|
|
733 |
|
|
// ---- Evaluate all multi-MVAs using the set of test events
|
734 |
|
|
multifactory->TestAllMethods();
|
735 |
|
|
|
736 |
|
|
// ----- Evaluate and compare performance of all configured multi-MVAs
|
737 |
|
|
multifactory->EvaluateAllMethods();
|
738 |
|
|
}
|
739 |
|
|
|
740 |
benhoob |
1.1 |
// --------------------------------------------------------------
|
741 |
|
|
|
742 |
|
|
// Save the output
|
743 |
|
|
outputFile->Close();
|
744 |
benhoob |
1.4 |
if( doMultipleOutputs ) multioutputFile->Close();
|
745 |
benhoob |
1.1 |
|
746 |
|
|
std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
|
747 |
|
|
std::cout << "==> TMVAClassification is done!" << std::endl;
|
748 |
benhoob |
1.4 |
|
749 |
benhoob |
1.1 |
delete factory;
|
750 |
|
|
|
751 |
|
|
// Launch the GUI for the root macros
|
752 |
|
|
if (!gROOT->IsBatch()) TMVAGui( outfileName );
|
753 |
|
|
}
|