ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/benhoob/HWW/TMVA_HWW.C
Revision: 1.4
Committed: Thu Mar 3 11:57:58 2011 UTC (14 years, 2 months ago) by benhoob
Content type: text/plain
Branch: MAIN
CVS Tags: HEAD
Changes since 1.3: +223 -86 lines
Log Message:
Organize code

File Contents

# User Rev Content
1 benhoob 1.4 // @(#)root/tmva $Id: TMVA_HWW.C,v 1.3 2011/02/16 11:04:35 benhoob Exp $
2 benhoob 1.1 /**********************************************************************************
3     * Project : TMVA - a ROOT-integrated toolkit for multivariate data analysis *
4     * Package : TMVA *
5     * Root Macro: TMVAClassification *
6     * *
7     * This macro provides examples for the training and testing of the *
8     * TMVA classifiers. *
9     * *
10     * As input data is used a toy-MC sample consisting of four Gaussian-distributed *
11     * and linearly correlated input variables. *
12     * *
13     * The methods to be used can be switched on and off by means of booleans, or *
14     * via the prompt command, for example: *
15     * *
16     * root -l ./TMVAClassification.C\(\"Fisher,Likelihood\"\) *
17     * *
18     * (note that the backslashes are mandatory) *
19     * If no method given, a default set of classifiers is used. *
20     * *
21     * The output file "TMVA.root" can be analysed with the use of dedicated *
22     * macros (simply say: root -l <macro.C>), which can be conveniently *
23     * invoked through a GUI that will appear at the end of the run of this macro. *
24     * Launch the GUI via the command: *
25     * *
26     * root -l ./TMVAGui.C *
27     * *
28     **********************************************************************************/
29    
30     #include <cstdlib>
31     #include <iostream>
32     #include <map>
33     #include <string>
34    
35     #include "TChain.h"
36     #include "TFile.h"
37     #include "TTree.h"
38     #include "TString.h"
39     #include "TObjString.h"
40     #include "TSystem.h"
41     #include "TROOT.h"
42    
43     #include "TMVAGui.C"
44    
45     #if not defined(__CINT__) || defined(__MAKECINT__)
46     // needs to be included when makecint runs (ACLIC)
47     #include "TMVA/Factory.h"
48     #include "TMVA/Tools.h"
49     #endif
50    
51 benhoob 1.3
52 benhoob 1.1 void TMVA_HWW( TString myMethodList = "" )
53     {
54     // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
55     // if you use your private .rootrc, or run from a different directory, please copy the
56     // corresponding lines from .rootrc
57    
58     // methods to be processed can be given as an argument; use format:
59     //
60     // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
61     //
62     // if you like to use a method via the plugin mechanism, we recommend using
63     //
64     // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
65     // (an example is given for using the BDT as plugin (see below),
66     // but of course the real application is when you write your own
67     // method based)
68    
69 benhoob 1.3
70     //-----------------------------------------------------
71     // define event selection (store in TCut sel)
72     //-----------------------------------------------------
73    
74     TCut met_projpt = "((event_type<0.5||event_type>2.5)&met_projpt>35)|((event_type>0.5&&event_type<2.5)&met_projpt>20)";
75     TCut pt2020 = "lephard_pt > 20 && lepsoft_pt > 20";
76     TCut pt2010 = "lephard_pt > 20 && lepsoft_pt > 10";
77     TCut jetveto = "jets_num==0 && extralep_num==0 && lowptbtags_num==0 && softmu_num==0";
78     TCut mll12 = "dil_mass > 12.";
79 benhoob 1.4 TCut mll90 = "dil_mass < 90.";
80     TCut mll100 = "dil_mass < 100.";
81     TCut mll130 = "dil_mass < 130.";
82 benhoob 1.3 TCut tight_el_ID = "lepsoft_passTighterId==1";
83     TCut mutype = "event_type < 1.5";
84     TCut eltype = "event_type > 1.5";
85 benhoob 1.4 TCut elsoft15 = "event_type < 1.5 || lepsoft_pt > 15.";
86    
87     //------------
88     //Higgs130
89     //------------
90    
91     //TCut sel = pt2010 + met_projpt + jetveto + mll12 + mll90 + tight_el_ID;
92     //int mH = 130;
93    
94     //------------
95     //Higgs160
96     //------------
97 benhoob 1.3
98 benhoob 1.4 TCut sel = pt2020 + met_projpt + jetveto + mll12 + mll100;
99     int mH = 160;
100    
101     //------------
102     //Higgs200
103     //------------
104    
105     //TCut sel = pt2020 + met_projpt + jetveto + mll12 + mll130;
106     //int mH = 200;
107    
108 benhoob 1.3 //-----------------------------------------------------
109     // choose which variables to include in MVA training
110     //-----------------------------------------------------
111    
112     std::map<std::string,int> mvaVar;
113 benhoob 1.4 mvaVar[ "lephard_pt" ] = 1;
114     mvaVar[ "lepsoft_pt" ] = 1;
115     mvaVar[ "dil_dphi" ] = 1;
116     mvaVar[ "dil_mass" ] = 1;
117     mvaVar[ "event_type" ] = 0;
118     mvaVar[ "met_projpt" ] = 0;
119     mvaVar[ "met_pt" ] = 0;
120     mvaVar[ "mt_lephardmet" ] = 0;
121     mvaVar[ "mt_lepsoftmet" ] = 0;
122     mvaVar[ "mthiggs" ] = 0;
123     mvaVar[ "dphi_lephardmet" ] = 0;
124     mvaVar[ "dphi_lepsoftmet" ] = 0;
125     mvaVar[ "lepsoft_fbrem" ] = 0;
126     mvaVar[ "lepsoft_eOverPIn" ] = 0;
127     mvaVar[ "lepsoft_qdphi" ] = 0;
128 benhoob 1.3
129     //---------------------------------
130     //choose bkg samples to include
131     //---------------------------------
132    
133     const char* babyPath = "/tas/cerati/HtoWWmvaBabies/latest";
134    
135     TChain *chbackground = new TChain("Events");
136 benhoob 1.4 chbackground->Add(Form("%s/WWTo2L2Nu_PU_testFinal_baby.root",babyPath));
137     chbackground->Add(Form("%s/GluGluToWWTo4L_PU_testFinal_baby.root",babyPath));
138     // chbackground->Add(Form("%s/WZ_PU_testFinal_baby.root",babyPath));
139     // chbackground->Add(Form("%s/ZZ_PU_testFinal_baby.root",babyPath));
140     // chbackground->Add(Form("%s/TTJets_PU_testFinal_baby.root",babyPath));
141     // chbackground->Add(Form("%s/tW_PU_testFinal_baby.root",babyPath));
142     // chbackground->Add(Form("%s/WJetsToLNu_PU_testFinal_baby.root",babyPath));
143     // chbackground->Add(Form("%s/DYToMuMuM20_PU_testFinal_baby.root",babyPath) );
144     // chbackground->Add(Form("%s/DYToMuMuM10To20_PU_testFinal_baby.root",babyPath) );
145     // chbackground->Add(Form("%s/DYToEEM20_PU_testFinal_baby.root",babyPath) );
146     // chbackground->Add(Form("%s/DYToEEM10To20_PU_testFinal_baby.root",babyPath) );
147     // chbackground->Add(Form("%s/DYToTauTauM20_PU_testFinal_baby.root",babyPath) );
148     // chbackground->Add(Form("%s/DYToTauTauM10To20_PU_testFinal_baby.root",babyPath) );
149    
150     // chbackground->Add(Form("%s/WJetsToLNu_PU_testFinal_baby_FO1.root",babyPath));
151     // chbackground->Add(Form("%s/WJetsToLNu_PU_testFinal_baby_FO2.root",babyPath));
152     // chbackground->Add(Form("%s/WJetsToLNu_PU_testFinal_baby_FO3.root",babyPath));
153     // chbackground->Add(Form("%s/WToLNu_FOv3_testFinal_baby.root",babyPath));
154     // chbackground->Add(Form("%s/WJetsToLNu_PU_testFinal_baby_FO4.root",babyPath));
155    
156     //---------------------------------
157     //choose signal sample to include
158     //---------------------------------
159 benhoob 1.3
160 benhoob 1.4 TChain *chsignal = new TChain("Events");
161 benhoob 1.3
162 benhoob 1.4 if( mH == 130 ){
163     chsignal->Add(Form("%s/HToWWTo2L2NuM130_PU_testFinal_baby.root",babyPath));
164     chsignal->Add(Form("%s/HToWWToLNuTauNuM130_PU_testFinal_baby.root",babyPath));
165     chsignal->Add(Form("%s/HToWWTo2Tau2NuM130_PU_testFinal_baby.root",babyPath));
166     }
167     else if( mH == 160 ){
168     chsignal->Add(Form("%s/HToWWTo2L2NuM160_PU_testFinal_baby.root",babyPath));
169     chsignal->Add(Form("%s/HToWWToLNuTauNuM160_PU_testFinal_baby.root",babyPath));
170     chsignal->Add(Form("%s/HToWWTo2Tau2NuM160_PU_testFinal_baby.root",babyPath));
171     }
172     else if( mH == 200 ){
173     chsignal->Add(Form("%s/HToWWTo2L2NuM200_PU_testFinal_baby.root",babyPath));
174     chsignal->Add(Form("%s/HToWWToLNuTauNuM200_PU_testFinal_baby.root",babyPath));
175     chsignal->Add(Form("%s/HToWWTo2Tau2NuM200_PU_testFinal_baby.root",babyPath));
176     }
177     else{
178     std::cout << "Error, unrecognized higgs mass " << mH << " GeV, quitting" << std::endl;
179     exit(0);
180     }
181    
182     //-----------------------------------------------------
183     // choose backgrounds to include for multiple outputs
184     //-----------------------------------------------------
185    
186     bool doMultipleOutputs = false;
187    
188     TChain *chww = new TChain("Events");
189     chww->Add(Form("%s/WWTo2L2Nu_PU_testFinal_baby.root",babyPath));
190     chww->Add(Form("%s/GluGluToWWTo4L_PU_testFinal_baby.root",babyPath));
191    
192     TChain *chwjets = new TChain("Events");
193     chwjets->Add(Form("%s/WJetsToLNu_PU_testFinal_baby.root",babyPath));
194    
195     TChain *chtt = new TChain("Events");
196     chtt->Add(Form("%s/TTJets_PU_testFinal_baby.root",babyPath));
197    
198     std::map<std::string,int> includeBkg;
199     includeBkg["ww"] = 1;
200     includeBkg["wjets"] = 0;
201     includeBkg["tt"] = 0;
202 benhoob 1.3
203 benhoob 1.1 //---------------------------------------------------------------
204     // This loads the library
205     TMVA::Tools::Instance();
206    
207     // Default MVA methods to be trained + tested
208     std::map<std::string,int> Use;
209    
210     // --- Cut optimisation
211     Use["Cuts"] = 1;
212     Use["CutsD"] = 1;
213     Use["CutsPCA"] = 0;
214     Use["CutsGA"] = 0;
215     Use["CutsSA"] = 0;
216     //
217     // --- 1-dimensional likelihood ("naive Bayes estimator")
218     Use["Likelihood"] = 1;
219     Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings)
220     Use["LikelihoodPCA"] = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
221     Use["LikelihoodKDE"] = 0;
222     Use["LikelihoodMIX"] = 0;
223     //
224     // --- Mutidimensional likelihood and Nearest-Neighbour methods
225     Use["PDERS"] = 1;
226     Use["PDERSD"] = 0;
227     Use["PDERSPCA"] = 0;
228     Use["PDEFoam"] = 1;
229     Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting
230     Use["KNN"] = 1; // k-nearest neighbour method
231     //
232     // --- Linear Discriminant Analysis
233     Use["LD"] = 1; // Linear Discriminant identical to Fisher
234     Use["Fisher"] = 0;
235     Use["FisherG"] = 0;
236     Use["BoostedFisher"] = 0; // uses generalised MVA method boosting
237     Use["HMatrix"] = 0;
238     //
239     // --- Function Discriminant analysis
240     Use["FDA_GA"] = 1; // minimisation of user-defined function using Genetics Algorithm
241     Use["FDA_SA"] = 0;
242     Use["FDA_MC"] = 0;
243     Use["FDA_MT"] = 0;
244     Use["FDA_GAMT"] = 0;
245     Use["FDA_MCMT"] = 0;
246     //
247     // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
248     Use["MLP"] = 0; // Recommended ANN
249     Use["MLPBFGS"] = 0; // Recommended ANN with optional training method
250     Use["MLPBNN"] = 1; // Recommended ANN with BFGS training method and bayesian regulator
251     Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH
252     Use["TMlpANN"] = 0; // ROOT's own ANN
253     //
254     // --- Support Vector Machine
255     Use["SVM"] = 1;
256     //
257     // --- Boosted Decision Trees
258     Use["BDT"] = 1; // uses Adaptive Boost
259     Use["BDTG"] = 0; // uses Gradient Boost
260     Use["BDTB"] = 0; // uses Bagging
261     Use["BDTD"] = 0; // decorrelation + Adaptive Boost
262     //
263     // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
264     Use["RuleFit"] = 1;
265 benhoob 1.4 //
266     // --- multi-output MVA's
267     Use["multi_BDTG"] = 1;
268     Use["multi_MLP"] = 1;
269     Use["multi_FDA_GA"] = 0;
270     //
271 benhoob 1.1 // ---------------------------------------------------------------
272    
273 benhoob 1.4
274    
275 benhoob 1.1 std::cout << std::endl;
276     std::cout << "==> Start TMVAClassification" << std::endl;
277    
278     // Select methods (don't look at this code - not of interest)
279     if (myMethodList != "") {
280     for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
281    
282     std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
283     for (UInt_t i=0; i<mlist.size(); i++) {
284     std::string regMethod(mlist[i]);
285    
286     if (Use.find(regMethod) == Use.end()) {
287     std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
288     for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
289     std::cout << std::endl;
290     return;
291     }
292     Use[regMethod] = 1;
293     }
294     }
295    
296     // --------------------------------------------------------------------------------------------------
297    
298     // --- Here the preparation phase begins
299    
300     // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
301 benhoob 1.2 TString outfileName( "TMVA_HWW.root" );
302 benhoob 1.1 TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
303    
304 benhoob 1.4 TString multioutfileName( "TMVA_HWW_multi.root" );
305     TFile* multioutputFile;
306     if( doMultipleOutputs )
307     multioutputFile = TFile::Open( multioutfileName, "RECREATE" );
308    
309 benhoob 1.1 // Create the factory object. Later you can choose the methods
310     // whose performance you'd like to investigate. The factory is
311     // the only TMVA object you have to interact with
312     //
313     // The first argument is the base of the name of all the
314     // weightfiles in the directory weight/
315     //
316     // The second argument is the output file for the training results
317     // All TMVA output can be suppressed by removing the "!" (not) in
318     // front of the "Silent" argument in the option string
319     TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
320     "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );
321 benhoob 1.4
322     TMVA::Factory *multifactory;
323     if( doMultipleOutputs )
324     multifactory= new TMVA::Factory( "TMVAMulticlass", multioutputFile,
325     "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" );
326 benhoob 1.1
327 benhoob 1.4
328 benhoob 1.1 // If you wish to modify default settings
329     // (please check "src/Config.h" to see all available global options)
330     // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
331     // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";
332    
333     // Define the input variables that shall be used for the MVA training
334     // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
335     // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
336     //factory->AddVariable( "myvar1 := var1+var2", 'F' );
337     //factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
338     //factory->AddVariable( "var3", "Variable 3", "units", 'F' );
339     //factory->AddVariable( "var4", "Variable 4", "units", 'F' );
340    
341 benhoob 1.2 //--------------------------------------------------------
342     // choose which variables to include in training
343     //--------------------------------------------------------
344 benhoob 1.4
345     if (mvaVar["lephard_pt"]) factory->AddVariable( "lephard_pt", "1st lepton pt", "GeV", 'F' );
346     if (mvaVar["lepsoft_pt"]) factory->AddVariable( "lepsoft_pt", "2nd lepton pt", "GeV", 'F' );
347     if (mvaVar["dil_dphi"]) factory->AddVariable( "dil_dphi", "dphi(ll)", "", 'F' );
348     if (mvaVar["dil_mass"]) factory->AddVariable( "dil_mass", "M(ll)", "GeV", 'F' );
349     if (mvaVar["event_type"]) factory->AddVariable( "event_type", "Dil Flavor Type", "", 'F' );
350     if (mvaVar["met_projpt"]) factory->AddVariable( "met_projpt", "Proj. MET", "GeV", 'F' );
351     if (mvaVar["met_pt"]) factory->AddVariable( "met_pt", "MET", "GeV", 'F' );
352     if (mvaVar["mt_lephardmet"]) factory->AddVariable( "mt_lephardmet", "MT(lep1,MET)", "GeV", 'F' );
353     if (mvaVar["mt_lepsoftmet"]) factory->AddVariable( "mt_lepsoftmet", "MT(lep2,MET)", "GeV", 'F' );
354     if (mvaVar["mthiggs"]) factory->AddVariable( "mthiggs", "MT(Higgs)", "GeV", 'F' );
355     if (mvaVar["dphi_lephardmet"]) factory->AddVariable( "dphi_lephardmet", "dphi(lep1,MET)", "GeV", 'F' );
356     if (mvaVar["dphi_lepsoftmet"]) factory->AddVariable( "dphi_lepsoftmet", "dphi(lep2,MET)", "GeV", 'F' );
357     if (mvaVar["lepsoft_fbrem"]) factory->AddVariable( "lepsoft_fbrem", "2nd lepton f_{brem}", "", 'F' );
358     if (mvaVar["lepsoft_eOverPIn"]) factory->AddVariable( "lepsoft_eOverPIn", "2nd lepton E/p", "", 'F' );
359     if (mvaVar["lepsoft_qdphi"]) factory->AddVariable( "lepsoft_q * lepsoft_dPhiIn", "2nd lepton q#times#Delta#phi", "", 'F' );
360    
361     if( doMultipleOutputs ){
362     if (mvaVar["lephard_pt"]) multifactory->AddVariable( "lephard_pt", "1st lepton pt", "GeV", 'F' );
363     if (mvaVar["lepsoft_pt"]) multifactory->AddVariable( "lepsoft_pt", "2nd lepton pt", "GeV", 'F' );
364     if (mvaVar["dil_dphi"]) multifactory->AddVariable( "dil_dphi", "dphi(ll)", "", 'F' );
365     if (mvaVar["dil_mass"]) multifactory->AddVariable( "dil_mass", "M(ll)", "GeV", 'F' );
366     if (mvaVar["event_type"]) multifactory->AddVariable( "event_type", "Dil Flavor Type", "", 'F' );
367     if (mvaVar["met_projpt"]) multifactory->AddVariable( "met_projpt", "Proj. MET", "GeV", 'F' );
368     if (mvaVar["met_pt"]) multifactory->AddVariable( "met_pt", "MET", "GeV", 'F' );
369     if (mvaVar["mt_lephardmet"]) multifactory->AddVariable( "mt_lephardmet", "MT(lep1,MET)", "GeV", 'F' );
370     if (mvaVar["mt_lepsoftmet"]) multifactory->AddVariable( "mt_lepsoftmet", "MT(lep2,MET)", "GeV", 'F' );
371     if (mvaVar["mthiggs"]) multifactory->AddVariable( "mthiggs", "MT(Higgs)", "GeV", 'F' );
372     if (mvaVar["dphi_lephardmet"]) multifactory->AddVariable( "dphi_lephardmet", "dphi(lep1,MET)", "GeV", 'F' );
373     if (mvaVar["dphi_lepsoftmet"]) multifactory->AddVariable( "dphi_lepsoftmet", "dphi(lep2,MET)", "GeV", 'F' );
374     if (mvaVar["lepsoft_fbrem"]) multifactory->AddVariable( "lepsoft_fbrem", "2nd lepton f_{brem}", "", 'F' );
375     if (mvaVar["lepsoft_eOverPIn"]) multifactory->AddVariable( "lepsoft_eOverPIn", "2nd lepton E/p", "", 'F' );
376     if (mvaVar["lepsoft_qdphi"]) multifactory->AddVariable( "lepsoft_q * lepsoft_dPhiIn", "2nd lepton q#times#Delta#phi", "", 'F' );
377     }
378 benhoob 1.3
379     if (mvaVar["lephard_pt"]) cout << "Adding variable to MVA training: lephard_pt" << endl;
380     if (mvaVar["lepsoft_pt"]) cout << "Adding variable to MVA training: lepsoft_pt" << endl;
381     if (mvaVar["dil_dphi"]) cout << "Adding variable to MVA training: dil_dphi" << endl;
382     if (mvaVar["dil_mass"]) cout << "Adding variable to MVA training: dil_mass" << endl;
383     if (mvaVar["event_type"]) cout << "Adding variable to MVA training: event_type" << endl;
384     if (mvaVar["met_projpt"]) cout << "Adding variable to MVA training: met_projpt" << endl;
385     if (mvaVar["met_pt"]) cout << "Adding variable to MVA training: met_pt" << endl;
386     if (mvaVar["mt_lephardmet"]) cout << "Adding variable to MVA training: mt_lephardmet" << endl;
387     if (mvaVar["mt_lepsoftmet"]) cout << "Adding variable to MVA training: mt_lepsoftmet" << endl;
388 benhoob 1.4 if (mvaVar["mthiggs"]) cout << "Adding variable to MVA training: mthiggs" << endl;
389 benhoob 1.3 if (mvaVar["dphi_lephardmet"]) cout << "Adding variable to MVA training: dphi_lephardmet" << endl;
390     if (mvaVar["dphi_lepsoftmet"]) cout << "Adding variable to MVA training: dphi_lepsoftmet" << endl;
391 benhoob 1.4 if (mvaVar["lepsoft_fbrem"]) cout << "Adding variable to MVA training: lepsoft_fbrem" << endl;
392     if (mvaVar["lepsoft_eOverPIn"]) cout << "Adding variable to MVA training: lepsoft_eOverPIn" << endl;
393     if (mvaVar["lepsoft_qdphi"]) cout << "Adding variable to MVA training: lepsoft_qdphi" << endl;
394 benhoob 1.1
395     // You can add so-called "Spectator variables", which are not used in the MVA training,
396     // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
397     // input variables, the response values of all trained MVAs, and the spectator variables
398     //factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' );
399     //factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' );
400    
401 benhoob 1.2 TTree *signal = (TTree*) chsignal;
402     TTree *background = (TTree*) chbackground;
403 benhoob 1.4
404 benhoob 1.2 std::cout << "--- TMVAClassification : Using bkg input files: -------------------" << std::endl;
405    
406     TObjArray *listOfBkgFiles = chbackground->GetListOfFiles();
407     TIter bkgFileIter(listOfBkgFiles);
408     TChainElement* currentBkgFile = 0;
409    
410     while((currentBkgFile = (TChainElement*)bkgFileIter.Next())) {
411     std::cout << currentBkgFile->GetTitle() << std::endl;
412     }
413    
414     std::cout << "--- TMVAClassification : Using sig input files: -------------------" << std::endl;
415 benhoob 1.4
416 benhoob 1.2 TObjArray *listOfSigFiles = chsignal->GetListOfFiles();
417     TIter sigFileIter(listOfSigFiles);
418     TChainElement* currentSigFile = 0;
419 benhoob 1.1
420 benhoob 1.2 while((currentSigFile = (TChainElement*)sigFileIter.Next())) {
421     std::cout << currentSigFile->GetTitle() << std::endl;
422     }
423 benhoob 1.1
424     // global event weights per tree (see below for setting event-wise weights)
425 benhoob 1.2 Double_t signalWeight = 1.0;
426     Double_t backgroundWeight = 1.0;
427 benhoob 1.1
428     // You can add an arbitrary number of signal or background trees
429     factory->AddSignalTree ( signal, signalWeight );
430     factory->AddBackgroundTree( background, backgroundWeight );
431 benhoob 1.4
432 benhoob 1.1 // To give different trees for training and testing, do as follows:
433     // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
434     // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" );
435    
436     // Use the following code instead of the above two or four lines to add signal and background
437     // training and test events "by hand"
438     // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
439     // variable definition, but simply compute the expression before adding the event
440     //
441     // // --- begin ----------------------------------------------------------
442     // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
443     // Float_t treevars[4], weight;
444     //
445     // // Signal
446     // for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
447     // for (UInt_t i=0; i<signal->GetEntries(); i++) {
448     // signal->GetEntry(i);
449     // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
450     // // add training and test events; here: first half is training, second is testing
451     // // note that the weight can also be event-wise
452     // if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight );
453     // else factory->AddSignalTestEvent ( vars, signalWeight );
454     // }
455     //
456     // // Background (has event weights)
457     // background->SetBranchAddress( "weight", &weight );
458     // for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
459     // for (UInt_t i=0; i<background->GetEntries(); i++) {
460     // background->GetEntry(i);
461     // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
462     // // add training and test events; here: first half is training, second is testing
463     // // note that the weight can also be event-wise
464     // if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight );
465     // else factory->AddBackgroundTestEvent ( vars, backgroundWeight*weight );
466     // }
467     // --- end ------------------------------------------------------------
468     //
469     // --- end of tree registration
470 benhoob 1.4
471 benhoob 1.1 // Set individual event weights (the variables must exist in the original TTree)
472 benhoob 1.4 factory->SetSignalWeightExpression ("event_scale1fb * lepsoft_fr");
473     factory->SetBackgroundWeightExpression("event_scale1fb * lepsoft_fr");
474    
475     if( doMultipleOutputs ){
476     multifactory->AddTree(signal,"Signal");
477     multifactory->SetSignalWeightExpression ("event_scale1fb");
478     multifactory->SetBackgroundWeightExpression("event_scale1fb");
479     multifactory->SetWeightExpression("event_scale1fb");
480    
481     if( includeBkg["ww"] ){
482     TTree* ww = (TTree*) chww;
483     multifactory->AddTree(ww,"WW");
484     cout << "Added WW to multi-MVA" << endl;
485     }
486     if( includeBkg["wjets"] ){
487     TTree* wjets = (TTree*) chwjets;
488     multifactory->AddTree(wjets,"WJets");
489     cout << "Added W+jets to multi-MVA" << endl;
490     }
491     if( includeBkg["tt"] ){
492     TTree* tt = (TTree*) chtt;
493     multifactory->AddTree(tt,"tt");
494     cout << "Added ttbar multi-MVA" << endl;
495     }
496     }
497 benhoob 1.1
498     // Apply additional cuts on the signal and background samples (can be different)
499 benhoob 1.2 TCut mycuts = sel; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
500     TCut mycutb = sel; // for example: TCut mycutb = "abs(var1)<0.5";
501 benhoob 1.1
502     // Tell the factory how to use the training and testing events
503     //
504     // If no numbers of events are given, half of the events in the tree are used
505     // for training, and the other half for testing:
506     // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
507     // To also specify the number of testing events, use:
508     // factory->PrepareTrainingAndTestTree( mycut,
509     // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
510 benhoob 1.4
511 benhoob 1.2 //Use random splitting
512 benhoob 1.1 factory->PrepareTrainingAndTestTree( mycuts, mycutb,
513     "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
514    
515 benhoob 1.4 if( doMultipleOutputs ){
516     multifactory->PrepareTrainingAndTestTree( mycuts, mycutb,
517     "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
518     }
519    
520 benhoob 1.2 //Use alternate splitting
521     //(this is preferable since its easier to track which events were used for training, but the job crashes! need to fix this...)
522     //factory->PrepareTrainingAndTestTree( mycuts, mycutb,
523     // "nTrain_Signal=0:nTrain_Background=0:SplitMode=Alternate:NormMode=NumEvents:!V" );
524    
525 benhoob 1.1 // ---- Book MVA methods
526     //
527     // Please lookup the various method configuration options in the corresponding cxx files, eg:
528     // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
529     // it is possible to preset ranges in the option string in which the cut optimisation should be done:
530     // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable
531    
532     // Cut optimisation
533     if (Use["Cuts"])
534     factory->BookMethod( TMVA::Types::kCuts, "Cuts",
535     "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );
536    
537     if (Use["CutsD"])
538     factory->BookMethod( TMVA::Types::kCuts, "CutsD",
539     "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );
540    
541     if (Use["CutsPCA"])
542     factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
543     "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );
544    
545     if (Use["CutsGA"])
546     factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
547     "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );
548    
549     if (Use["CutsSA"])
550     factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
551     "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
552    
553     // Likelihood ("naive Bayes estimator")
554     if (Use["Likelihood"])
555     factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
556     "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );
557    
558     // Decorrelated likelihood
559     if (Use["LikelihoodD"])
560     factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
561     "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );
562    
563     // PCA-transformed likelihood
564     if (Use["LikelihoodPCA"])
565     factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
566     "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" );
567    
568     // Use a kernel density estimator to approximate the PDFs
569     if (Use["LikelihoodKDE"])
570     factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
571     "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" );
572    
573     // Use a variable-dependent mix of splines and kernel density estimator
574     if (Use["LikelihoodMIX"])
575     factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
576     "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" );
577    
578     // Test the multi-dimensional probability density estimator
579     // here are the options strings for the MinMax and RMS methods, respectively:
580     // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
581     // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
582     if (Use["PDERS"])
583     factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
584     "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );
585    
586     if (Use["PDERSD"])
587     factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
588     "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );
589    
590     if (Use["PDERSPCA"])
591     factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
592     "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );
593    
594     // Multi-dimensional likelihood estimator using self-adapting phase-space binning
595     if (Use["PDEFoam"])
596     factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
597     "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );
598    
599     if (Use["PDEFoamBoost"])
600     factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
601     "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );
602    
603     // K-Nearest Neighbour classifier (KNN)
604     if (Use["KNN"])
605     factory->BookMethod( TMVA::Types::kKNN, "KNN",
606     "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );
607    
608     // H-Matrix (chi-squared) method
609     if (Use["HMatrix"])
610     factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" );
611    
612     // Linear discriminant (same as Fisher discriminant)
613     if (Use["LD"])
614     factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
615    
616     // Fisher discriminant (same as LD)
617     if (Use["Fisher"])
618     factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
619    
620     // Fisher with Gauss-transformed input variables
621     if (Use["FisherG"])
622     factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );
623    
624     // Composite classifier: ensemble (tree) of boosted Fisher classifiers
625     if (Use["BoostedFisher"])
626     factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher",
627     "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" );
628    
629     // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
630     if (Use["FDA_MC"])
631     factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
632     "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );
633    
634     if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options)
635     factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
636     "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );
637    
638     if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options)
639     factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
640     "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
641    
642     if (Use["FDA_MT"])
643     factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
644     "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );
645    
646     if (Use["FDA_GAMT"])
647     factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
648     "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );
649    
650     if (Use["FDA_MCMT"])
651     factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
652     "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );
653    
654     // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
655     if (Use["MLP"])
656     factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );
657    
658     if (Use["MLPBFGS"])
659     factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );
660    
661     if (Use["MLPBNN"])
662     factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators
663    
664     // CF(Clermont-Ferrand)ANN
665     if (Use["CFMlpANN"])
666     factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:...
667    
668     // Tmlp(Root)ANN
669     if (Use["TMlpANN"])
670     factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:...
671    
672     // Support Vector Machine
673     if (Use["SVM"])
674     factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );
675    
676     // Boosted Decision Trees
677     if (Use["BDTG"]) // Gradient Boost
678     factory->BookMethod( TMVA::Types::kBDT, "BDTG",
679     "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" );
680    
681     if (Use["BDT"]) // Adaptive Boost
682     factory->BookMethod( TMVA::Types::kBDT, "BDT",
683     "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );
684    
685     if (Use["BDTB"]) // Bagging
686     factory->BookMethod( TMVA::Types::kBDT, "BDTB",
687     "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );
688    
689     if (Use["BDTD"]) // Decorrelation + Adaptive Boost
690     factory->BookMethod( TMVA::Types::kBDT, "BDTD",
691     "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );
692    
693     // RuleFit -- TMVA implementation of Friedman's method
694     if (Use["RuleFit"])
695     factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
696     "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );
697    
698 benhoob 1.4 if( doMultipleOutputs ){
699     if (Use["multi_BDTG"]) // gradient boosted decision trees
700     multifactory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:NNodesMax=8");
701     if (Use["multi_MLP"]) // neural network
702     multifactory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:NCycles=1000:HiddenLayers=N+5,5:TestRate=5:EstimatorType=MSE");
703     if (Use["multi_FDA_GA"]) // functional discriminant with GA minimizer
704     multifactory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );
705     }
706    
707 benhoob 1.1 // For an example of the category classifier usage, see: TMVAClassificationCategory
708    
709     // --------------------------------------------------------------------------------------------------
710    
711     // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events
712    
713     // factory->OptimizeAllMethods("SigEffAt001","Scan");
714     // factory->OptimizeAllMethods("ROCIntegral","GA");
715    
716     // --------------------------------------------------------------------------------------------------
717    
718     // ---- Now you can tell the factory to train, test, and evaluate the MVAs
719 benhoob 1.4
720 benhoob 1.1 // Train MVAs using the set of training events
721     factory->TrainAllMethods();
722 benhoob 1.4
723 benhoob 1.1 // ---- Evaluate all MVAs using the set of test events
724     factory->TestAllMethods();
725 benhoob 1.4
726 benhoob 1.1 // ----- Evaluate and compare performance of all configured MVAs
727     factory->EvaluateAllMethods();
728 benhoob 1.4
729     if( doMultipleOutputs ){
730     // Train multi-MVAs using the set of training events
731     multifactory->TrainAllMethods();
732    
733     // ---- Evaluate all multi-MVAs using the set of test events
734     multifactory->TestAllMethods();
735    
736     // ----- Evaluate and compare performance of all configured multi-MVAs
737     multifactory->EvaluateAllMethods();
738     }
739    
740 benhoob 1.1 // --------------------------------------------------------------
741    
742     // Save the output
743     outputFile->Close();
744 benhoob 1.4 if( doMultipleOutputs ) multioutputFile->Close();
745 benhoob 1.1
746     std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
747     std::cout << "==> TMVAClassification is done!" << std::endl;
748 benhoob 1.4
749 benhoob 1.1 delete factory;
750    
751     // Launch the GUI for the root macros
752     if (!gROOT->IsBatch()) TMVAGui( outfileName );
753     }