MultivariateAnalysis/macros/TMVAnalysis.C

// @(#)root/tmva $Id: TMVAnalysis.C,v 1.3 2009/07/06 11:05:18 sapta Exp $
/**********************************************************************************
 * Project   : TMVA - a Root-integrated toolkit for multivariate data analysis    *
 * Package   : TMVA                                                               *
 * Root Macro: TMVAnalysis                                                        *
 *                                                                                *
 * This macro provides examples for the training and testing of all the           *
 * TMVA classifiers.                                                              *
 *                                                                                *
 * The input data is a toy-MC sample consisting of four Gaussian-distributed      *
 * and linearly correlated input variables.                                       *
 *                                                                                *
 * The methods to be used can be switched on and off by means of booleans, or     *
 * via the prompt command, for example:                                           *
 *                                                                                *
 *    root -l TMVAnalysis.C\(\"Fisher,Likelihood\"\)                              *
 *                                                                                *
 * (note that the backslashes are mandatory)                                      *
 *                                                                                *
 * The output file "TMVA.root" can be analysed with the use of dedicated          *
 * macros (simply say: root -l <macro.C>), which can be conveniently              *
 * invoked through a GUI that will appear at the end of the run of this macro.    *
 **********************************************************************************/

#include <iostream>

#include "TCut.h"
#include "TFile.h"
#include "TSystem.h"
#include "TTree.h"
// requires links
#include "TMVA/Factory.h"
#include "TMVA/Tools.h"
#include "TMVA/Config.h"

#include "TMVAGui.C"

using namespace std;

// ===============================================================
// Method selection switches: a flag set to true means the corresponding
// MVA method is booked, trained and tested.  When a non-empty method list
// is passed to TMVAnalysis(), all of these defaults are cleared and only
// the listed methods are switched back on.

// --- rectangular cut optimisation
Bool_t Use_Cuts            = false;
Bool_t Use_CutsD           = false;
Bool_t Use_CutsGA          = true;

// --- projective likelihood
//     suffix "D"   : decorrelated input variables (see option strings)
//     suffix "PCA" : PCA-transformed input variables (see option strings)
Bool_t Use_Likelihood      = true;
Bool_t Use_LikelihoodD     = false;
Bool_t Use_LikelihoodPCA   = true;
Bool_t Use_LikelihoodKDE   = false;
Bool_t Use_LikelihoodMIX   = false;

// --- PDE range search and k-nearest neighbour
Bool_t Use_PDERS           = true;
Bool_t Use_PDERSD          = false;
Bool_t Use_PDERSPCA        = false;
Bool_t Use_KNN             = true;

// --- linear discriminants
Bool_t Use_HMatrix         = true;
Bool_t Use_Fisher          = true;

// --- function discriminant analysis, one flag per fitter configuration
Bool_t Use_FDA_GA          = false;
Bool_t Use_FDA_MC          = false;
Bool_t Use_FDA_SA          = false;
Bool_t Use_FDA_MT          = true;
Bool_t Use_FDA_GAMT        = false;
Bool_t Use_FDA_MCMT        = false;

// --- artificial neural networks (MLP is the recommended ANN)
Bool_t Use_MLP             = true;
Bool_t Use_CFMlpANN        = false;
Bool_t Use_TMlpANN         = false;

// --- boosted decision trees
Bool_t Use_BDT             = true;
Bool_t Use_BDTD            = false;

// --- RuleFit
Bool_t Use_RuleFitTMVA     = true;
Bool_t Use_RuleFitJF       = false;

// --- support vector machines
Bool_t Use_SVM_Gauss       = true;
Bool_t Use_SVM_Poly        = false;
Bool_t Use_SVM_Lin         = false;
// ===============================================================

// Input format switch: true reads the samples from ascii files,
// false reads them from ROOT trees.
Bool_t ReadDataFromAsciiIFormat = false;

// Book, train, test and evaluate the selected TMVA classifiers.
//
// myMethodList : optional list of method names separated by ' ', ':' or ','.
//                If it is non-empty, every Use_* flag is cleared and only the
//                listed methods are enabled; if it is empty, the file-level
//                Use_* defaults are used unchanged.
//
// Results are written to "TMVA.root".  The macro calls exit(1) if the output
// file cannot be created, the input data cannot be opened, or the expected
// trees are missing.  Outside batch mode the TMVA GUI is launched at the end.
void TMVAnalysis( TString myMethodList = "" ) 
{
   // explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the 
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAnalysis.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   TList* mlist = TMVA::Tools::ParseFormatLine( myMethodList, " :," );

   if (mlist->GetSize()>0) {
      // an explicit list was given: switch everything off first ...
      Use_CutsGA = Use_CutsD = Use_Cuts
         = Use_LikelihoodKDE = Use_LikelihoodMIX = Use_LikelihoodPCA = Use_LikelihoodD = Use_Likelihood
         = Use_PDERSPCA = Use_PDERSD = Use_PDERS 
         = Use_KNN
         = Use_MLP = Use_CFMlpANN = Use_TMlpANN
         = Use_HMatrix = Use_Fisher = Use_BDTD = Use_BDT
         = Use_RuleFitTMVA = Use_RuleFitJF
         = Use_SVM_Gauss = Use_SVM_Poly = Use_SVM_Lin 
         = Use_FDA_GA = Use_FDA_MC = Use_FDA_SA = Use_FDA_MT = Use_FDA_GAMT = Use_FDA_MCMT 
         = 0;

      // ... then re-enable exactly the methods that were named
      if (mlist->FindObject( "Cuts"          ) != 0) Use_Cuts          = 1; 
      if (mlist->FindObject( "CutsD"         ) != 0) Use_CutsD         = 1; 
      if (mlist->FindObject( "CutsGA"        ) != 0) Use_CutsGA        = 1; 
      if (mlist->FindObject( "Likelihood"    ) != 0) Use_Likelihood    = 1; 
      if (mlist->FindObject( "LikelihoodD"   ) != 0) Use_LikelihoodD   = 1; 
      if (mlist->FindObject( "LikelihoodPCA" ) != 0) Use_LikelihoodPCA = 1; 
      if (mlist->FindObject( "LikelihoodKDE" ) != 0) Use_LikelihoodKDE = 1; 
      if (mlist->FindObject( "LikelihoodMIX" ) != 0) Use_LikelihoodMIX = 1; 
      if (mlist->FindObject( "PDERSPCA"      ) != 0) Use_PDERSPCA      = 1; 
      if (mlist->FindObject( "PDERSD"        ) != 0) Use_PDERSD        = 1; 
      if (mlist->FindObject( "PDERS"         ) != 0) Use_PDERS         = 1; 
      if (mlist->FindObject( "KNN"           ) != 0) Use_KNN           = 1; 
      if (mlist->FindObject( "HMatrix"       ) != 0) Use_HMatrix       = 1; 
      if (mlist->FindObject( "Fisher"        ) != 0) Use_Fisher        = 1; 
      if (mlist->FindObject( "MLP"           ) != 0) Use_MLP           = 1; 
      if (mlist->FindObject( "CFMlpANN"      ) != 0) Use_CFMlpANN      = 1; 
      if (mlist->FindObject( "TMlpANN"       ) != 0) Use_TMlpANN       = 1; 
      if (mlist->FindObject( "BDTD"          ) != 0) Use_BDTD          = 1; 
      if (mlist->FindObject( "BDT"           ) != 0) Use_BDT           = 1; 
      if (mlist->FindObject( "RuleFitJF"     ) != 0) Use_RuleFitJF     = 1; 
      if (mlist->FindObject( "RuleFitTMVA"   ) != 0) Use_RuleFitTMVA   = 1; 
      if (mlist->FindObject( "SVM_Gauss"     ) != 0) Use_SVM_Gauss     = 1; 
      if (mlist->FindObject( "SVM_Poly"      ) != 0) Use_SVM_Poly      = 1; 
      if (mlist->FindObject( "SVM_Lin"       ) != 0) Use_SVM_Lin       = 1; 
      if (mlist->FindObject( "FDA_MC"        ) != 0) Use_FDA_MC        = 1; 
      if (mlist->FindObject( "FDA_GA"        ) != 0) Use_FDA_GA        = 1; 
      if (mlist->FindObject( "FDA_SA"        ) != 0) Use_FDA_SA        = 1; 
      if (mlist->FindObject( "FDA_MT"        ) != 0) Use_FDA_MT        = 1; 
      if (mlist->FindObject( "FDA_GAMT"      ) != 0) Use_FDA_GAMT      = 1; 
      if (mlist->FindObject( "FDA_MCMT"      ) != 0) Use_FDA_MCMT      = 1; 
   }

   // the parsed list is released on every path, including the empty-list case
   delete mlist;
  
   std::cout << "Start Test TMVAnalysis" << std::endl
             << "======================" << std::endl
             << std::endl;
   std::cout << "Testing all standard methods may take about 10 minutes of running..." << std::endl;

   // Create a new root output file.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   if (!outputFile) {
      // without an output file nothing below can work (it is closed at the end)
      std::cout << "ERROR: could not create output file " << outfileName << std::endl;
      exit(1);
   }

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/ 
   //
   // The second argument is the output file for the training results
   //
   // The third argument is the option string; the "Color" option is negated in batch mode
   TMVA::Factory *factory = new TMVA::Factory( "TMVAnalysis", outputFile, Form("!V:%sColor", gROOT->IsBatch()?"!":"") );

   // if you wish to modify default settings 
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   if (ReadDataFromAsciiIFormat) {
      // load the signal and background event samples from ascii files
      // format in file must be:
      // var1/F:var2/F:var3/F:var4/F
      // 0.04551   0.59923   0.32400   -0.19170
      // ...

      TString datFileS = "data/toy_sig_lincorr.dat";
      TString datFileB = "data/toy_bkg_lincorr.dat";
      if (!factory->SetInputTrees( datFileS, datFileB )) exit(1);
   }
   else {
      // load the signal and background event samples from ROOT trees
      TFile *input(0);

      // NOTE(review): this block carried an unresolved CVS merge conflict.  It was
      // resolved in favour of the working-copy side; revision 1.3 pointed at
      // "/uscms_data/d2/sapta/work/CMSSW_2_2_3/src/BTagTest/BTagSoftLeptonAnalyzer/test/test_sig_back.root"
      // -- confirm which location is the intended one.
      // TString fname = "./tmva_training.root";
      TString fname = "/data1/sapta/cmssw/CMSSW_2_2_6/src/BTagTest/BTagSoftLeptonAnalyzer/test/test_sig_back.root";

      // the file is opened only when AccessPathName() returns false
      if (!gSystem->AccessPathName( fname )) {
         // first we try the configured input file
         std::cout << "--- TMVAnalysis  : accessing " << fname << std::endl;
         input = TFile::Open( fname );
      } 
      else { 
         // second we try accessing the file via the web from
         // http://root.cern.ch/files/tmva_example.root
         // NOTE(review): the variables booked below may not exist in this example
         // file -- confirm that this fallback is still useful.
         std::cout << "--- TMVAnalysis  : accessing tmva_example.root file from http://root.cern.ch/files" << std::endl;
         std::cout << "--- TMVAnalysis  : for faster startup you may consider downloading it into you local directory" << std::endl;
         input = TFile::Open( "http://root.cern.ch/files/tmva_example.root" );
      }

      if (!input) {
         std::cout << "ERROR: could not open data file" << std::endl;
         exit(1);
      }

      TTree *signal     = (TTree*)input->Get("signal");
      TTree *background = (TTree*)input->Get("background");
      if (!signal || !background) {
         // stop here rather than hand a null tree to the factory
         std::cout << "ERROR: could not find tree \"signal\" and/or \"background\" in data file" << std::endl;
         exit(1);
      }

      // global event weights (see below for setting event-wise weights)
      Double_t signalWeight     = 1.0;
      Double_t backgroundWeight = 1.0;

      factory->AddSignalTree    ( signal,     signalWeight );
      factory->AddBackgroundTree( background, backgroundWeight );
   }
   
   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   //
   // NOTE(review): merge conflict resolved in favour of the working-copy side, which
   // has "a_normalized_chi2" commented out; revision 1.3 booked it -- confirm.
   factory->AddVariable("n_vertices", 'F');
   factory->AddVariable("a_chi2", 'F');
   factory->AddVariable("a_ndof", 'F');
   // factory->AddVariable("a_normalized_chi2", 'F');
   factory->AddVariable("max_dR", 'F');
   factory->AddVariable("n_jet_tracks", 'F');
   factory->AddVariable("n_vertex_tracks_size", 'F');

   // This would set individual event weights (the variables defined in the 
   // expression need to exist in the original TTree)
   //factory->SetWeightExpression("event_weight");

   // Apply additional cuts on the signal and background sample.
   // (an empty cut is used here)
   TCut mycut = "";

   // numeric progress marker
   std::cout << "0" << std::endl;

   // tell the factory to use all remaining events in the trees after training for testing:
   //
   // NOTE(review): merge conflict resolved in favour of the working-copy side;
   // revision 1.3 used "NSigTrain=3000:NBkgTrain=3000:SplitMode=Random:NormMode=NumEvents:!V"
   // -- confirm the intended training sample sizes.
   TString split_opt = "NSigTrain=965:NBkgTrain=965:SplitMode=Random:NormMode=NumEvents:V";

   std::cout << "0.5" << std::endl;
   factory->PrepareTrainingAndTestTree( mycut, split_opt );  

   // If no numbers of events are given, half of the events in the tree are used for training, and 
   // the other half for testing:
   //   factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );  
   // To also specify the number of testing events, use:
   //   factory->PrepareTrainingAndTestTree( mycut, 
   //                                        "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );  

   // ---- Book MVA methods
   //
   // please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethodCuts.cxx, etc.
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   std::cout << "1" << std::endl;

   // Cut optimisation
   if (Use_Cuts) 
      factory->BookMethod( TMVA::Types::kCuts, "Cuts", 
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use_CutsD) 
      factory->BookMethod( TMVA::Types::kCuts, "CutsD", 
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use_CutsGA)
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "!H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=100:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp=FSmart" );
   
   // Likelihood
   // A previously tried alternative appended per-variable smoothing settings to the
   // option string, of the form
   //    NSmoothSig[i]=10 ... NSmoothBkg[i]=100 ... NSmooth=10:NAvEvtPerBin=50
   if (Use_Likelihood) 
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", 
                           "H:V:!TransformOutput:PDFInterpol=Spline2" );

   std::cout << "2" << std::endl;

   // test the decorrelated likelihood
   if (Use_LikelihoodD) 
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", 
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=100:NSmoothBkg[0]=10:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); 

   if (Use_LikelihoodPCA) 
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", 
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=100:NSmoothBkg[0]=10:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 
 
   // test the new kernel density estimator
   if (Use_LikelihoodKDE) 
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", 
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // test the mixed splines and kernel density estimator (depending on which variable)
   if (Use_LikelihoodMIX) 
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", 
                           "!H:!V:!TransformOutput:PDFInterpol[0]=KDE:PDFInterpol[1]=KDE:PDFInterpol[2]=Spline2:PDFInterpol[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // PDE - RS method
   if (Use_PDERS)
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS", 
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:InitialScale=0.99" );
   
   if (Use_PDERSD) 
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", 
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:InitialScale=0.99:VarTransform=Decorrelate" );

   if (Use_PDERSPCA) 
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", 
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:InitialScale=0.99:VarTransform=PCA" );

   std::cout << "3" << std::endl;

   // K-Nearest Neighbour classifier (KNN)
   if (Use_KNN)
      factory->BookMethod( TMVA::Types::kKNN, "KNN", 
                           "nkNN=40:TreeOptDepth=6:ScaleFrac=0.8:!UseKernel:!Trim" );  

   // H-Matrix (chi2-squared) method
   if (Use_HMatrix)
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" ); 

   // Fisher discriminant
   if (Use_Fisher)
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", 
                           "H:!V:!Normalise:CreateMVAPdfs:Fisher:NbinsMVAPdf=50:NsmoothMVAPdf=1" );    

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit or GA
   // NOTE(review): every FDA formula below refers only to x0..x3, while six input
   // variables are booked above -- confirm this is intended.
   if (Use_FDA_MC) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );
   
   if (Use_FDA_GA) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=100:Cycles=3:Steps=20:Trim=True:SaveBestGen=0" );

   if (Use_FDA_SA) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=50000:TemperatureGradient=0.7:InitialTemperature=2000000:MinTemperature=500:Eps=1e-04:NFunLoops=5:NEps=4" );

   if (Use_FDA_MT) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use_FDA_GAMT) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use_FDA_MCMT) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   std::cout << "4" << std::endl;

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use_MLP)
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "Normalise:H:!V:NCycles=200:HiddenLayers=N+1,N:TestRate=5" );

   // CF(Clermont-Ferrand)ANN
   if (Use_CFMlpANN)
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=500:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  
  
   // Tmlp(Root)ANN
   if (Use_TMlpANN)
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...
  
   // Support Vector Machines using three different Kernel types (Gauss, polynomial and linear)
   if (Use_SVM_Gauss)
      factory->BookMethod( TMVA::Types::kSVM, "SVM_Gauss", "Sigma=2:C=1:Tol=0.001:Kernel=Gauss" );

   if (Use_SVM_Poly)
      factory->BookMethod( TMVA::Types::kSVM, "SVM_Poly", "Order=4:Theta=1:C=0.1:Tol=0.001:Kernel=Polynomial" );

   if (Use_SVM_Lin)
      factory->BookMethod( TMVA::Types::kSVM, "SVM_Lin", "!H:!V:Kernel=Linear:C=1:Tol=0.001" );  

   // Boosted Decision Trees (second one with decorrelation)
   if (Use_BDT)
      factory->BookMethod( TMVA::Types::kBDT, "BDT", 
                           "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=4.5" );
   if (Use_BDTD)
      factory->BookMethod( TMVA::Types::kBDT, "BDTD", 
                           "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=4.5:VarTransform=Decorrelate" );

   std::cout << "5" << std::endl;

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use_RuleFitTMVA)
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFitTMVA", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=100000:GDErrScale=1.02" );

   // Friedman's RuleFit method, implementation by J. Friedman
   if (Use_RuleFitJF)
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFitJF",
                           "!V:RuleFitModule=RFFriedman:Model=ModRuleLinear:GDStep=0.01:GDNSteps=10000:GDErrScale=1.1:RFNendnodes=4" );

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();    

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();

   std::cout << "==> wrote root file TMVA.root" << std::endl;
   std::cout << "==> TMVAnalysis is done!" << std::endl;      

   // Clean up
   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
Revision:	1.4
Committed:	Thu Jul 23 09:54:40 2009 UTC (15 years, 9 months ago) by sapta
Content type:	text/plain
Branch:	MAIN
CVS Tags:	V00-03-01, ZMorph_BASE_20100408, gak040610_morphing, V00-02-02, gak011410, gak010310, ejterm2010_25nov2009, V00-02-01, V00-02-00, gak112409, CMSSW_22X_branch_base, segala101609, HEAD
Branch point for:	ZMorph-V00-03-01, CMSSW_22X_branch
Changes since 1.3:	+22 -1 lines
Log Message:	Random Update
#	User	Rev	Content
1	sapta	1.4	// @(#)root/tmva $Id: TMVAnalysis.C,v 1.3 2009/07/06 11:05:18 sapta Exp $
2	kukartse	1.1	/**********************************************************************************
3			* Project : TMVA - a Root-integrated toolkit for multivariate data analysis *
4			* Package : TMVA *
5			* Root Macro: TMVAnalysis *
6			* *
7			* This macro provides examples for the training and testing of all the *
8			* TMVA classifiers. *
9			* *
10			* As input data is used a toy-MC sample consisting of four Gaussian-distributed *
11			* and linearly correlated input variables. *
12			* *
13			* The methods to be used can be switched on and off by means of booleans, or *
14			* via the prompt command, for example: *
15			* *
16			* root -l TMVAnalysis.C\(\"Fisher,Likelihood\"\) *
17			* *
18			* (note that the backslashes are mandatory) *
19			* *
20			* The output file "TMVA.root" can be analysed with the use of dedicated *
21			* macros (simply say: root -l <macro.C>), which can be conveniently *
22			* invoked through a GUI that will appear at the end of the run of this macro. *
23			**********************************************************************************/
24
25			#include <iostream>
26
27			#include "TCut.h"
28			#include "TFile.h"
29			#include "TSystem.h"
30			#include "TTree.h"
31			// requires links
32	kukartse	1.2	#include "TMVA/Factory.h"
33			#include "TMVA/Tools.h"
34			#include "TMVA/Config.h"
35	kukartse	1.1
36			#include "TMVAGui.C"
37	kukartse	1.2
38	sapta	1.3	using namespace std;
39
40	kukartse	1.1	// ---------------------------------------------------------------
41			// choose MVA methods to be trained + tested
42			Bool_t Use_Cuts = 0;
43			Bool_t Use_CutsD = 0;
44			Bool_t Use_CutsGA = 1;
45			// ---
46			Bool_t Use_Likelihood = 1;
47			Bool_t Use_LikelihoodD = 0; // the "D" extension indicates decorrelated input variables (see option strings)
48			Bool_t Use_LikelihoodPCA = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
49			Bool_t Use_LikelihoodKDE = 0;
50			Bool_t Use_LikelihoodMIX = 0;
51			// ---
52			Bool_t Use_PDERS = 1;
53			Bool_t Use_PDERSD = 0;
54			Bool_t Use_PDERSPCA = 0;
55			Bool_t Use_KNN = 1;
56			// ---
57			Bool_t Use_HMatrix = 1;
58			Bool_t Use_Fisher = 1;
59			// ---
60			Bool_t Use_FDA_GA = 0;
61			Bool_t Use_FDA_MC = 0;
62			Bool_t Use_FDA_SA = 0;
63			Bool_t Use_FDA_MT = 1;
64			Bool_t Use_FDA_GAMT = 0;
65			Bool_t Use_FDA_MCMT = 0;
66			// ---
67			Bool_t Use_MLP = 1; // this is the recommended ANN
68			Bool_t Use_CFMlpANN = 0;
69			Bool_t Use_TMlpANN = 0;
70			// ---
71			Bool_t Use_BDT = 1;
72			Bool_t Use_BDTD = 0;
73			// ---
74			Bool_t Use_RuleFitTMVA = 1;
75			Bool_t Use_RuleFitJF = 0;
76			// ---
77			Bool_t Use_SVM_Gauss = 1;
78			Bool_t Use_SVM_Poly = 0;
79			Bool_t Use_SVM_Lin = 0;
80			// ---------------------------------------------------------------
81
82			// read input data file with ascii format (otherwise ROOT) ?
83			Bool_t ReadDataFromAsciiIFormat = kFALSE;
84
85			void TMVAnalysis( TString myMethodList = "" )
86			{
87			// explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
88			// if you use your private .rootrc, or run from a different directory, please copy the
89			// corresponding lines from .rootrc
90
91			// methods to be processed can be given as an argument; use format:
92			//
93			// mylinux~> root -l TMVAnalysis.C\(\"myMethod1,myMethod2,myMethod3\"\)
94			//
95			TList* mlist = TMVA::Tools::ParseFormatLine( myMethodList, " :," );
96
97			if (mlist->GetSize()>0) {
98			Use_CutsGA = Use_CutsD = Use_Cuts
99			= Use_LikelihoodKDE = Use_LikelihoodMIX = Use_LikelihoodPCA = Use_LikelihoodD = Use_Likelihood
100			= Use_PDERSPCA = Use_PDERSD = Use_PDERS
101			= Use_KNN
102			= Use_MLP = Use_CFMlpANN = Use_TMlpANN
103			= Use_HMatrix = Use_Fisher = Use_BDTD = Use_BDT
104			= Use_RuleFitTMVA = Use_RuleFitJF
105			= Use_SVM_Gauss = Use_SVM_Poly = Use_SVM_Lin
106			= Use_FDA_GA = Use_FDA_MC = Use_FDA_SA = Use_FDA_MT = Use_FDA_GAMT = Use_FDA_MCMT
107			= 0;
108
109			if (mlist->FindObject( "Cuts" ) != 0) Use_Cuts = 1;
110			if (mlist->FindObject( "CutsD" ) != 0) Use_CutsD = 1;
111			if (mlist->FindObject( "CutsGA" ) != 0) Use_CutsGA = 1;
112			if (mlist->FindObject( "Likelihood" ) != 0) Use_Likelihood = 1;
113			if (mlist->FindObject( "LikelihoodD" ) != 0) Use_LikelihoodD = 1;
114			if (mlist->FindObject( "LikelihoodPCA" ) != 0) Use_LikelihoodPCA = 1;
115			if (mlist->FindObject( "LikelihoodKDE" ) != 0) Use_LikelihoodKDE = 1;
116			if (mlist->FindObject( "LikelihoodMIX" ) != 0) Use_LikelihoodMIX = 1;
117			if (mlist->FindObject( "PDERSPCA" ) != 0) Use_PDERSPCA = 1;
118			if (mlist->FindObject( "PDERSD" ) != 0) Use_PDERSD = 1;
119			if (mlist->FindObject( "PDERS" ) != 0) Use_PDERS = 1;
120			if (mlist->FindObject( "KNN" ) != 0) Use_KNN = 1;
121			if (mlist->FindObject( "HMatrix" ) != 0) Use_HMatrix = 1;
122			if (mlist->FindObject( "Fisher" ) != 0) Use_Fisher = 1;
123			if (mlist->FindObject( "MLP" ) != 0) Use_MLP = 1;
124			if (mlist->FindObject( "CFMlpANN" ) != 0) Use_CFMlpANN = 1;
125			if (mlist->FindObject( "TMlpANN" ) != 0) Use_TMlpANN = 1;
126			if (mlist->FindObject( "BDTD" ) != 0) Use_BDTD = 1;
127			if (mlist->FindObject( "BDT" ) != 0) Use_BDT = 1;
128			if (mlist->FindObject( "RuleFitJF" ) != 0) Use_RuleFitJF = 1;
129			if (mlist->FindObject( "RuleFitTMVA" ) != 0) Use_RuleFitTMVA = 1;
130			if (mlist->FindObject( "SVM_Gauss" ) != 0) Use_SVM_Gauss = 1;
131			if (mlist->FindObject( "SVM_Poly" ) != 0) Use_SVM_Poly = 1;
132			if (mlist->FindObject( "SVM_Lin" ) != 0) Use_SVM_Lin = 1;
133			if (mlist->FindObject( "FDA_MC" ) != 0) Use_FDA_MC = 1;
134			if (mlist->FindObject( "FDA_GA" ) != 0) Use_FDA_GA = 1;
135			if (mlist->FindObject( "FDA_SA" ) != 0) Use_FDA_SA = 1;
136			if (mlist->FindObject( "FDA_MT" ) != 0) Use_FDA_MT = 1;
137			if (mlist->FindObject( "FDA_GAMT" ) != 0) Use_FDA_GAMT = 1;
138			if (mlist->FindObject( "FDA_MCMT" ) != 0) Use_FDA_MCMT = 1;
139
140			delete mlist;
141			}
142
143			std::cout << "Start Test TMVAnalysis" << std::endl
144			<< "======================" << std::endl
145			<< std::endl;
146			std::cout << "Testing all standard methods may take about 10 minutes of running..." << std::endl;
147
148			// Create a new root output file.
149			TString outfileName( "TMVA.root" );
150			TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
151
152			// Create the factory object. Later you can choose the methods
153			// whose performance you'd like to investigate. The factory will
154			// then run the performance analysis for you.
155			//
156			// The first argument is the base of the name of all the
157			// weightfiles in the directory weight/
158			//
159			// The second argument is the output file for the training results
160			TMVA::Factory *factory = new TMVA::Factory( "TMVAnalysis", outputFile, Form("!V:%sColor", gROOT->IsBatch()?"!":"") );
161
162			// if you wish to modify default settings
163			// (please check "src/Config.h" to see all available global options)
164			// (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
165			// (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";
166
167			if (ReadDataFromAsciiIFormat) {
168			// load the signal and background event samples from ascii files
169			// format in file must be:
170			// var1/F:var2/F:var3/F:var4/F
171			// 0.04551 0.59923 0.32400 -0.19170
172			// ...
173
174			TString datFileS = "data/toy_sig_lincorr.dat";
175			TString datFileB = "data/toy_bkg_lincorr.dat";
176			if (!factory->SetInputTrees( datFileS, datFileB )) exit(1);
177			}
178			else {
179			// load the signal and background event samples from ROOT trees
180			TFile *input(0);
181	sapta	1.4	<<<<<<< TMVAnalysis.C
182			// TString fname = "./tmva_training.root";
183			TString fname = "/data1/sapta/cmssw/CMSSW_2_2_6/src/BTagTest/BTagSoftLeptonAnalyzer/test/test_sig_back.root";
184			=======
185	sapta	1.3	// TString fname = "./tmva_training.root";
186			TString fname = "/uscms_data/d2/sapta/work/CMSSW_2_2_3/src/BTagTest/BTagSoftLeptonAnalyzer/test/test_sig_back.root";
187	sapta	1.4	>>>>>>> 1.3
188	kukartse	1.1	if (!gSystem->AccessPathName( fname )) {
189			// first we try to find tmva_example.root in the local directory
190			std::cout << "--- TMVAnalysis : accessing " << fname << std::endl;
191			input = TFile::Open( fname );
192			}
193			else {
194			// second we try accessing the file via the web from
195			// http://root.cern.ch/files/tmva_example.root
196			std::cout << "--- TMVAnalysis : accessing tmva_example.root file from http://root.cern.ch/files" << std::endl;
197			std::cout << "--- TMVAnalysis : for faster startup you may consider downloading it into you local directory" << std::endl;
198			input = TFile::Open( "http://root.cern.ch/files/tmva_example.root" );
199			}
200
201			if (!input) {
202			std::cout << "ERROR: could not open data file" << std::endl;
203			exit(1);
204			}
205
206	sapta	1.3	TTree signal = (TTree)input->Get("signal");
207			TTree background = (TTree)input->Get("background");
208	kukartse	1.1
209			// global event weights (see below for setting event-wise weights)
210			Double_t signalWeight = 1.0;
211			Double_t backgroundWeight = 1.0;
212
213			factory->AddSignalTree ( signal, signalWeight );
214			factory->AddBackgroundTree( background, backgroundWeight );
215			}
216
217			// Define the input variables that shall be used for the MVA training
218			// note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
219			// [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
220	sapta	1.4	<<<<<<< TMVAnalysis.C
221			factory->AddVariable("n_vertices", 'F');
222			factory->AddVariable("a_chi2", 'F');
223			factory->AddVariable("a_ndof", 'F');
224			// factory->AddVariable("a_normalized_chi2", 'F');
225			factory->AddVariable("max_dR", 'F');
226			factory->AddVariable("n_jet_tracks", 'F');
227			factory->AddVariable("n_vertex_tracks_size", 'F');
228			=======
229	sapta	1.3	factory->AddVariable("n_vertices", 'F');
230			factory->AddVariable("a_chi2", 'F');
231			factory->AddVariable("a_ndof", 'F');
232			factory->AddVariable("a_normalized_chi2", 'F');
233			factory->AddVariable("max_dR", 'F');
234			factory->AddVariable("n_jet_tracks", 'F');
235			factory->AddVariable("n_vertex_tracks_size", 'F');
236	sapta	1.4	>>>>>>> 1.3
237	kukartse	1.1
238			// This would set individual event weights (the variables defined in the
239			// expression need to exist in the original TTree)
240	sapta	1.3	//factory->SetWeightExpression("event_weight");
241	kukartse	1.1
242	sapta	1.3	// Apply additional cuts on the signal and background sample.
243			// #include"/uscms_data/d2/sapta/work/CMSSW_2_2_3/src/BTagTest/BTagSoftLeptonAnalyzer/test/LJMet/MultivariateAnalysis/macros/cuts.C"
244	kukartse	1.2	TCut mycut = "";
245	kukartse	1.1
246	sapta	1.3	cout<<"0"<<endl;
247
248	kukartse	1.1	// tell the factory to use all remaining events in the trees after training for testing:
249	sapta	1.4	<<<<<<< TMVAnalysis.C
250			TString split_opt = "NSigTrain=965:NBkgTrain=965:SplitMode=Random:NormMode=NumEvents:V";
251
252			cout<<"0.5"<<endl;
253			=======
254	kukartse	1.2	TString split_opt = "NSigTrain=3000:NBkgTrain=3000:SplitMode=Random:NormMode=NumEvents:!V";
255	sapta	1.3
256			cout<<"0.5"<<endl;
257	sapta	1.4	>>>>>>> 1.3
258	kukartse	1.2	factory->PrepareTrainingAndTestTree( mycut, split_opt );
259	kukartse	1.1
260			// If no numbers of events are given, half of the events in the tree are used for training, and
261			// the other half for testing:
262			// factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
263			// To also specify the number of testing events, use:
264			// factory->PrepareTrainingAndTestTree( mycut,
265			// "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
266
267			// ---- Book MVA methods
268			//
269			// please look up the various method configuration options in the corresponding cxx files, e.g.:
270			// src/MethodCuts.cxx, etc.
271			// it is possible to preset ranges in the option string in which the cut optimisation should be done:
272			// "...:CutRangeMin[2]=-1:CutRangeMax[2]=1:...", where [2] is the third input variable
273
274	sapta	1.3	cout<<"1"<<endl;
275
276	kukartse	1.1	// Cut optimisation
277			if (Use_Cuts)
278			factory->BookMethod( TMVA::Types::kCuts, "Cuts",
279			"!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );
280
281			if (Use_CutsD)
282			factory->BookMethod( TMVA::Types::kCuts, "CutsD",
283			"!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );
284
285			if (Use_CutsGA)
286			factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
287			"!H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=100:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp=FSmart" );
288
289			// Likelihood
290			if (Use_Likelihood)
291			factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
292	kukartse	1.2	"H:V:!TransformOutput:PDFInterpol=Spline2" );
293			// "H:V:!TransformOutput:PDFInterpol=Spline2:\
294			//NSmoothSig[0]=10:NSmoothSig[1]=10:NSmoothSig[2]=10:NSmoothSig[3]=100:NSmoothSig[4]=100:NSmoothSig[5]=100:NSmoothSig[6]=100:\
295			//NSmoothBkg[0]=10:NSmoothBkg[1]=10:NSmoothBkg[2]=10:NSmoothBkg[3]=100:NSmoothBkg[4]=100:NSmoothBkg[5]=100:NSmoothBkg[6]=100:\
296			//NSmooth=10:NAvEvtPerBin=50" );
297			//NSmoothSig[0]=10:NSmoothSig[1]=10:NSmoothSig[2]=10:NSmoothSig[3]=10:NSmoothSig[4]=10:NSmoothSig[5]=10:NSmoothSig[6]=10:NSmoothSig[7]=10:NSmoothSig[8]=10:NSmoothSig[9]=10:NSmoothSig[10]=10:NSmoothSig[11]=10:NSmoothSig[12]=10:NSmoothSig[13]=10:NSmoothSig[14]=10:NSmoothSig[15]=10:NSmoothSig[16]=10: \
298			//NSmoothBkg[0]=10:NSmoothBkg[1]=10:NSmoothBkg[2]=10:NSmoothBkg[3]=10:NSmoothBkg[4]=10:NSmoothBkg[5]=10:NSmoothBkg[6]=10:NSmoothBkg[7]=10:NSmoothBkg[8]=10:NSmoothBkg[9]=10:NSmoothBkg[10]=10:NSmoothBkg[11]=10:NSmoothBkg[12]=10:NSmoothBkg[13]=10:NSmoothBkg[14]=10:NSmoothBkg[15]=10:NSmoothBkg[16]=10: \
299	kukartse	1.1
300	sapta	1.3	cout<<"2"<<endl;
301
302	kukartse	1.1	// test the decorrelated likelihood
303			if (Use_LikelihoodD)
304			factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
305			"!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=100:NSmoothBkg[0]=10:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );
306
307			if (Use_LikelihoodPCA)
308			factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
309			"!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=100:NSmoothBkg[0]=10:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" );
310
311			// test the new kernel density estimator
312			if (Use_LikelihoodKDE)
313			factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
314			"!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" );
315
316			// test the mixed splines and kernel density estimator (depending on which variable)
317			if (Use_LikelihoodMIX)
318			factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
319			"!H:!V:!TransformOutput:PDFInterpol[0]=KDE:PDFInterpol[1]=KDE:PDFInterpol[2]=Spline2:PDFInterpol[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" );
320
321			// PDE - RS method
322			if (Use_PDERS)
323			factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
324			"!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:InitialScale=0.99" );
325
326			if (Use_PDERSD)
327			factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
328			"!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:InitialScale=0.99:VarTransform=Decorrelate" );
329
330			if (Use_PDERSPCA)
331			factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
332			"!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:InitialScale=0.99:VarTransform=PCA" );
333
334	sapta	1.3
335			cout<<"3"<<endl;
336
337	kukartse	1.1	// K-Nearest Neighbour classifier (KNN)
338			if (Use_KNN)
339			factory->BookMethod( TMVA::Types::kKNN, "KNN",
340			"nkNN=40:TreeOptDepth=6:ScaleFrac=0.8:!UseKernel:!Trim" );
341
342			// H-Matrix (chi-squared) method
343			if (Use_HMatrix)
344			factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" );
345
346			// Fisher discriminant
347			if (Use_Fisher)
348			factory->BookMethod( TMVA::Types::kFisher, "Fisher",
349			"H:!V:!Normalise:CreateMVAPdfs:Fisher:NbinsMVAPdf=50:NsmoothMVAPdf=1" );
350
351			// Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit or GA
352			if (Use_FDA_MC)
353			factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
354			"H:!V:Formula=(0)+(1)x0+(2)x1+(3)x2+(4)x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );
355
356			if (Use_FDA_GA)
357			factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
358			"H:!V:Formula=(0)+(1)x0+(2)x1+(3)x2+(4)x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=100:Cycles=3:Steps=20:Trim=True:SaveBestGen=0" );
359
360			if (Use_FDA_SA)
361			factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
362			"H:!V:Formula=(0)+(1)x0+(2)x1+(3)x2+(4)x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=50000:TemperatureGradient=0.7:InitialTemperature=2000000:MinTemperature=500:Eps=1e-04:NFunLoops=5:NEps=4" );
363
364			if (Use_FDA_MT)
365			factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
366			"H:!V:Formula=(0)+(1)x0+(2)x1+(3)x2+(4)x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );
367
368			if (Use_FDA_GAMT)
369			factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
370			"H:!V:Formula=(0)+(1)x0+(2)x1+(3)x2+(4)x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );
371
372			if (Use_FDA_MCMT)
373			factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
374			"H:!V:Formula=(0)+(1)x0+(2)x1+(3)x2+(4)x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );
375
376	sapta	1.3
377			cout<<"4"<<endl;
378
379	kukartse	1.1	// TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
380			if (Use_MLP)
381			factory->BookMethod( TMVA::Types::kMLP, "MLP", "Normalise:H:!V:NCycles=200:HiddenLayers=N+1,N:TestRate=5" );
382
383			// CF(Clermont-Ferrand)ANN
384			if (Use_CFMlpANN)
385			factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=500:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:...
386
387			// Tmlp(Root)ANN
388			if (Use_TMlpANN)
389			factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:...
390
391			// Support Vector Machines using three different Kernel types (Gauss, polynomial and linear)
392			if (Use_SVM_Gauss)
393			factory->BookMethod( TMVA::Types::kSVM, "SVM_Gauss", "Sigma=2:C=1:Tol=0.001:Kernel=Gauss" );
394
395			if (Use_SVM_Poly)
396			factory->BookMethod( TMVA::Types::kSVM, "SVM_Poly", "Order=4:Theta=1:C=0.1:Tol=0.001:Kernel=Polynomial" );
397
398			if (Use_SVM_Lin)
399			factory->BookMethod( TMVA::Types::kSVM, "SVM_Lin", "!H:!V:Kernel=Linear:C=1:Tol=0.001" );
400
401			// Boosted Decision Trees (second one with decorrelation)
402			if (Use_BDT)
403			factory->BookMethod( TMVA::Types::kBDT, "BDT",
404			"!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=4.5" );
405			if (Use_BDTD)
406			factory->BookMethod( TMVA::Types::kBDT, "BDTD",
407			"!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=4.5:VarTransform=Decorrelate" );
408
409	sapta	1.3
410			cout<<"5"<<endl;
411
412	kukartse	1.1	// RuleFit -- TMVA implementation of Friedman's method
413			if (Use_RuleFitTMVA)
414	kukartse	1.2	factory->BookMethod( TMVA::Types::kRuleFit, "RuleFitTMVA", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=100000:GDErrScale=1.02" );
415	kukartse	1.1
416			// Friedman's RuleFit method, implementation by J. Friedman
417			if (Use_RuleFitJF)
418			factory->BookMethod( TMVA::Types::kRuleFit, "RuleFitJF",
419			"!V:RuleFitModule=RFFriedman:Model=ModRuleLinear:GDStep=0.01:GDNSteps=10000:GDErrScale=1.1:RFNendnodes=4" );
420
421			// ---- Now you can tell the factory to train, test, and evaluate the MVAs
422
423			// Train MVAs using the set of training events
424			factory->TrainAllMethods();
425
426			// ---- Evaluate all MVAs using the set of test events
427			factory->TestAllMethods();
428
429			// ----- Evaluate and compare performance of all configured MVAs
430			factory->EvaluateAllMethods();
431
432			// --------------------------------------------------------------
433
434			// Save the output
435			outputFile->Close();
436
437			std::cout << "==> wrote root file TMVA.root" << std::endl;
438			std::cout << "==> TMVAnalysis is done!" << std::endl;
439
440			// Clean up
441			delete factory;
442
443			// Launch the GUI for the root macros
444			if (!gROOT->IsBatch()) TMVAGui( outfileName );
445			}