1 |
yangyong |
1.1 |
#include <cstdlib>
|
2 |
|
|
#include <iostream>
|
3 |
|
|
#include <map>
|
4 |
|
|
#include <string>
|
5 |
|
|
|
6 |
|
|
#include "TChain.h"
|
7 |
|
|
#include "TFile.h"
|
8 |
|
|
#include "TTree.h"
|
9 |
|
|
#include "TString.h"
|
10 |
|
|
#include "TObjString.h"
|
11 |
|
|
#include "TSystem.h"
|
12 |
|
|
#include "TROOT.h"
|
13 |
|
|
|
14 |
|
|
#include "TMVARegGui.C"
|
15 |
|
|
|
16 |
|
|
#if not defined(__CINT__) || defined(__MAKECINT__)
|
17 |
|
|
#include "TMVA/Tools.h"
|
18 |
|
|
#include "TMVA/Factory.h"
|
19 |
|
|
#endif
|
20 |
|
|
|
21 |
|
|
using namespace TMVA;
|
22 |
|
|
|
23 |
|
|
// --- Globals shared between trainReg() and the included helper files below ---
int trainecal;   // detector-region selector: 1 = barrel, 2 = endcap (set from trainReg's argument)
int trainCut;    // NOTE(review): not referenced in this file — presumably set/used inside traincut.cc; confirm
TCut mycut;      // event-selection cut applied to the training chain; filled by setTrainingCut()
TChain *fChain;  // input chain of "Analysis" trees; created and filled in trainReg()

// Training variables as (expression, type) pairs, where type is "F" (float)
// or "I" (integer); filled by setTrainingVar() and fed to Factory::AddVariable.
vector< pair<TString,TString> > vec_trainVar;

#include "traincut.cc"   // provides setTrainingCut() — fills mycut
#include "trainvar.cc"   // provides setTrainingVar() — fills vec_trainVar
|
34 |
|
|
|
35 |
|
|
void trainReg(int test_trainecal, TString myMethodList = "BDTG" )
|
36 |
|
|
{
|
37 |
|
|
|
38 |
|
|
|
39 |
|
|
trainecal = test_trainecal;
|
40 |
|
|
|
41 |
|
|
// The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
|
42 |
|
|
// if you use your private .rootrc, or run from a different directory, please copy the
|
43 |
|
|
// corresponding lines from .rootrc
|
44 |
|
|
|
45 |
|
|
// methods to be processed can be given as an argument; use format:
|
46 |
|
|
//
|
47 |
|
|
// mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\)
|
48 |
|
|
//
|
49 |
|
|
|
50 |
|
|
//---------------------------------------------------------------
|
51 |
|
|
// This loads the library
|
52 |
|
|
TMVA::Tools::Instance();
|
53 |
|
|
|
54 |
|
|
// Default MVA methods to be trained + tested
|
55 |
|
|
std::map<std::string,int> Use;
|
56 |
|
|
|
57 |
|
|
// --- Mutidimensional likelihood and Nearest-Neighbour methods
|
58 |
|
|
Use["PDERS"] = 0;
|
59 |
|
|
|
60 |
|
|
//Use["PDEFoam"] = 1;
|
61 |
|
|
Use["PDEFoam"] = 0;
|
62 |
|
|
|
63 |
|
|
Use["KNN"] = 0;
|
64 |
|
|
//
|
65 |
|
|
|
66 |
|
|
// --- Linear Discriminant Analysis
|
67 |
|
|
Use["LD"] = 0;
|
68 |
|
|
|
69 |
|
|
//
|
70 |
|
|
// --- Function Discriminant analysis
|
71 |
|
|
Use["FDA_GA"] = 1;
|
72 |
|
|
|
73 |
|
|
Use["FDA_MC"] = 0;
|
74 |
|
|
Use["FDA_MT"] = 0;
|
75 |
|
|
Use["FDA_GAMT"] = 0;
|
76 |
|
|
//
|
77 |
|
|
// --- Neural Network
|
78 |
|
|
Use["MLP"] = 1;
|
79 |
|
|
//
|
80 |
|
|
// --- Support Vector Machine
|
81 |
|
|
Use["SVM"] = 0;
|
82 |
|
|
//
|
83 |
|
|
// --- Boosted Decision Trees
|
84 |
|
|
Use["BDT"] = 0;
|
85 |
|
|
Use["BDTG"] = 1;
|
86 |
|
|
// ---------------------------------------------------------------
|
87 |
|
|
|
88 |
|
|
std::cout << std::endl;
|
89 |
|
|
std::cout << "==> Start TMVARegression" << std::endl;
|
90 |
|
|
|
91 |
|
|
// Select methods (don't look at this code - not of interest)
|
92 |
|
|
if (myMethodList != "") {
|
93 |
|
|
for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
|
94 |
|
|
|
95 |
|
|
std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' );
|
96 |
|
|
for (UInt_t i=0; i<mlist.size(); i++) {
|
97 |
|
|
std::string regMethod(mlist[i]);
|
98 |
|
|
|
99 |
|
|
if (Use.find(regMethod) == Use.end()) {
|
100 |
|
|
std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
|
101 |
|
|
for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
|
102 |
|
|
std::cout << std::endl;
|
103 |
|
|
return;
|
104 |
|
|
}
|
105 |
|
|
Use[regMethod] = 1;
|
106 |
|
|
}
|
107 |
|
|
}
|
108 |
|
|
|
109 |
|
|
// --------------------------------------------------------------------------------------------------
|
110 |
|
|
|
111 |
|
|
// --- Here the preparation phase begins
|
112 |
|
|
|
113 |
|
|
// Create a new root output file
|
114 |
|
|
|
115 |
|
|
TString outfileName( "TMVARegOutput.root" );
|
116 |
|
|
|
117 |
|
|
TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
|
118 |
|
|
|
119 |
|
|
// Create the factory object. Later you can choose the methods
|
120 |
|
|
// whose performance you'd like to investigate. The factory will
|
121 |
|
|
// then run the performance analysis for you.
|
122 |
|
|
//
|
123 |
|
|
// The first argument is the base of the name of all the
|
124 |
|
|
// weightfiles in the directory weight/
|
125 |
|
|
//
|
126 |
|
|
// The second argument is the output file for the training results
|
127 |
|
|
// All TMVA output can be suppressed by removing the "!" (not) in
|
128 |
|
|
// front of the "Silent" argument in the option string
|
129 |
|
|
TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile,
|
130 |
|
|
"!V:!Silent:Color:!DrawProgressBar" );
|
131 |
|
|
// If you wish to modify default settings
|
132 |
|
|
// (please check "src/Config.h" to see all available global options)
|
133 |
|
|
// (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
|
134 |
|
|
// (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";
|
135 |
|
|
|
136 |
|
|
// Define the input variables that shall be used for the MVA training
|
137 |
|
|
// note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
|
138 |
|
|
// [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
|
139 |
|
|
|
140 |
|
|
|
141 |
|
|
//factory->AddVariable( "var1", "Variable 1", "units", 'F' );
|
142 |
|
|
//factory->AddVariable( "var2", "Variable 2", "units", 'F' );
|
143 |
|
|
|
144 |
|
|
|
145 |
|
|
fChain = new TChain("Analysis");
|
146 |
|
|
fChain->Add("/mnt/hadoop/user/yangyong//data/photonTree/gjs6.root");
|
147 |
|
|
|
148 |
|
|
|
149 |
|
|
// Apply additional cuts on the signal and background samples (can be different)
|
150 |
|
|
///TCut mycut = ""; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";
|
151 |
|
|
|
152 |
|
|
setTrainingCut();
|
153 |
|
|
setTrainingVar();
|
154 |
|
|
|
155 |
|
|
|
156 |
|
|
for(int j=0; j<int(vec_trainVar.size());j++){
|
157 |
|
|
TString var = vec_trainVar[j].first;
|
158 |
|
|
TString type = vec_trainVar[j].second;
|
159 |
|
|
if( type == "F"){
|
160 |
|
|
factory->AddVariable(var,'F');
|
161 |
|
|
}else if( type == "I"){
|
162 |
|
|
factory->AddVariable(var,'I');
|
163 |
|
|
}else{
|
164 |
|
|
cout<<"unknow type " << type <<endl;
|
165 |
|
|
return;
|
166 |
|
|
}
|
167 |
|
|
}
|
168 |
|
|
cout<<"factory var added " <<endl;
|
169 |
|
|
|
170 |
|
|
// You can add so-called "Spectator variables", which are not used in the MVA training,
|
171 |
|
|
// but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
|
172 |
|
|
// input variables, the response values of all trained MVAs, and the spectator variables
|
173 |
|
|
///factory->AddSpectator( "spec1:= var1* var1 + var1 * var2 + 3 * var1 + 2 * var2 + var2* var2", "Spectator 1", "units", 'F' );
|
174 |
|
|
//factory->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' );
|
175 |
|
|
|
176 |
|
|
// Add the variable carrying the regression target
|
177 |
|
|
//factory->AddTarget( "etrue/escraw");
|
178 |
|
|
|
179 |
|
|
|
180 |
|
|
|
181 |
|
|
//barrel
|
182 |
|
|
if(trainecal ==1){
|
183 |
|
|
factory->AddTarget( "etrue/escraw");
|
184 |
|
|
}
|
185 |
|
|
//endcap
|
186 |
|
|
else if(trainecal==2){
|
187 |
|
|
factory->AddTarget( "etrue/(escraw+eps)");
|
188 |
|
|
}
|
189 |
|
|
|
190 |
|
|
// It is also possible to declare additional targets for multi-dimensional regression, ie:
|
191 |
|
|
// -- factory->AddTarget( "fvalue2" );
|
192 |
|
|
// BUT: this is currently ONLY implemented for MLP
|
193 |
|
|
|
194 |
|
|
// Read training and test data (see TMVAClassification for reading ASCII files)
|
195 |
|
|
// load the signal and background event samples from ROOT trees
|
196 |
|
|
|
197 |
|
|
// global event weights per tree (see below for setting event-wise weights)
|
198 |
|
|
Double_t regWeight = 1.0;
|
199 |
|
|
// You can add an arbitrary number of regression trees
|
200 |
|
|
|
201 |
|
|
factory->AddRegressionTree( fChain, regWeight );
|
202 |
|
|
|
203 |
|
|
|
204 |
|
|
|
205 |
|
|
// This would set individual event weights (the variables defined in the
|
206 |
|
|
// expression need to exist in the original TTree)
|
207 |
|
|
|
208 |
|
|
// tell the factory to use all remaining events in the trees after training for testing:
|
209 |
|
|
|
210 |
|
|
int nselected = fChain->GetEntries(mycut);
|
211 |
|
|
cout<<" nselected " << nselected <<endl;
|
212 |
|
|
|
213 |
|
|
int ntrain;
|
214 |
|
|
int ntest;
|
215 |
|
|
|
216 |
|
|
|
217 |
|
|
//barrel
|
218 |
|
|
if(trainecal ==1){
|
219 |
|
|
ntrain = 1500000;
|
220 |
|
|
}
|
221 |
|
|
//endcap
|
222 |
|
|
else if(trainecal==2){
|
223 |
|
|
ntrain = 1000000;
|
224 |
|
|
}
|
225 |
|
|
|
226 |
|
|
if( nselected <ntrain){
|
227 |
|
|
cout<<"ntrain > nselected " << nselected <<" "<< ntrain <<endl;
|
228 |
|
|
return;
|
229 |
|
|
}
|
230 |
|
|
|
231 |
|
|
ntest = nselected-ntrain;
|
232 |
|
|
if( ntest > ntrain){
|
233 |
|
|
ntest = ntrain;
|
234 |
|
|
}
|
235 |
|
|
|
236 |
|
|
TString training = "nTrain_Regression="+TString(Form("%d",ntrain)) + ":nTest_Regression=" +TString(Form("%d",ntest)) + ":SplitMode=Random:NormMode=NumEvents:!V" ;
|
237 |
|
|
|
238 |
|
|
factory->PrepareTrainingAndTestTree( mycut, training);
|
239 |
|
|
|
240 |
|
|
|
241 |
|
|
// If no numbers of events are given, half of the events in the tree are used
|
242 |
|
|
// for training, and the other half for testing:
|
243 |
|
|
// factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
|
244 |
|
|
|
245 |
|
|
// ---- Book MVA methods
|
246 |
|
|
//
|
247 |
|
|
// please lookup the various method configuration options in the corresponding cxx files, eg:
|
248 |
|
|
// src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
|
249 |
|
|
// it is possible to preset ranges in the option string in which the cut optimisation should be done:
|
250 |
|
|
// "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable
|
251 |
|
|
|
252 |
|
|
// PDE - RS method
|
253 |
|
|
if (Use["PDERS"])
|
254 |
|
|
factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
|
255 |
|
|
"!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
|
256 |
|
|
// And the options strings for the MinMax and RMS methods, respectively:
|
257 |
|
|
// "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
|
258 |
|
|
// "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
|
259 |
|
|
|
260 |
|
|
if (Use["PDEFoam"])
|
261 |
|
|
factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
|
262 |
|
|
"!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" );
|
263 |
|
|
|
264 |
|
|
// K-Nearest Neighbour classifier (KNN)
|
265 |
|
|
if (Use["KNN"])
|
266 |
|
|
factory->BookMethod( TMVA::Types::kKNN, "KNN",
|
267 |
|
|
"nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );
|
268 |
|
|
|
269 |
|
|
// Linear discriminant
|
270 |
|
|
if (Use["LD"])
|
271 |
|
|
factory->BookMethod( TMVA::Types::kLD, "LD",
|
272 |
|
|
"!H:!V:VarTransform=None" );
|
273 |
|
|
|
274 |
|
|
// Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
|
275 |
|
|
if (Use["FDA_MC"])
|
276 |
|
|
factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
|
277 |
|
|
"!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );
|
278 |
|
|
|
279 |
|
|
if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas
|
280 |
|
|
factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
|
281 |
|
|
"!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );
|
282 |
|
|
|
283 |
|
|
if (Use["FDA_MT"])
|
284 |
|
|
factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
|
285 |
|
|
"!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );
|
286 |
|
|
|
287 |
|
|
if (Use["FDA_GAMT"])
|
288 |
|
|
factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
|
289 |
|
|
"!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );
|
290 |
|
|
|
291 |
|
|
// Neural network (MLP)
|
292 |
|
|
if (Use["MLP"])
|
293 |
|
|
factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );
|
294 |
|
|
|
295 |
|
|
// Support Vector Machine
|
296 |
|
|
if (Use["SVM"])
|
297 |
|
|
factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );
|
298 |
|
|
|
299 |
|
|
// Boosted Decision Trees
|
300 |
|
|
if (Use["BDT"])
|
301 |
|
|
factory->BookMethod( TMVA::Types::kBDT, "BDT",
|
302 |
|
|
"!H:!V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );
|
303 |
|
|
|
304 |
|
|
if (Use["BDTG"]){
|
305 |
|
|
factory->BookMethod( TMVA::Types::kBDT, "BDTG","!H:!V:nEventsMin=200:NTrees=100::BoostType=Grad:Shrinkage=0.1:UseYesNoLeaf=F:nCuts=2000:MaxDepth=100:NNodesMax=100000" );
|
306 |
|
|
}
|
307 |
|
|
|
308 |
|
|
// --------------------------------------------------------------------------------------------------
|
309 |
|
|
|
310 |
|
|
// ---- Now you can tell the factory to train, test, and evaluate the MVAs
|
311 |
|
|
|
312 |
|
|
// Train MVAs using the set of training events
|
313 |
|
|
factory->TrainAllMethods();
|
314 |
|
|
|
315 |
|
|
// ---- Evaluate all MVAs using the set of test events
|
316 |
|
|
|
317 |
|
|
// factory->TestAllMethods();
|
318 |
|
|
|
319 |
|
|
// ----- Evaluate and compare performance of all configured MVAs
|
320 |
|
|
|
321 |
|
|
// factory->EvaluateAllMethods();
|
322 |
|
|
|
323 |
|
|
// --------------------------------------------------------------
|
324 |
|
|
|
325 |
|
|
// Save the output
|
326 |
|
|
outputFile->Close();
|
327 |
|
|
|
328 |
|
|
std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
|
329 |
|
|
std::cout << "==> TMVARegression is done!" << std::endl;
|
330 |
|
|
|
331 |
|
|
delete factory;
|
332 |
|
|
|
333 |
|
|
// Launch the GUI for the root macros
|
334 |
|
|
//if (!gROOT->IsBatch()) TMVARegGui( outfileName );
|
335 |
|
|
}
|