root/cvsroot/UserCode/yangyong/Regression/trainReg.C
Revision: 1.1
Committed: Mon May 14 11:42:54 2012 UTC by yangyong
Content type: text/plain
Branch: MAIN
CVS Tags: V00-00-01, HEAD
Log Message:
train regression in tmva

File Contents

#include <cstdlib>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

#include "TChain.h"
#include "TCut.h"
#include "TFile.h"
#include "TObjString.h"
#include "TROOT.h"
#include "TString.h"
#include "TSystem.h"
#include "TTree.h"

#include "TMVARegGui.C"

#if not defined(__CINT__) || defined(__MAKECINT__)
#include "TMVA/Tools.h"
#include "TMVA/Factory.h"
#endif

using namespace TMVA;

// Globals filled below and by the included helper files:
// trainecal selects the detector region (1 = barrel, 2 = endcap),
// mycut holds the event selection, and vec_trainVar holds
// (expression, type) pairs describing the training variables.
int trainecal;
int trainCut;
TCut mycut;
TChain *fChain;

std::vector< std::pair<TString,TString> > vec_trainVar;

#include "traincut.cc"
#include "trainvar.cc"
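
// Note: traincut.cc and trainvar.cc are not part of this listing. As a rough
// sketch (an assumption about their shape, not their actual content), they
// are expected to define something like:
//
//   void setTrainingCut() {
//      // hypothetical selection; the real cut lives in traincut.cc
//      if (trainecal == 1) mycut = "abs(eta) < 1.48";
//      else                mycut = "abs(eta) > 1.48 && abs(eta) < 3.0";
//   }
//
//   void setTrainingVar() {
//      // hypothetical variable list; the real list lives in trainvar.cc
//      vec_trainVar.push_back(std::make_pair(TString("escraw"), TString("F")));
//      vec_trainVar.push_back(std::make_pair(TString("nvtx"),   TString("I")));
//   }
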
void trainReg(int test_trainecal, TString myMethodList = "BDTG")
{
   trainecal = test_trainecal;

   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc;
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc.

   // The methods to be processed can be given as the second argument; use the format:
   //
   //   mylinux~> root -l trainReg.C\(1,\"myMethod1,myMethod2,myMethod3\"\)
   //
   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Multidimensional likelihood and nearest-neighbour methods
   Use["PDERS"]    = 0;
   Use["PDEFoam"]  = 0;
   Use["KNN"]      = 0;

   // --- Linear discriminant analysis
   Use["LD"]       = 0;

   // --- Function discriminant analysis
   Use["FDA_GA"]   = 1;
   Use["FDA_MC"]   = 0;
   Use["FDA_MT"]   = 0;
   Use["FDA_GAMT"] = 0;

   // --- Neural network
   Use["MLP"]      = 1;

   // --- Support vector machine
   Use["SVM"]      = 0;

   // --- Boosted decision trees
   Use["BDT"]      = 0;
   Use["BDTG"]     = 1;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVARegression" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);
         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a new ROOT output file
   TString outfileName( "TMVARegOutput.root" );
   TFile*  outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weight files in the directory weights/
   //
   // The second argument is the output file for the training results.
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string.
   TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile,
                                               "!V:!Silent:Color:!DrawProgressBar" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options):
   // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training.
   // Note that you may also use variable expressions, such as "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )].
   //factory->AddVariable( "var1", "Variable 1", "units", 'F' );
   //factory->AddVariable( "var2", "Variable 2", "units", 'F' );

   // Chain up the input photon tree
   fChain = new TChain("Analysis");
   fChain->Add("/mnt/hadoop/user/yangyong//data/photonTree/gjs6.root");
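
   // Note (not from the original): TChain::Add also accepts wildcards, so more
   // input files could be chained with a pattern, e.g.
   //   fChain->Add("/mnt/hadoop/user/yangyong//data/photonTree/gjs*.root");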

   // Apply additional cuts on the event sample,
   // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1".
   // Here the cut (mycut) and the variable list (vec_trainVar) are filled
   // by the helpers included above.
   setTrainingCut();
   setTrainingVar();

   // Register each training variable with the factory, as float ('F') or integer ('I')
   for(int j=0; j<int(vec_trainVar.size()); j++){
      TString var  = vec_trainVar[j].first;
      TString type = vec_trainVar[j].second;
      if( type == "F" ){
         factory->AddVariable(var, 'F');
      }else if( type == "I" ){
         factory->AddVariable(var, 'I');
      }else{
         std::cout << "unknown type " << type << std::endl;
         return;
      }
   }
   std::cout << "factory var added" << std::endl;

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables.
   //factory->AddSpectator( "spec1:=var1*var1 + var1*var2 + 3*var1 + 2*var2 + var2*var2", "Spectator 1", "units", 'F' );
   //factory->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' );

   // Add the variable carrying the regression target: the ratio of the true
   // energy to the raw reconstructed energy (plus the preshower energy in the endcap)
   if(trainecal == 1){        // barrel
      factory->AddTarget( "etrue/escraw" );
   }
   else if(trainecal == 2){   // endcap
      factory->AddTarget( "etrue/(escraw+eps)" );
   }
   else{
      std::cout << "trainecal must be 1 (barrel) or 2 (endcap), got " << trainecal << std::endl;
      return;
   }

   // It is also possible to declare additional targets for multi-dimensional regression, i.e.
   //   factory->AddTarget( "fvalue2" );
   // BUT: this is currently ONLY implemented for MLP.

   // Read training and test data (see TMVAClassification for reading ASCII files):
   // load the event sample from the ROOT tree.

   // Global event weight per tree (see below for setting event-wise weights)
   Double_t regWeight = 1.0;

   // You can add an arbitrary number of regression trees
   factory->AddRegressionTree( fChain, regWeight );

   // This would set individual event weights (the variables defined in the
   // expression need to exist in the original TTree).

   // Tell the factory how many of the selected events to use for training;
   // the remainder (capped at the training size) is used for testing.
   int nselected = fChain->GetEntries(mycut);
   std::cout << " nselected " << nselected << std::endl;

   int ntrain = 0;
   int ntest  = 0;

   if(trainecal == 1){        // barrel
      ntrain = 1500000;
   }
   else if(trainecal == 2){   // endcap
      ntrain = 1000000;
   }

   if( nselected < ntrain ){
      std::cout << "ntrain > nselected " << nselected << " " << ntrain << std::endl;
      return;
   }

   ntest = nselected - ntrain;
   if( ntest > ntrain ){
      ntest = ntrain;
   }

   TString training = "nTrain_Regression=" + TString(Form("%d",ntrain))
                    + ":nTest_Regression=" + TString(Form("%d",ntest))
                    + ":SplitMode=Random:NormMode=NumEvents:!V";

   factory->PrepareTrainingAndTestTree( mycut, training );

   // If no numbers of events are given, half of the events in the tree are used
   // for training, and the other half for testing:
   //   factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
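
   // For illustration (event counts assumed, not from the original): in the
   // barrel, with say 4,000,000 events passing mycut, ntrain = 1,500,000 and
   // ntest = min(4,000,000 - 1,500,000, 1,500,000) = 1,500,000, so the option
   // string becomes
   //   "nTrain_Regression=1500000:nTest_Regression=1500000:SplitMode=Random:NormMode=NumEvents:!V"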

   // ---- Book MVA methods
   //
   // Please look up the various method configuration options in the corresponding cxx files, e.g.
   // src/MethodCuts.cxx, etc., or here: http://tmva.sourceforge.net/optionRef.html
   // It is possible to preset ranges in the option string in which the cut optimisation should be done:
   //   "...:CutRangeMin[2]=-1:CutRangeMax[2]=1...", where [2] is the third input variable

   // PDE-RS method
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
   // And the options strings for the MinMax and RMS methods, respectively:
   //   "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //   "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );

   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" );

   // k-nearest-neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // Linear discriminant
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD",
                           "!H:!V:VarTransform=None" );

   // Function discriminant analysis (FDA) -- test of various fitters; the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );

   // Can also use the Simulated Annealing (SA) algorithm (see Cuts_SA options); the formula of this example is good for parabolas
   if (Use["FDA_GA"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   // Neural network (MLP)
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP",
                           "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );

   // Support vector machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted decision trees
   if (Use["BDT"])
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );

   if (Use["BDTG"]){
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:nEventsMin=200:NTrees=100:BoostType=Grad:Shrinkage=0.1:UseYesNoLeaf=F:nCuts=2000:MaxDepth=100:NNodesMax=100000" );
   }

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   //factory->TestAllMethods();

   // ---- Evaluate and compare performance of all configured MVAs
   //factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote ROOT file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVARegression is done!" << std::endl;

   delete factory;

   // Launch the GUI for the ROOT macros
   //if (!gROOT->IsBatch()) TMVARegGui( outfileName );
}
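
// Applying the trained regression (a sketch, not part of the original macro):
// training writes XML weight files under weights/, e.g.
// weights/TMVARegression_BDTG.weights.xml, which can be evaluated on new events
// with TMVA::Reader. The variable names below are assumptions for illustration
// and must match what trainvar.cc actually registered, in the same order.
//
//   TMVA::Reader reader("!Color:!Silent");
//   Float_t escraw, nvtx;                      // hypothetical inputs
//   reader.AddVariable("escraw", &escraw);
//   reader.AddVariable("nvtx",   &nvtx);
//   reader.BookMVA("BDTG", "weights/TMVARegression_BDTG.weights.xml");
//   ...
//   escraw = 55.3; nvtx = 12;                  // filled per event
//   Float_t corr = reader.EvaluateRegression("BDTG")[0];   // estimate of etrue/escraw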