root/cvsroot/UserCode/LJMet/MultivariateAnalysis/macros/TMVAnalysis.py
Revision: 1.1
Committed: Thu Nov 20 22:34:49 2008 UTC by kukartse
Content type: text/x-python
Branch: MAIN
CVS Tags: V00-03-01, ZMorph_BASE_20100408, gak040610_morphing, V00-02-02, gak011410, gak010310, ejterm2010_25nov2009, V00-02-01, V00-02-00, gak112409, CMSSW_22X_branch_base, segala101609, V00-01-15, V00-01-14, V00-01-13, V00-01-12, V00-01-11, V00-01-10, gak031009, gak030509, gak022309, gak021209, gak040209, gak012809, V00-01-09, V00-01-08, V00-01-07, V00-01-06, V00-01-05, V00-01-04, V00-00-07, V00-00-06, V00-00-05, V00-00-04, V00-01-03, V00-00-02, V00-00-01, HEAD
Branch point for: ZMorph-V00-03-01, CMSSW_22X_branch
Log Message:
created /macros with TMVA scripts in it

File Contents

#!/usr/bin/env python
# @(#)root/tmva $Id: TMVAnalysis.py,v 1.8 2007/06/20 09:41:24 brun Exp $
# ------------------------------------------------------------------------------ #
# Project : TMVA - a Root-integrated toolkit for multivariate data analysis #
# Package : TMVA #
# Python script: TMVAnalysis.py #
# #
# This python script provides examples for the training and testing of all the #
# TMVA classifiers through PyROOT. Note that the use of PyROOT requires that you #
# have a python version > 2.2 installed on your computer. #
# #
# The input data is a toy-MC sample consisting of four Gaussian-distributed #
# and linearly correlated input variables. #
# #
# The methods to be used can be switched on and off via the prompt command, for #
# example: #
# #
#    python TMVAnalysis.py --methods Fisher,Likelihood #
# #
# The output file "TMVA.root" can be analysed with the use of dedicated #
# macros (simply say: root -l <../macros/macro.C>), which can be conveniently #
# invoked through a GUI that will appear at the end of the run of this macro. #
# #
# For help, type "python TMVAnalysis.py --help" #
# ------------------------------------------------------------------------------ #

# --------------------------------------------
# standard python imports
import sys    # exit
import time   # time accounting
import getopt # command line parser

# --------------------------------------------

# default settings for command line arguments
DEFAULT_OUTFNAME = "TMVA.root"
DEFAULT_INFNAME  = "../examples/data/toy_sigbkg.root"
DEFAULT_TREESIG  = "TreeS"
DEFAULT_TREEBKG  = "TreeB"
DEFAULT_METHODS  = "CutsGA Likelihood LikelihoodPCA PDERS KNN HMatrix Fisher FDA MLP SVM_Gauss BDT RuleFitTMVA"

# print help
def usage():
    print " "
    print "Usage: python %s [options]" % sys.argv[0]
    print "  -m | --methods    : gives methods to be run (default: all methods)"
    print "  -i | --inputfile  : name of input ROOT file (default: '%s')" % DEFAULT_INFNAME
    print "  -o | --outputfile : name of output ROOT file containing results (default: '%s')" % DEFAULT_OUTFNAME
    print "  -t | --inputtrees : input ROOT Trees for signal and background (default: '%s %s')" \
          % (DEFAULT_TREESIG, DEFAULT_TREEBKG)
    print "  -v | --verbose"
    print "  -? | --usage      : print this help message"
    print "  -h | --help       : print this help message"
    print " "

# main routine
def main():

    try:
        # retrieve command line options
        shortopts  = "m:i:t:o:vh?"
        longopts   = ["methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname     = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname    = DEFAULT_OUTFNAME
    methods     = DEFAULT_METHODS
    verbose     = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit( ' ' )
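            # the tree names are sorted in reverse alphabetical order below so that,
            # for the default names, the signal tree ("TreeS") ends up first and the
            # background tree ("TreeB") second, whatever order was given on the command line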
            trees.sort()
            trees.reverse()
            if len(trees)-trees.count('') != 2:
                print "ERROR: need to give two trees (one for signal and one for background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # print methods
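    # NOTE: the default methods string above is space-separated, while the usage
    # example in the header passes a comma-separated list to --methods; accept both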
    mlist = methods.replace(',', ' ').split()
    print "=== TMVAnalysis: use methods..."
    for m in mlist:
        if m != '':
            print "=== <%s>" % m

    # import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # the TMVAlogon macro is not loaded automatically through PyROOT (it loads the
    # TMVA library); run it here and also load the GUI macro
    gROOT.Macro( '../macros/TMVAlogon.C' )
    gROOT.LoadMacro( '../macros/TMVAGui.C' )
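    # (assumption, not part of the original script: if TMVAlogon.C is not available,
    #  the TMVA library can typically be loaded directly with gSystem.Load( "libTMVA" )
    #  before the "from ROOT import TMVA" below)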

    # import TMVA classes from ROOT
    from ROOT import TMVA

    # output file
    outputFile = TFile( outfname, 'RECREATE' )

    # create an instance of the factory
    factory = TMVA.Factory( "TMVAnalysis", outputFile, "Color" )

    # set verbosity
    factory.SetVerbose( verbose )

    # read input data
    if not gSystem.AccessPathName( infname ):
        input = TFile( infname )
    else:
        print "ERROR: could not access data file %s\n" % infname
        sys.exit(1)

    signal     = input.Get( treeNameSig )
    background = input.Get( treeNameBkg )

    # global event weights (see below for setting event-wise weights)
    signalWeight     = 1.0
    backgroundWeight = 1.0

    if not factory.SetInputTrees( signal, background, signalWeight, backgroundWeight ):
        print "ERROR: could not set input trees\n"
        sys.exit(1)

    # Define the input variables that shall be used for the classifier training.
    # Note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    factory.AddVariable("var1+var2", 'F')
    factory.AddVariable("var1-var2", 'F')
    factory.AddVariable("var3", 'F')
    factory.AddVariable("var4", 'F')
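    # the second argument of AddVariable is the variable type: 'F' for float and
    # 'I' for integer variables; an integer-valued variable (hypothetical name,
    # for illustration only) would be declared, for example, as
    #     factory.AddVariable("nJets", 'I')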

    # This would set individual event weights (the variables defined in the
    # expression need to exist in the original TTree):
    #     factory.SetWeightExpression("weight1*weight2")
    #
    # Apply additional cuts on the signal and background samples.
    # Assumptions on the size of the training and testing samples:
    #    a) an equal number of signal and background events is used for training
    #    b) any number of signal and background events can be used for testing
    #    c) an explicit syntax can violate a)
    # (see the Factory class for more documentation)
    # example cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycut = TCut( "" )

    # here, the relevant variables are copied over into new, slim trees that are
    # used for TMVA training and testing;
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree( mycut, "NSigTrain=3000:NBkgTrain=3000:SplitMode=Random:NormMode=NumEvents:!V" )

    # an alternative call, using different numbers of signal and background training/test events, is:
    #     factory.PrepareTrainingAndTestTree( mycut, "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" )

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "Cuts",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" )

    # Cut optimisation using decorrelated input variables
    if "CutsD" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsD",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )

    # Cut optimisation with a Genetic Algorithm
    if "CutsGA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsGA",
                            "!H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=100:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp=FSmart" )

    # Likelihood
    if "Likelihood" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood",
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=100:NSmoothBkg[0]=10:NSmoothBkg[1]=100:NSmooth=10:NAvEvtPerBin=50" )

    # test the decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD",
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=100:NSmoothBkg[0]=10:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" )

    if "LikelihoodPCA" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodPCA",
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=100:NSmoothBkg[0]=10:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" )

    # likelihood method with unbinned kernel estimator
    if "LikelihoodKDE" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodKDE",
                            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" )

    # PDE-RS method
    if "PDERS" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERS",
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:InitialScale=0.99" )

    if "PDERSD" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSD",
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:InitialScale=0.99:VarTransform=Decorrelate" )

    if "PDERSPCA" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSPCA",
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:InitialScale=0.99:VarTransform=PCA" )

    # H-Matrix (chi-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod( TMVA.Types.kHMatrix, "HMatrix", "!H:!V" )

    # Fisher discriminant - also creates a PDF for the MVA output (shown here as an
    # example; this can be done for any other classifier)
    if "Fisher" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "Fisher",
                            "H:!V:!Normalise:CreateMVAPdfs:Fisher:NbinsMVAPdf=50:NsmoothMVAPdf=1" )

    # Function discriminant analysis
    if "FDA" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" )

    # the new TMVA ANN: MLP (recommended ANN)
    if "MLP" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLP", "Normalise:H:!V:NCycles=200:HiddenLayers=N+1,N:TestRate=5" )

    # CF (Clermont-Ferrand) ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod( TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=500:HiddenLayers=N+1,N" )

    # TMlp (ROOT) ANN
    if "TMlpANN" in mlist:
        factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N" )

    # Support Vector Machines with different kernel functions
    if "SVM_Gauss" in mlist:
        factory.BookMethod( TMVA.Types.kSVM, "SVM_Gauss", "Sigma=2:C=1:Tol=0.001:Kernel=Gauss" )

    if "SVM_Poly" in mlist:
        factory.BookMethod( TMVA.Types.kSVM, "SVM_Poly", "Order=4:Theta=1:C=0.1:Tol=0.001:Kernel=Polynomial" )

    if "SVM_Lin" in mlist:
        factory.BookMethod( TMVA.Types.kSVM, "SVM_Lin", "!H:!V:Kernel=Linear:C=1:Tol=0.001" )

    # Boosted Decision Trees
    if "BDT" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDT",
                            "!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=4.5" )

    # Boosted Decision Trees with decorrelated input variables
    if "BDTD" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTD",
                            "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=4.5" )

    # Friedman's RuleFit method
    if "RuleFitTMVA" in mlist:
        factory.BookMethod( TMVA.Types.kRuleFit, "RuleFitTMVA",
                            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" )

    if "RuleFitJF" in mlist:
        factory.BookMethod( TMVA.Types.kRuleFit, "RuleFitJF",
                            "!V:RuleFitModule=RFFriedman:Model=ModRuleLinear:GDStep=0.01:GDNSteps=10000:GDErrScale=1.1:RFNendnodes=4" )

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    # clean up
    factory.IsA().Destructor( factory )

    print "=== wrote root file %s\n" % outfname
    print "=== TMVAnalysis is done!\n"

    # open the GUI for the result macros
    gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname )

    # keep the ROOT thread running
    gApplication.Run()

# ----------------------------------------------------------

if __name__ == "__main__":
    main()
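
# ----------------------------------------------------------
# Usage sketch (added for illustration; the file and tree names below are just the
# script defaults and may need to be adapted to your setup):
#
#   run all default methods on the default toy sample:
#       python TMVAnalysis.py
#
#   run only a subset of methods on a custom file and trees:
#       python TMVAnalysis.py --methods Fisher,BDT \
#                             --inputfile ../examples/data/toy_sigbkg.root \
#                             --inputtrees "TreeS TreeB"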