ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/LJMet/MultivariateAnalysis/macros/TMVAnalysis.py
Revision: 1.1
Committed: Thu Nov 20 22:34:49 2008 UTC (16 years, 5 months ago) by kukartse
Content type: text/x-python
Branch: MAIN
CVS Tags: V00-03-01, ZMorph_BASE_20100408, gak040610_morphing, V00-02-02, gak011410, gak010310, ejterm2010_25nov2009, V00-02-01, V00-02-00, gak112409, CMSSW_22X_branch_base, segala101609, V00-01-15, V00-01-14, V00-01-13, V00-01-12, V00-01-11, V00-01-10, gak031009, gak030509, gak022309, gak021209, gak040209, gak012809, V00-01-09, V00-01-08, V00-01-07, V00-01-06, V00-01-05, V00-01-04, V00-00-07, V00-00-06, V00-00-05, V00-00-04, V00-01-03, V00-00-02, V00-00-01, HEAD
Branch point for: ZMorph-V00-03-01, CMSSW_22X_branch
Log Message:
created /macros with TMVA scripts in it

File Contents

# User Rev Content
1 kukartse 1.1 #!/usr/bin/env python
2     # @(#)root/tmva $Id: TMVAnalysis.py,v 1.8 2007/06/20 09:41:24 brun Exp $
3     # ------------------------------------------------------------------------------ #
4     # Project : TMVA - a Root-integrated toolkit for multivariate data analysis #
5     # Package : TMVA #
6     # Python script: TMVAnalysis.py #
7     # #
8     # This python script provides examples for the training and testing of all the #
9     # TMVA classifiers through PyROOT. Note that the use of PyROOT requires that you #
10     # have a python version > 2.2 installed on your computer. #
11     # #
12     # As input data is used a toy-MC sample consisting of four Gaussian-distributed #
13     # and linearly correlated input variables. #
14     # #
15     # The methods to be used can be switched on and off via the prompt command, for #
16     # example: #
17     # #
18     # python TMVAnalysis.py --methods Fisher,Likelihood #
19     # #
20     # The output file "TMVA.root" can be analysed with the use of dedicated #
21     # macros (simply say: root -l <../macros/macro.C>), which can be conveniently #
22     # invoked through a GUI that will appear at the end of the run of this macro. #
23     # #
24     # for help type "python TMVAnalysis.py --help" #
25     # ------------------------------------------------------------------------------ #
26    
27     # --------------------------------------------
28     # standard python import
29     import sys # exit
30     import time # time accounting
31     import getopt # command line parser
32    
33     # --------------------------------------------
34    
# Default settings for the command line arguments.
DEFAULT_OUTFNAME = "TMVA.root"
DEFAULT_INFNAME = "../examples/data/toy_sigbkg.root"
DEFAULT_TREESIG = "TreeS"
DEFAULT_TREEBKG = "TreeB"
# Comma-separated, i.e. the same format the --methods option takes and that
# main() splits on.  A space-separated default would be treated as one single
# (unknown) method name, so no classifier would be booked at all.
DEFAULT_METHODS = "CutsGA,Likelihood,LikelihoodPCA,PDERS,KNN,HMatrix,Fisher,FDA,MLP,SVM_Gauss,BDT,RuleFitTMVA"
41    
42     # print help
def usage():
    """Print the command line help text of this script to stdout.

    The defaults shown are read from the module-level DEFAULT_* constants,
    and the script name is taken from sys.argv[0].
    """
    # print(...) with a single argument behaves identically as a Python 2
    # print statement and as a Python 3 function call.
    help_lines = [
        " ",
        "Usage: python %s [options]" % sys.argv[0],
        # Show the real default list: the default is a subset of the
        # classifiers this script can book, not "all methods".
        " -m | --methods : comma-separated methods to be run (default: '%s')" % DEFAULT_METHODS,
        " -i | --inputfile : name of input ROOT file (default: '%s')" % DEFAULT_INFNAME,
        " -o | --outputfile : name of output ROOT file containing results (default: '%s')" % DEFAULT_OUTFNAME,
        " -t | --inputtrees : input ROOT Trees for signal and background (default: '%s %s')"
        % (DEFAULT_TREESIG, DEFAULT_TREEBKG),
        " -v | --verbose",
        " -? | --usage : print this help message",
        " -h | --help : print this help message",
        " ",
    ]
    print("\n".join(help_lines))
55    
56     # main routine
def main():
    """Train, test and evaluate the requested TMVA classifiers.

    Steps, in order:
      1. parse the command line (see usage() for the options);
      2. load the TMVA logon and GUI macros through ROOT;
      3. create the output file and the TMVA factory;
      4. read the signal and background trees from the input file;
      5. declare the input variables and prepare the training/test trees;
      6. book every requested method, then train, test and evaluate;
      7. close the output file, open the TMVA GUI and enter the ROOT
         application loop.

    Calls sys.exit(1) on unknown options, on a malformed --inputtrees value,
    when the input file cannot be accessed and when the factory rejects the
    input trees.  Calls sys.exit(0) after printing the help text.
    """
    try:
        # retrieve command line options
        shortopts = "m:i:t:o:vh?"
        longopts = ["methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose", "help", "usage"]
        opts, _args = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        # print help information and exit
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    verbose = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # split() without arguments drops leading, trailing and repeated
            # blanks.  The names are used in the order given: signal first,
            # background second (they are NOT re-sorted alphabetically).
            trees = a.split()
            if len(trees) != 2:
                print("ERROR: need to give two trees (each one for signal and background)")
                print(trees)
                sys.exit(1)
            treeNameSig, treeNameBkg = trees
        elif o in ("-v", "--verbose"):
            verbose = True

    # Accept commas and/or blanks as separators so that both the documented
    # "--methods Fisher,Likelihood" form and a blank-separated list work.
    mlist = methods.replace(",", " ").split()
    print("=== TMVAnalysis: use methods...")
    for m in mlist:
        print("=== <%s>" % m)

    # import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # AccessPathName() is falsy when the path CAN be accessed.  Check before
    # the output file is (re)created so a bad input path leaves it untouched.
    if gSystem.AccessPathName(infname):
        print("ERROR: could not access data file %s\n" % infname)
        sys.exit(1)

    # logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.Macro('../macros/TMVAlogon.C')
    gROOT.LoadMacro('../macros/TMVAGui.C')

    # import TMVA classes from ROOT
    from ROOT import TMVA

    # output file
    outputFile = TFile(outfname, 'RECREATE')

    # create instance of factory
    factory = TMVA.Factory("TMVAnalysis", outputFile, "Color")

    # set verbosity
    factory.SetVerbose(verbose)

    # read input data
    inputFile = TFile(infname)
    signal = inputFile.Get(treeNameSig)
    background = inputFile.Get(treeNameBkg)

    # global event weights (see below for setting event-wise weights)
    signalWeight = 1.0
    backgroundWeight = 1.0

    if not factory.SetInputTrees(signal, background, signalWeight, backgroundWeight):
        print("ERROR: could not set input trees\n")
        sys.exit(1)

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    for expression in ("var1+var2", "var1-var2", "var3", "var4"):
        factory.AddVariable(expression, 'F')

    # This would set individual event weights (the variables defined in the
    # expression need to exist in the original TTree)
    #    factory.SetWeightExpression("weight1*weight2")
    #
    # Apply additional cuts on the signal and background sample.
    # Assumptions on size of training and testing sample:
    #    a) equal number of signal and background events is used for training
    #    b) any numbers of signal and background events are used for testing
    #    c) an explicit syntax can violate a)
    # more Documentation with the Factory class
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycut = TCut("")

    # here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree(mycut, "NSigTrain=3000:NBkgTrain=3000:SplitMode=Random:NormMode=NumEvents:!V")

    # an alternative call to use a different number of signal and background training/test events is:
    #    factory.PrepareTrainingAndTestTree( mycut, "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" )

    # book the requested classifiers
    _book_methods(factory, TMVA.Types, mlist)

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    # clean up
    factory.IsA().Destructor(factory)

    print("=== wrote root file %s\n" % outfname)
    print("=== TMVAnalysis is done!\n")

    # open the GUI for the result macros
    gROOT.ProcessLine("TMVAGui(\"%s\")" % outfname)

    # keep the ROOT thread running
    gApplication.Run()


# One row per bookable classifier, in booking order:
#   (name accepted by --methods, attribute of TMVA.Types, title passed to
#    BookMethod, option string passed to BookMethod)
_METHOD_BOOKINGS = (
    # Cut optimisation
    ("Cuts", "kCuts", "Cuts",
     "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart"),
    # Cut optimisation using decorrelated input variables
    ("CutsD", "kCuts", "CutsD",
     "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate"),
    # Cut optimisation with a Genetic Algorithm
    ("CutsGA", "kCuts", "CutsGA",
     "!H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=100:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp=FSmart"),
    # Likelihood
    ("Likelihood", "kLikelihood", "Likelihood",
     "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=100:NSmoothBkg[0]=10:NSmoothBkg[1]=100:NSmooth=10:NAvEvtPerBin=50"),
    # test the decorrelated likelihood
    ("LikelihoodD", "kLikelihood", "LikelihoodD",
     "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=100:NSmoothBkg[0]=10:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate"),
    ("LikelihoodPCA", "kLikelihood", "LikelihoodPCA",
     "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=100:NSmoothBkg[0]=10:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA"),
    # likelihood method with unbinned kernel estimator
    ("LikelihoodKDE", "kLikelihood", "LikelihoodKDE",
     "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50"),
    # PDE - RS method
    ("PDERS", "kPDERS", "PDERS",
     "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:InitialScale=0.99"),
    ("PDERSD", "kPDERS", "PDERSD",
     "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:InitialScale=0.99:VarTransform=Decorrelate"),
    ("PDERSPCA", "kPDERS", "PDERSPCA",
     "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:InitialScale=0.99:VarTransform=PCA"),
    # HMatrix (chi2-squared) method
    ("HMatrix", "kHMatrix", "HMatrix", "!H:!V"),
    # Fisher - also creates PDF for MVA output (here as an example, can be used for any other classifier)
    ("Fisher", "kFisher", "Fisher",
     "H:!V:!Normalise:CreateMVAPdfs:Fisher:NbinsMVAPdf=50:NsmoothMVAPdf=1"),
    # Function discriminant analysis (requested as "FDA", booked under the title "FDA_MT")
    ("FDA", "kFDA", "FDA_MT",
     "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch"),
    # the new TMVA ANN: MLP (recommended ANN)
    ("MLP", "kMLP", "MLP", "Normalise:H:!V:NCycles=200:HiddenLayers=N+1,N:TestRate=5"),
    # CF(Clermont-Ferrand)ANN
    ("CFMlpANN", "kCFMlpANN", "CFMlpANN", "!H:!V:NCycles=500:HiddenLayers=N+1,N"),
    # Tmlp(Root)ANN
    ("TMlpANN", "kTMlpANN", "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N"),
    # Support Vector Machine with varying kernel functions
    ("SVM_Gauss", "kSVM", "SVM_Gauss", "Sigma=2:C=1:Tol=0.001:Kernel=Gauss"),
    ("SVM_Poly", "kSVM", "SVM_Poly", "Order=4:Theta=1:C=0.1:Tol=0.001:Kernel=Polynomial"),
    ("SVM_Lin", "kSVM", "SVM_Lin", "!H:!V:Kernel=Linear:C=1:Tol=0.001"),
    # Boosted Decision Trees
    ("BDT", "kBDT", "BDT",
     "!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=4.5"),
    # Decorrelated Boosted Decision Trees: VarTransform=Decorrelate is what
    # distinguishes this booking from plain "BDT" (same convention as the
    # CutsD, LikelihoodD and PDERSD rows above).
    ("BDTD", "kBDT", "BDTD",
     "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=4.5:VarTransform=Decorrelate"),
    # Friedman's RuleFit method
    ("RuleFitTMVA", "kRuleFit", "RuleFitTMVA",
     "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02"),
    ("RuleFitJF", "kRuleFit", "RuleFitJF",
     "!V:RuleFitModule=RFFriedman:Model=ModRuleLinear:GDStep=0.01:GDNSteps=10000:GDErrScale=1.1:RFNendnodes=4"),
)


def _book_methods(factory, types, requested):
    """Book on *factory* every row of _METHOD_BOOKINGS named in *requested*.

    *types* is the object carrying the k... method-type attributes (main()
    passes TMVA.Types).  Each method is booked at most once, in table order;
    names in *requested* that have no row in the table are ignored.
    """
    for name, type_attr, title, options in _METHOD_BOOKINGS:
        if name in requested:
            # Resolve the type lazily so only requested methods touch *types*.
            factory.BookMethod(getattr(types, type_attr), title, options)
301    
302     # ----------------------------------------------------------
303    
# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()