ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/RootMacros/tree2hists.py
(Generate patch)

Comparing UserCode/RootMacros/tree2hists.py (file contents):
Revision 1.3 by anderson, Thu May 20 19:44:30 2010 UTC vs.
Revision 1.5 by anderson, Wed Jun 2 16:06:43 2010 UTC

# Line 1 | Line 1
1   #!/usr/bin/env python
2 + """
3 + Create ROOT Histograms from one or more ROOT TTrees or TNtuples.
4 +
5 + Options are specified in the given configuration file.
6 + """
7 + ## Created by Michael Anderson (mbanderson@wisc.edu), May 2010
8   #
9 < # Run by typing:
10 < #   ./tree2hists.py
11 < #
12 < # Creates histograms from TTree in one or more files.
13 < # Different cuts & scales can be set for each file.
14 < #
9 < # Details are specified in a configruation file, and run with:
10 < #   ./tree2hists.py config.py
11 < #
12 < # Michael Anderson
13 < # Original: Nov 4, 2009
14 < # Updated:  May 20, 2010
9 > # Create configuration file:
10 > #   tree2hists.py
11 > # Edit, then run with config file:
12 > #   tree2hists.py config.py
13 >
14 > ######## Import python libraries #############################################
15  
16   import sys                              # For exiting program
17 < if sys.version_info < (2, 6):
18 <    print "Go to a CMSSW dir and type 'cmsenv' first. (this loads a modern version of python)"
17 > try:
18 >    from ROOT import TFile, TTree, TH1F, TH2F, TH3F, gROOT
19 > except Exception, e:
20 >    print e
21 >    print ("Use a python that has PyROOT installed.")
22      sys.exit(0)
23 < from ROOT import TFile, TH1F, TH2F, TH3F, TTree, gROOT # Import any ROOT class you want
24 < from copy import deepcopy               # For copying histograms
25 < from math import pi
26 < from array import array                 # For making Float_t array ROOT wants (for hists)
27 < from datetime import datetime           # For output filename
28 < from os import path                     # For finding file
29 <
30 < ########################################
31 < # Class for root files to store
32 < # file's name, scale, cuts, etc...
33 < class rootFile:
34 <    def __init__(self, fileName, scale=1.0, cuts="", tree=""):
35 <        self.name  = fileName
36 <        self.scale = scale
37 <        self.cuts  = cuts
38 <        self.file  = TFile(fileName, "read")     # Open TFile
39 <        if self.file.IsZombie():
40 <            print "Error opening %s, exiting..." % self.name
38 <            sys.exit(0)
39 <        print "Opened %s, scale=%.2e, cuts='%s'" % (fileName, scale, cuts)
40 <        self.ttree = TTree()                           # Create empty TTree, and
41 <        try:                                           # try to get TTree from file.
42 <            self.file.GetObject(tree, self.ttree)      # ttreeName set in variables below
43 <        except:
44 <            print "Error: %s not found in %s, exiting..." % (tree, fileName)
45 <            sys.exit(0)
46 <        print "\t %s contains %.0f entries before cuts, %.0f after." % (tree, self.ttree.GetEntries(), self.ttree.GetEntries(cuts))
23 > from copy import deepcopy     # For copying histograms
24 > from math import pi           # For use in histogram bounds
25 > from array import array       # For making Float_t array ROOT wants (for hists)
26 > from datetime import datetime # For output filename
27 > from os import path           # For finding file
28 >
29 > ######## Define classes and generators #######################################
30 >
31 > class RootTree:
32 >    """Wrapper for TTrees and TNtuples, allowing association with
33 >    a scale and cuts."""
34 >    def __init__(self, treeName, fileName, scale=1.0, cuts=""):
35 >        self.fileName = fileName
36 >        self.treeName = treeName
37 >        self.scale    = scale
38 >        self.cuts     = cuts
39 >        self.tfile    = TFile()
40 >        self.ttree    = TTree()
41  
48 # Class to store varibles, histograms to plot them into, and cuts
42   class Plot:
43 <    def __init__(self, treeVariable, histogram, cuts=""):
43 >    """Wrapper for TH1 objects, associating TTree variables with a histogram"""
44 >    def __init__(self, treeVariable, histogram, cuts="", storeErrors=True):
45          self.treeVariable = treeVariable
46 <        self.histogram = histogram
47 <        self.cuts = cuts
48 <        self.histogram.Sumw2()  # Store errors
49 <
50 < # This joins a list of cuts (strings)
51 < # into something ROOT can handle
52 < #  example: given several strings, like "1<2","","5>4"
53 < #  this returns the sting "1<2&&5>4"
54 < def joinNonEmpty(*listOfCuts):
55 <    listOfNonEmptyCuts=[]
62 <    for cut in listOfCuts:
46 >        self.histogram    = histogram
47 >        self.name         = histogram.GetName()
48 >        self.cuts         = cuts
49 >        if storeErrors: self.histogram.Sumw2()
50 >
51 > def join_cuts(*list_of_cuts):
52 >    """Joins list of cuts (strings) into something ROOT can handle.
53 >    Example:  given ('1<2','','5>4') returns '1<2&&5>4'"""
54 >    list_of_nonempty_cuts = []
55 >    for cut in list_of_cuts:
56          if cut:
57 <            listOfNonEmptyCuts.append(cut)
58 <    return '&&'.join(listOfNonEmptyCuts)
57 >            list_of_nonempty_cuts.append(cut)
58 >    return '&&'.join(list_of_nonempty_cuts)
59  
60 < def writeDefaultConfig():
61 <    if path.exists('t2h_config.py'):
62 <        print "Specify a config file, like:"
63 <        print "./tree2hists.py t2h_config.py"
64 <        sys.exit(0)
60 > def duration_to_string(start, end):
61 >    timeTaken = end - start
62 >    hours, remainder = divmod(timeTaken.seconds, 3600)
63 >    minutes, seconds = divmod(remainder, 60)
64 >    if hours>0:
65 >        return "%i hours, %i minutes" % (hours, minutes)
66 >    elif minutes>0:
67 >        return "%i minutes" % minutes
68 >    return "%i seconds" % seconds                            
69 >
70 > def write_default_T2H_config():
71 >    """Writes configuration file for tree2hists"""
72      defaultConfig = '''# Configuration file for tree2hists.py
73   # Created %s.
74 < from tree2hists import *
74 > from tree2hists import RootTree, Plot, array, pi, TH1F
75  
76 < listOfFiles = [rootFile("MultiPhotonAnalyzer_SDEG.root", scale=1.0, tree="NTuples/Analysis"),
77 <               rootFile("MultiPhotonAnalyzer_SD_EG_May7.root", scale=1.0, tree="NTuples/Analysis")]
76 > list_of_files = [RootTree("NTuples/Analysis", fileName="photons.root", scale=1.0, cuts=""),
77 >                 RootTree("NTuples/Analysis", fileName="photons2.root", scale=1.0, cuts="")]
78  
79 < outputFilename = "Hists_Data_%%s.root" %% datetime.now().strftime("%%b%%d_%%I%%p")
79 > output_filename = "Hists_photons.root"
80  
81 < cutForAllFiles = "(!TTBit[36] && !TTBit[37] && !TTBit[38] && !TTBit[39] && !vtxIsFake && vtxNdof>4 && abs(vtxZ)<=15)" + \
82 <                 "&&((isEB[0] && (seedSeverity[0]!=3 && seedSeverity[0]!=4 ) && (seedRecoFlag[0] != 2) ) || isEE[0])"
81 > cut_for_all_files = "(!TTBit[36] && !TTBit[37] && !TTBit[38] && !TTBit[39] && !vtxIsFake && vtxNdof>4 && abs(vtxZ)<=15)" + \\
82 >                    "&&((isEB[0] && (seedSeverity[0]!=3 && seedSeverity[0]!=4 ) && (seedRecoFlag[0] != 2) ) || isEE[0])"
83  
84 < # All plots are made for each "cutSet".
85 < # A cut set is 3 things: folder name to store hists in, string to add to hist titles, and actual cuts
86 < cutSets = (
84 > # All plots are made for each "cut set".
85 > # A "cut set" is 3 things: folder name to store hists in, string to add to hist titles, and cuts for these hists.
86 > # Let cut_sets = [] to make all plots.
87 > cut_sets = [
88      ("barrel15to20", "(|#eta|<1.45, 15<E_{T}<20)", "et[0]>15&&et[0]<20&&abs(eta[0])<1.45"),
89      ("barrel20to30", "(|#eta|<1.45, 20<E_{T}<30)", "et[0]>20&&et[0]<30&&abs(eta[0])<1.45"),
90      ("endcap15to20", "(1.7<|#eta|<2.5, 15<E_{T}<20)", "et[0]>15&&et[0]<20&&abs(eta[0])>1.7&&abs(eta[0])<2.5"),
91      ("endcap20to30", "(1.7<|#eta|<2.5, 20<E_{T}<30)", "et[0]>20&&et[0]<30&&abs(eta[0])>1.7&&abs(eta[0])<2.5")
92 < #    ("","",""),  # all plots with no special cuts
92 <    )
92 >    ]
93  
94   # Define histograms to plot
95   bins_et     = array("f", [15.0, 20.0, 30.0, 50.0, 80.0, 120.0])
96   bins_eta    = array("f", [-2.5, -1.7, -1.45, 0.0, 1.45, 1.7, 2.5])
97 < listOfPlots = [
97 > list_of_plots = [
98      Plot("et[0]"           , TH1F("pho_et"           , "Lead #gamma: E_{T};E_{T} (GeV);entries/bin", len(bins_et)-1, bins_et)),
99      Plot("eta[0]"          , TH1F("pho_eta"          , "Lead #gamma: #eta;#eta;entries/bin"        , len(bins_eta)-1, bins_eta)),
100      Plot("sigmaIetaIeta[0]", TH1F("pho_sigmaIEtaIEta", "Lead #gamma: #sigma_{i#etai#eta};#sigma_{i#etai#eta};entries/bin",20, 0, 0.06)),
# Line 108 | Line 108 | listOfPlots = [
108      f.close()
109      print "Created default configuration file: t2h_config.py"
110      print "Edit it, and run with:"
111 <    print "  tree2hists t2h_config.py"
112 < ########################################
111 >    print "  tree2hists.py t2h_config.py"
112 > ##############################################################################
113  
114 + def make_all_hists_all_files(list_of_RootTrees, list_of_Plots, cut_for_all_files, list_of_cutSets):
115 +    '''Open root files one at a time, make plots, then close them.'''
116 +    list_of_plots_to_write = []
117 +
118 +    # Create plots for each set of cuts
119 +    for set_of_cuts in list_of_cutSets:
120 +        histname_fix, title_fix, current_cut_set = set_of_cuts
121 +        for plot in list_of_Plots:
122 +            new_plot  = deepcopy(plot)
123 +            new_title = ' '.join((plot.histogram.GetTitle(), title_fix))
124 +            new_plot.histogram.SetTitle(new_title)
125 +            list_of_plots_to_write.append((new_plot, set_of_cuts))
126 +    
127 +    for j, rootTree in enumerate(list_of_RootTrees):
128 +        rootTree.tfile = TFile(rootTree.fileName, "read")           # Open TFile
129 +        if rootTree.tfile.IsZombie():
130 +            print "Error opening %s, exiting..." % rootTree.fileName
131 +            sys.exit(0)
132 +        try:                                      # Try to get TTree from file.
133 +            rootTree.tfile.GetObject(rootTree.treeName, rootTree.ttree)
134 +        except:
135 +            print "Error: %s not found in %s, exiting..." % (rootTree.treeName,
136 +                                                             rootTree.fileName)
137 +            sys.exit(1)
138  
139 < ########################################
140 < if __name__ == '__main__':
141 <    if len(sys.argv) > 1:
142 <        if path.isfile(sys.argv[1]):
143 <            config_file = sys.argv[1].split('.')[0]
144 <            try:
145 <                exec("from " + config_file + " import *")
146 <            except Exception, e:
147 <                print e
139 >        print "\n%s: Opened %s  %i MB" % (datetime.now().strftime("%I:%M%p"),
140 >                                          rootTree.fileName,
141 >                                          path.getsize(rootTree.fileName)/1048576)
142 >        print " %s has %i entries, will plot with scale=%.2e, cuts='%s'" % (rootTree.treeName,
143 >                                                                            rootTree.ttree.GetEntries(),
144 >                                                                            rootTree.scale,
145 >                                                                            rootTree.cuts)
146 >        
147 >        # Loop over plots
148 >        print "   # entries                  var >> histogram"
149 >        for i, (plot, set_of_cuts) in enumerate(list_of_plots_to_write):
150 >            histname_fix, title_fix, current_cut_set = set_of_cuts
151 >            tmp_hist = plot.histogram.Clone("temp")    # Create temp hist
152 >            all_cuts = join_cuts(cut_for_all_files, rootTree.cuts,
153 >                                 current_cut_set, plot.cuts) # Set cuts
154 >            rootTree.ttree.Draw( "%s >> temp" % plot.treeVariable, all_cuts,
155 >                                 "goff")               # Draw with graphics off
156 >            tmp_hist.Scale(rootTree.scale)             # Scale temp
157 >            entries_before = plot.histogram.GetEntries()
158 >            plot.histogram.Add(tmp_hist)               # Add temp hist to total
159 >            entries_after = plot.histogram.GetEntries()
160 >            print " %3i %7i %20s >> %s/%s" % (i, entries_after-entries_before,
161 >                                              plot.treeVariable, histname_fix,
162 >                                              plot.histogram.GetName()),
163 >            if plot.cuts:
164 >                print "\textra cuts: %s" % plot.cuts, # plot-specific cuts
165 >            print
166 >            
167 >        rootTree.tfile.Close()                                    # Close TFile
168 >        print "%s: Closed %s" % (datetime.now().strftime("%I:%M%p"),
169 >                                 rootTree.fileName)
170 >    return list_of_plots_to_write
171 >
172 >
173 > ######## Define the main program #############################################
174 > def tree2hists_main(config_file):
175 >    try:
176 >        # Import only certain variables
177 >        _temp = __import__(config_file, globals(), locals(),
178 >                           ['list_of_files','output_filename',
179 >                            'cut_for_all_files','cut_sets','list_of_plots'], -1)
180 >        list_of_files     = _temp.list_of_files
181 >        output_filename   = _temp.output_filename
182 >        cut_for_all_files = _temp.cut_for_all_files
183 >        cut_sets          = _temp.cut_sets
184 >        list_of_plots     = _temp.list_of_plots
185 >        for rootTree in list_of_files:
186 >            if not path.isfile(rootTree.fileName):
187 >                print "Error:\n  %s\nnot found for input." % rootTree.fileName
188                  sys.exit(1)
189 <        else:
190 <            print "%s not found." % sys.argv[1]
191 <            print "Create default config file by running tree2hists.py with no argument."
189 >        hist_names = [plot.name for plot in list_of_plots]
190 >        if len(hist_names)>len(set(hist_names)):
191 >            print hist_names
192 >            print "Error: Each plot needs a unique name, exiting..."
193              sys.exit(1)
194 <    else:
195 <        writeDefaultConfig()
196 <        sys.exit(0)
194 >        if path.isfile(output_filename):
195 >            print "Warning: %s exists" % output_filename
196 >    except Exception, e:
197 >        print e
198 >        print "Error with %s" % config_file
199 >        sys.exit(1)
200  
201      if path.isfile('rootlogon.C'):
202          print "Loading rootlogon.C"
203 <        gROOT.Macro('rootlogon.C')          # Run ROOT logon script
203 >        gROOT.Macro('rootlogon.C')    # Load functions from rootlogon script
204  
205 <    outputFile = TFile(outputFilename, "recreate")      # Open output file
206 <    if not outputFile.IsZombie():
207 <        print "Opened %s for output." % outputFilename
205 >    if cut_sets:
206 >        print "\n%i defined cut sets:" % len(cut_sets)
207 >        for cut in cut_sets:
208 >            name, title_fix, current_cut_set = cut
209 >            print "  %s\t: '%s'" % (name, current_cut_set)
210 >        cut_names = [name for name,num,cut in cut_sets]
211 >        if len(cut_names)>len(set(cut_names)):
212 >            print "Error: Each cut set needs a unique name, exiting..."
213 >            sys.exit(1)
214      else:
215 <        print "Error opening %s for output exiting..." % outputFilename
142 <        sys.exit(1)
215 >        cut_sets = [("","","")] # Make all plots, no extra cuts
216  
217 <    listOfPlotsToWrite = []
217 >    print "\nCuts to apply to all files:\n\t'%s'" % cut_for_all_files
218 >
219 >    start_time = datetime.now()
220 >    list_of_plots_to_write = make_all_hists_all_files(list_of_files,
221 >                                                      list_of_plots,
222 >                                                      cut_for_all_files,
223 >                                                      cut_sets)
224 >    end_time = datetime.now()
225 >    print "Done drawing all plots after %s." % duration_to_string(start_time, end_time)
226  
146    print "\nCuts applied to all plots from all files:\n  %s" % cutForAllFiles
147    numberOfPlots = len(listOfPlots)
148    print "\nCreating %i plots for each of %i cut sets..." % (numberOfPlots, len(cutSets))
149    for setOfCuts in cutSets:
150        histNamePostfix, titlePostfix, currentCutSet = setOfCuts
151        print '\n Cut set "%s":  %s' % (histNamePostfix, currentCutSet)
152        # Loop over all things to plot
153        for i, plot in enumerate(listOfPlots):
154            newTitle = ' '.join((plot.histogram.GetTitle(), titlePostfix))
155            newPlot  = deepcopy(plot)
156            dir = histNamePostfix
157            listOfPlotsToWrite.append((dir, newPlot))
158            newPlot.histogram.SetTitle(newTitle)
159            # Print plot being made
160            print "  %i %s >> %s/%s" % (i, newPlot.treeVariable, dir, newPlot.histogram.GetName()),
161            if newPlot.cuts:
162                print "\textra cuts: %s" % newPlot.cuts, # Plot-specific cuts
163            # Loop over all TTrees (from the different files)
164            for aFile in listOfFiles:
165                tempHist = newPlot.histogram.Clone("temp")                                   # Create temp histogram
166                cuts = joinNonEmpty(cutForAllFiles, aFile.cuts, currentCutSet, newPlot.cuts) # Set cuts for temp
167                aFile.ttree.Draw( "%s >> temp" % newPlot.treeVariable, cuts, "goff")         # Draw into temp; graphics off
168                tempHist.Scale(aFile.scale)                                                  # Scale temp
169                newPlot.histogram.Add(tempHist)                                              # Add temp to total histogram
170            print "%i" % newPlot.histogram.GetEntries()
171    print "done."
172    
227      #   Store and save/close files
228 +    outputFile = TFile(output_filename, "recreate")
229 +    if outputFile.IsZombie():
230 +        print "Error opening %s for output exiting..." % output_filename
231 +        sys.exit(1)
232 +    print "\nOpened output file. Saving histograms..."
233      outputFile.cd()
234 <    for setOfCuts in cutSets:
235 <        outputFile.mkdir(setOfCuts[0])
236 <    for dir, plot in listOfPlotsToWrite:
237 <        outputFile.cd(dir)
234 >    for set_of_cuts in cut_sets:
235 >        outputFile.mkdir(set_of_cuts[0])
236 >    print "   # entries  histogram"
237 >    for i, (plot, cutset) in enumerate(list_of_plots_to_write):
238 >        outputFile.cd(cutset[0])
239 >        print " %3i %7i  %s/%s" % (i, plot.histogram.GetEntries(),
240 >                                   cutset[0],
241 >                                   plot.histogram.GetName())
242          plot.histogram.Write()
180
181    print "Closing files...",
243      outputFile.Close()
244 <    for aFile in listOfFiles:
245 <        aFile.file.Close()
246 <    print "done.\n\nHistograms stored in\n  %s" % outputFilename
247 < ########################################
244 >    print "Done saving."
245 >    print "\nScaled & added histograms from %i TTrees saved in\n  %s" % (len(list_of_files), output_filename)
246 > ##############################################################################
247 >
248 > if __name__ == "__main__":
249 >    if len(sys.argv) > 1:
250 >        if path.isfile(sys.argv[1]):
251 >            config_file = sys.argv[1].split('.')[0]
252 >            tree2hists_main(config_file)
253 >        else:
254 >            print "%s not found." % sys.argv[1]
255 >            print("Create default config file by running tree2hists.py "
256 >                  "with no argument.")
257 >            sys.exit(1)
258 >    else:
259 >        if path.exists('t2h_config.py'):
260 >            print "Specify a config file, like:"
261 >            print "tree2hists.py t2h_config.py"
262 >            sys.exit(1)
263 >        write_default_T2H_config()
264 >        sys.exit(0)
265 >    #import cProfile
266 >    #cProfile.run('main()', 'fooprof')
267 >    #import pstats
268 >    #p = pstats.Stats('fooprof')
269 >    #p.sort_stats('cumulative').print_stats(15)

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines