ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/addingSamples.py
(Generate patch)

Comparing UserCode/VHbb/python/addingSamples.py (file contents):
Revision 1.3 by nmohr, Fri Jan 11 09:28:34 2013 UTC vs.
Revision 1.6 by nmohr, Tue Mar 5 12:48:33 2013 UTC

# Line 1 | Line 1
1 < #!/afs/cern.ch/cms/slc5_amd64_gcc434/cms/cmssw/CMSSW_4_2_8/external/slc5_amd64_gcc434/bin/python2.6
1 > #!/usr/bin/env python
2  
3 < import ROOT
3 > import sys,hashlib,ROOT
4 > ROOT.gROOT.SetBatch(True)
5   from array import array
6 + from optparse import OptionParser
7  
6
7 def getObj( infile, name ):
8    infile.cd()
9    for key in ROOT.gDirectory.GetListOfKeys():
10        obj = key.ReadObj()
11        print obj.GetName()
12        if obj.GetName() == name:
13            return obj;
14
15
16 def updateEventArray( tree, lheBin, N ):
17    for bin in lheBin:
18        print bin
19        N.append( (bin, 1.*tree.GetEntries(bin) ) )
20    return N
21    
8   def getTotal( bin, fileList ):
9      total = 0.
10      for i in range(0,len(fileList)):
11          total = total + fileList[i][2][bin]
26        #print total[bin]
12      return total
28    
29 inc = []
30 j1 = []
31 j2 = []
32 j3 = []
33 j4 = []
34 pt5070 = []
35 pt70100 = []
36 pt100180 = []
37 pt180 = []
38 ht200400 = []
39 ht400 = []
40
41 prefix='DiJetPt_noweight_'
42 fileList = [ [prefix+'DYJetsToLL_M-50_TuneZ2Star_8TeV-madgraph-tarball.root' , 2950.0, inc ] ,
43             [prefix+'DYJetsToLL_PtZ-50To70_TuneZ2star_8TeV-madgraph-tarball.root' , 93.8, pt5070 ],
44             [prefix+'DYJetsToLL_PtZ-70To100_TuneZ2star_8TeV-madgraph-tarball.root' , 52.31, pt70100 ],
45             [prefix+'DYJetsToLL_PtZ-100_TuneZ2star_8TeV-madgraph.root' , 34.1, pt100180 ],
46             [prefix+'DYJetsToLL_PtZ-180_TuneZ2star_8TeV-madgraph.root' , 4.56, pt180 ],
47             [prefix+'DY1JetsToLL_M-50_TuneZ2Star_8TeV-madgraph.root' , 561.0, j1 ] ,
48             [prefix+'DY2JetsToLL_M-50_TuneZ2Star_8TeV-madgraph.root' , 181.0, j2 ] ,
49             [prefix+'DY3JetsToLL_M-50_TuneZ2Star_8TeV-madgraph.root' , 51.1, j3 ] ,
50             [prefix+'DY4JetsToLL_M-50_TuneZ2Star_8TeV-madgraph.root' , 23.04, j4 ] ,
51             [prefix+'DYJetsToLL_HT-200To400_TuneZ2Star_8TeV-madgraph.root' , 19.73, ht200400 ],
52             [prefix+'DYJetsToLL_HT-400ToInf_TuneZ2Star_8TeV-madgraph.root' , 2.826, ht400 ] ]
53
54 #look here https://www.evernote.com/shard/s186/sh/8ffc289c-ede2-4e09-83ba-1e1981f13617/4d5aac2f42a9fd480dc66f9303c1c217
55
56 lheBin = [
57
58    'lheV_pt < 50 & lheNj == 0 & lheHT < 200',
59          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 0 & lheHT < 200',
60          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 0 & lheHT < 200',
61          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 0 & lheHT < 200',
62          'lheV_pt > 180 & lheNj == 0 & lheHT < 200',
63
64          'lheV_pt < 50 & lheNj == 1 & lheHT < 200',
65          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 1 & lheHT < 200',
66          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 1 & lheHT < 200',
67          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 1 & lheHT < 200',
68          'lheV_pt > 180 & lheNj == 1 & lheHT < 200',
69          
70          'lheV_pt < 50 & lheNj == 2 & lheHT < 200',
71          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 2 & lheHT < 200',
72          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 2 & lheHT < 200',
73          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 2 & lheHT < 200',
74          'lheV_pt > 180 & lheNj == 2 & lheHT < 200',
75          
76          'lheV_pt < 50 & lheNj == 3 & lheHT < 200',
77          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 3 & lheHT < 200',
78          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 3 & lheHT < 200',
79          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 3 & lheHT < 200',
80          'lheV_pt > 180 & lheNj == 3 & lheHT < 200',
81          
82          'lheV_pt < 50 & lheNj == 4 & lheHT < 200',
83          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 4 & lheHT < 200',
84          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 4 & lheHT < 200',
85          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 4 & lheHT < 200',
86          'lheV_pt > 180 & lheNj == 4 & lheHT < 200',
87
88
89   'lheV_pt < 50 & lheNj == 0 & lheHT > 200 & lheHT < 400',
90          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 0 & lheHT > 200 & lheHT < 400',
91          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 0 & lheHT > 200 & lheHT < 400',
92          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 0 & lheHT > 200 & lheHT < 400',
93          'lheV_pt > 180 & lheNj == 0 & lheHT > 200 & lheHT < 400',
94
95          'lheV_pt < 50 & lheNj == 1 & lheHT > 200 & lheHT < 400',
96          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 1 & lheHT > 200 & lheHT < 400',
97          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 1 & lheHT > 200 & lheHT < 400',
98          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 1 & lheHT > 200 & lheHT < 400',
99          'lheV_pt > 180 & lheNj == 1 & lheHT > 200 & lheHT < 400',
100          
101          'lheV_pt < 50 & lheNj == 2 & lheHT > 200 & lheHT < 400',
102          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 2 & lheHT > 200 & lheHT < 400',
103          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 2 & lheHT > 200 & lheHT < 400',
104          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 2 & lheHT > 200 & lheHT < 400',
105          'lheV_pt > 180 & lheNj == 2 & lheHT > 200 & lheHT < 400',
106          
107          'lheV_pt < 50 & lheNj == 3 & lheHT > 200 & lheHT < 400',
108          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 3 & lheHT > 200 & lheHT < 400',
109          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 3 & lheHT > 200 & lheHT < 400',
110          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 3 & lheHT > 200 & lheHT < 400',
111          'lheV_pt > 180 & lheNj == 3 & lheHT > 200 & lheHT < 400',
112          
113          'lheV_pt < 50 & lheNj == 4 & lheHT > 200 & lheHT < 400',
114          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 4 & lheHT > 200 & lheHT < 400',
115          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 4 & lheHT > 200 & lheHT < 400',
116          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 4 & lheHT > 200 & lheHT < 400',
117          'lheV_pt > 180 & lheNj == 4 & lheHT > 200 & lheHT < 400',
118
119
120    'lheV_pt < 50 & lheNj == 0 & lheHT > 400',
121          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 0 & lheHT > 400',
122          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 0 & lheHT > 400',
123          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 0 & lheHT > 400',
124          'lheV_pt > 180 & lheNj == 0 & lheHT > 400',
125
126          'lheV_pt < 50 & lheNj == 1 & lheHT > 400',
127          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 1 & lheHT > 400',
128          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 1 & lheHT > 400',
129          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 1 & lheHT > 400',
130          'lheV_pt > 180 & lheNj == 1 & lheHT > 400',
131          
132          'lheV_pt < 50 & lheNj == 2 & lheHT > 400',
133          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 2 & lheHT > 400',
134          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 2 & lheHT > 400',
135          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 2 & lheHT > 400',
136          'lheV_pt > 180 & lheNj == 2 & lheHT > 400',
137          
138          'lheV_pt < 50 & lheNj == 3 & lheHT > 400',
139          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 3 & lheHT > 400',
140          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 3 & lheHT > 400',
141          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 3 & lheHT > 400',
142          'lheV_pt > 180 & lheNj == 3 & lheHT > 400',
143          
144          'lheV_pt < 50 & lheNj == 4 & lheHT > 400',
145          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 4 & lheHT > 400',
146          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 4 & lheHT > 400',
147          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 4 & lheHT > 400',
148          'lheV_pt > 180 & lheNj == 4 & lheHT > 400'
149
150
151    ]
152
153 eventList = {}
154 num = []
155 for file in fileList:
156    #file.append( 1 )
157    #continue
158    print file
159    infile = ROOT.TFile(file[0],"READ")
160    tree = getObj(infile, 'tree')
161    for bin in lheBin:
162        print bin
163        file[2].append( 1.*tree.GetEntries(bin) )
164 #    file[2] = updateEventArray( tree, lheBin, file[2] )
165    count = getObj( infile, 'CountWithPU' )
166    file.append( count.GetBinContent(1) )
167    print fileList
168
169 CountIncl = fileList[0][3]
170 print fileList
13  
14  
15 < #print num[fileList[1]]
15 > argv = sys.argv
16 > parser = OptionParser()
17  
18 < #total -> total numer of events in each lheBin
19 < print 'Calculating total'
20 < total = []
21 < weight= []
22 < for bin in range(0, len(lheBin) ):
23 <    #weight.append(1.)
24 <    #continue
25 <    total.append(getTotal(bin, fileList))
26 <    print bin
27 <        #to better stich we need the highest stat (that should correspond to the binned sample relative to the bin)
28 <    fileList.sort( key=lambda file: file[2][bin], reverse=True )
29 <    print 'After sorting'
30 <    print fileList
31 <    if total[bin] > 0.:
32 <        #the first is always the one with the highest N in the bin:
33 <        weight.append( (fileList[0][1]/fileList[0][3]) * (CountIncl/2950.0) * fileList[0][2][bin]/total[bin] )
34 <        print weight[bin]
18 > parser.add_option("-C", "--config", dest="config", default=[], action="append",
19 >                              help="configuration defining bins")
20 > parser.add_option("-A", "--apply", dest="apply", default=False, action="store_true",
21 >                              help="configuration defining the")
22 > parser.add_option("-W", "--weight", dest="weights", default=True, action="store_false",
23 >                              help="Calculate the weights")
24 > parser.add_option("-V", "--validate", dest="validate", default=True, action="store_false",
25 >                              help="Make validation plot the weights")
26 > (opts, args) = parser.parse_args(argv)
27 > if opts.config =="":
28 >    opts.config = "config"
29 >
30 > from myutils import BetterConfigParser
31 >
32 > print opts.config
33 > config = BetterConfigParser()
34 > config.read(opts.config)
35 > prefix = config.get('LHEWeights','prefix')
36 > newpostfix = config.get('LHEWeights','newpostfix')
37 > inclusive = config.get('LHEWeights','inclusive')
38 > files = eval(config.get('LHEWeights','fileList'))
39 > lheBin = eval(config.get('LHEWeights','lheBin'))
40 > fileList = []
41 > for file in files:
42 >    new_entry = [prefix+file+'.root',files[file],[]]
43 >    if file == inclusive:
44 >        fileList.insert(0, new_entry)
45      else:
46 <        weight.append(1.)
46 >        fileList.append(new_entry)
47 > print fileList
48 > print lheBin
49  
50 < print weight
51 <    
52 < #now add the branch with the weight normalized to the inclusive
53 < for file in fileList:
54 <    infile = ROOT.TFile(file[0],"READ")
55 <    outfile = ROOT.TFile('lheWeight.'+file[0],'RECREATE')
56 <    histoInfile = ROOT.TFile(prefix+'DYJetsToLL_M-50_TuneZ2Star_8TeV-madgraph-tarball.root',"READ")
57 <    histoInfile.cd()
58 <    obj = ROOT.TObject
59 <    for key in ROOT.gDirectory.GetListOfKeys():
60 <        histoInfile.cd()
61 <        obj = key.ReadObj()
62 <        print obj.GetName()
208 <        if obj.GetName() == 'tree':
209 <            continue
210 <        outfile.cd()
211 <        print key.GetName()
212 <        obj.Write(key.GetName())
50 > def get_weights(fileList,lheBin):    
51 >    for file in fileList:
52 >        #file.append( 1 )
53 >        #continue
54 >        print file
55 >        infile = ROOT.TFile.Open(file[0],"READ")
56 >        tree = infile.Get('tree')
57 >        for bin in lheBin:
58 >            file[2].append( float(tree.GetEntries(bin)) )
59 >        count = infile.Get('CountWithPU')
60 >        file.append( count.GetBinContent(1) )
61 >        infile.Close()
62 >        del tree
63  
64 <    infile.cd()
65 <    tree = infile.Get('tree')
216 <    outfile.cd()
217 <    newtree = tree.CloneTree(0)
218 <    lheWeight = array('f',[0])
219 <    newtree.Branch('lheWeight',lheWeight,'lheWeight/F')
64 >    CountIncl = fileList[0][3]
65 >    print fileList
66  
67 <    nEntries = tree.GetEntries()
68 <    theBinForms = []
67 >    #total -> total numer of events in each lheBin
68 >    print 'Calculating total'
69 >    total = []
70 >    weight= []
71      for bin in range(0, len(lheBin) ):
72 <        theBinForms.append(ROOT.TTreeFormula("Bin_formula_%s"%(bin),lheBin[bin],tree))
73 <    for entry in range(0,nEntries):
74 <        tree.GetEntry(entry)
75 <        i = -1
76 <        for bin in range(0, len(lheBin) ):
77 <            if theBinForms[bin].EvalInstance() > 0.:
78 <                i = bin
79 <        if i > -1:
80 <            lheWeight[0] = weight[i]
72 >        #weight.append(1.)
73 >        #continue
74 >        total.append(getTotal(bin, fileList))
75 >        #print lheBin[bin]
76 >        #to better stich we need the highest stat (that should correspond to the binned sample relative to the bin)
77 >        fileList.sort( key=lambda file: file[2][bin], reverse=True )
78 >        #print 'After sorting'
79 >        #print fileList
80 >        if total[bin] > 0.:
81 >            #the first is always the one with the highest N in the bin:
82 >            weight.append( (fileList[0][1]/fileList[0][3]) * (CountIncl/2950.0) * fileList[0][2][bin]/total[bin] )
83          else:
84 <            lheWeight[0] = 1.
84 >            weight.append(1.)
85  
86 <            
87 <        newtree.Fill()
88 <                  
89 <    newtree.AutoSave()
90 <    outfile.Write()
241 <    outfile.Close()
86 >    weight_map = {}
87 >    for bin in range(0, len(lheBin) ):
88 >        print '%s: %.2f' %(lheBin[bin], weight[bin])
89 >        weight_map[lheBin[bin]] = weight[bin]
90 >    return weight_map
91  
92 +
93 + def apply_weights(fileList,weight_map,inclusive,newpostfix):    
94 +    #now add the branch with the weight normalized to the inclusive
95 +    for file in fileList:
96 +        outfile = ROOT.TFile.Open(file[0].replace('.root',newpostfix),'RECREATE')
97 +        infile = ROOT.TFile.Open(file[0],"READ")
98 +        histoInfile = ROOT.TFile.Open(inclusive,"READ")
99 +        histoInfile.cd()
100 +        obj = ROOT.TObject
101 +        for key in ROOT.gDirectory.GetListOfKeys():
102 +            histoInfile.cd()
103 +            obj = key.ReadObj()
104 +            #print obj.GetName()
105 +            if obj.GetName() == 'tree':
106 +                continue
107 +            outfile.cd()
108 +            #print key.GetName()
109 +            obj.Write(key.GetName())
110 +
111 +        infile.cd()
112 +        tree = infile.Get('tree')
113 +        outfile.cd()
114 +        newtree = tree.CloneTree(0)
115 +        lheWeight = array('f',[0])
116 +        newtree.Branch('lheWeight',lheWeight,'lheWeight/F')
117 +
118 +        nEntries = tree.GetEntries()
119 +        theBinForms = {}
120 +        for bin in weight_map:
121 +            theBinForms[bin] = ROOT.TTreeFormula("Bin_formula_%s"%(bin),bin,tree)
122 +        for entry in range(0,nEntries):
123 +            tree.GetEntry(entry)
124 +            match_bin = None
125 +            for bin in weight_map:
126 +                if theBinForms[bin].EvalInstance() > 0.:
127 +                    match_bin = bin
128 +            if match_bin:
129 +                lheWeight[0] = weight_map[match_bin]
130 +            else:
131 +                lheWeight[0] = 1.
132 +            newtree.Fill()
133 +                  
134 +        newtree.AutoSave()
135 +        outfile.Write()
136 +        outfile.Close()
137 +        infile.Close()
138 +        del tree
139 +        del newtree
140 +
141 + def do_validation(fileList,inclusive,newpostfix):
142 +    histo = ROOT.TH1F("lheV_pt","lheV_pt",300,0,300)
143 +    histo1 = ROOT.TH1F("lheV_ptInc","lheV_ptInc",300,0,300)
144 +    histo.SetDirectory(0)
145 +    histo1.SetDirectory(0)
146 +
147 +    for file in fileList:
148 +        h_name=str(hashlib.sha224(file[0]).hexdigest())
149 +        print file[0]
150 +        print h_name
151 +        tfile = ROOT.TFile.Open(file[0].replace('.root',newpostfix),"read")
152 +        tree = tfile.Get("tree")
153 +        h_tmp = ROOT.TH1F(h_name,h_name,300,0,300)
154 +        #ROOT.gDirectory.ls()
155 +        tree.Draw("lheV_pt>>"+str(h_name),"(lheWeight)*(lheV_pt < 300.)","goff")
156 +        if inclusive in file[0]:
157 +            h_tmp1 = ROOT.TH1F(h_name+'Inc',h_name+'Inc',300,0,300)
158 +            tree.Draw("lheV_pt>>"+str(h_name+'Inc'),"(lheV_pt < 300.)","goff")
159 +            histo1.Add(h_tmp1)
160 +        histo.Add(h_tmp)
161 +        tfile.Close()
162 +        del tree
163 +
164 +
165 +    canvas = ROOT.TCanvas("lheV_pt","lheV_pt",600,600)
166 +    ROOT.gPad.SetLogy()
167 +    histo.Draw()
168 +    histo1.Draw('SAME')
169 +    print histo.Integral(100,300)
170 +    print histo1.Integral(100,300)
171 +    histo1.SetLineColor(ROOT.kRed)
172 +    canvas.Print("validation_lheV_pt.pdf","pdf")
173 +
174 + if opts.weights:
175 +    weight_map = get_weights(fileList,lheBin)
176 +    config.set('LHEWeights', 'weights_per_bin', '%s' %weight_map)
177 +    f = open('8TeVconfig/lhe_weights', 'w')
178 +    config.write(f)
179 +    f.close()
180 + elif opts.apply:
181 +    weight_map = config.get('LHEWeights', 'weights_per_bin')
182 + if opts.apply:
183 +    apply_weights(fileList,weight_map,prefix+inclusive,newpostfix)
184 + if opts.validate:
185 +    do_validation(fileList,inclusive,newpostfix)

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines