ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/VHbb/python/addingSamples.py
(Generate patch)

Comparing UserCode/VHbb/python/addingSamples.py (file contents):
Revision 1.3 by nmohr, Fri Jan 11 09:28:34 2013 UTC vs.
Revision 1.4 by nmohr, Fri Mar 1 09:47:11 2013 UTC

# Line 1 | Line 1
1   #!/afs/cern.ch/cms/slc5_amd64_gcc434/cms/cmssw/CMSSW_4_2_8/external/slc5_amd64_gcc434/bin/python2.6
2  
3 < import ROOT
3 > import sys,ROOT
4 > ROOT.gROOT.SetBatch(True)
5   from array import array
6 + from optparse import OptionParser
7  
6
7 def getObj( infile, name ):
8    infile.cd()
9    for key in ROOT.gDirectory.GetListOfKeys():
10        obj = key.ReadObj()
11        print obj.GetName()
12        if obj.GetName() == name:
13            return obj;
14
15
16 def updateEventArray( tree, lheBin, N ):
17    for bin in lheBin:
18        print bin
19        N.append( (bin, 1.*tree.GetEntries(bin) ) )
20    return N
21    
8   def getTotal( bin, fileList ):
9      total = 0.
10      for i in range(0,len(fileList)):
11          total = total + fileList[i][2][bin]
26        #print total[bin]
12      return total
28    
29 inc = []
30 j1 = []
31 j2 = []
32 j3 = []
33 j4 = []
34 pt5070 = []
35 pt70100 = []
36 pt100180 = []
37 pt180 = []
38 ht200400 = []
39 ht400 = []
40
41 prefix='DiJetPt_noweight_'
42 fileList = [ [prefix+'DYJetsToLL_M-50_TuneZ2Star_8TeV-madgraph-tarball.root' , 2950.0, inc ] ,
43             [prefix+'DYJetsToLL_PtZ-50To70_TuneZ2star_8TeV-madgraph-tarball.root' , 93.8, pt5070 ],
44             [prefix+'DYJetsToLL_PtZ-70To100_TuneZ2star_8TeV-madgraph-tarball.root' , 52.31, pt70100 ],
45             [prefix+'DYJetsToLL_PtZ-100_TuneZ2star_8TeV-madgraph.root' , 34.1, pt100180 ],
46             [prefix+'DYJetsToLL_PtZ-180_TuneZ2star_8TeV-madgraph.root' , 4.56, pt180 ],
47             [prefix+'DY1JetsToLL_M-50_TuneZ2Star_8TeV-madgraph.root' , 561.0, j1 ] ,
48             [prefix+'DY2JetsToLL_M-50_TuneZ2Star_8TeV-madgraph.root' , 181.0, j2 ] ,
49             [prefix+'DY3JetsToLL_M-50_TuneZ2Star_8TeV-madgraph.root' , 51.1, j3 ] ,
50             [prefix+'DY4JetsToLL_M-50_TuneZ2Star_8TeV-madgraph.root' , 23.04, j4 ] ,
51             [prefix+'DYJetsToLL_HT-200To400_TuneZ2Star_8TeV-madgraph.root' , 19.73, ht200400 ],
52             [prefix+'DYJetsToLL_HT-400ToInf_TuneZ2Star_8TeV-madgraph.root' , 2.826, ht400 ] ]
53
54 #look here https://www.evernote.com/shard/s186/sh/8ffc289c-ede2-4e09-83ba-1e1981f13617/4d5aac2f42a9fd480dc66f9303c1c217
55
56 lheBin = [
57
58    'lheV_pt < 50 & lheNj == 0 & lheHT < 200',
59          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 0 & lheHT < 200',
60          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 0 & lheHT < 200',
61          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 0 & lheHT < 200',
62          'lheV_pt > 180 & lheNj == 0 & lheHT < 200',
63
64          'lheV_pt < 50 & lheNj == 1 & lheHT < 200',
65          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 1 & lheHT < 200',
66          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 1 & lheHT < 200',
67          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 1 & lheHT < 200',
68          'lheV_pt > 180 & lheNj == 1 & lheHT < 200',
69          
70          'lheV_pt < 50 & lheNj == 2 & lheHT < 200',
71          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 2 & lheHT < 200',
72          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 2 & lheHT < 200',
73          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 2 & lheHT < 200',
74          'lheV_pt > 180 & lheNj == 2 & lheHT < 200',
75          
76          'lheV_pt < 50 & lheNj == 3 & lheHT < 200',
77          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 3 & lheHT < 200',
78          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 3 & lheHT < 200',
79          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 3 & lheHT < 200',
80          'lheV_pt > 180 & lheNj == 3 & lheHT < 200',
81          
82          'lheV_pt < 50 & lheNj == 4 & lheHT < 200',
83          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 4 & lheHT < 200',
84          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 4 & lheHT < 200',
85          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 4 & lheHT < 200',
86          'lheV_pt > 180 & lheNj == 4 & lheHT < 200',
87
88
89   'lheV_pt < 50 & lheNj == 0 & lheHT > 200 & lheHT < 400',
90          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 0 & lheHT > 200 & lheHT < 400',
91          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 0 & lheHT > 200 & lheHT < 400',
92          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 0 & lheHT > 200 & lheHT < 400',
93          'lheV_pt > 180 & lheNj == 0 & lheHT > 200 & lheHT < 400',
94
95          'lheV_pt < 50 & lheNj == 1 & lheHT > 200 & lheHT < 400',
96          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 1 & lheHT > 200 & lheHT < 400',
97          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 1 & lheHT > 200 & lheHT < 400',
98          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 1 & lheHT > 200 & lheHT < 400',
99          'lheV_pt > 180 & lheNj == 1 & lheHT > 200 & lheHT < 400',
100          
101          'lheV_pt < 50 & lheNj == 2 & lheHT > 200 & lheHT < 400',
102          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 2 & lheHT > 200 & lheHT < 400',
103          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 2 & lheHT > 200 & lheHT < 400',
104          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 2 & lheHT > 200 & lheHT < 400',
105          'lheV_pt > 180 & lheNj == 2 & lheHT > 200 & lheHT < 400',
106          
107          'lheV_pt < 50 & lheNj == 3 & lheHT > 200 & lheHT < 400',
108          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 3 & lheHT > 200 & lheHT < 400',
109          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 3 & lheHT > 200 & lheHT < 400',
110          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 3 & lheHT > 200 & lheHT < 400',
111          'lheV_pt > 180 & lheNj == 3 & lheHT > 200 & lheHT < 400',
112          
113          'lheV_pt < 50 & lheNj == 4 & lheHT > 200 & lheHT < 400',
114          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 4 & lheHT > 200 & lheHT < 400',
115          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 4 & lheHT > 200 & lheHT < 400',
116          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 4 & lheHT > 200 & lheHT < 400',
117          'lheV_pt > 180 & lheNj == 4 & lheHT > 200 & lheHT < 400',
118
119
120    'lheV_pt < 50 & lheNj == 0 & lheHT > 400',
121          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 0 & lheHT > 400',
122          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 0 & lheHT > 400',
123          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 0 & lheHT > 400',
124          'lheV_pt > 180 & lheNj == 0 & lheHT > 400',
125
126          'lheV_pt < 50 & lheNj == 1 & lheHT > 400',
127          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 1 & lheHT > 400',
128          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 1 & lheHT > 400',
129          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 1 & lheHT > 400',
130          'lheV_pt > 180 & lheNj == 1 & lheHT > 400',
131          
132          'lheV_pt < 50 & lheNj == 2 & lheHT > 400',
133          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 2 & lheHT > 400',
134          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 2 & lheHT > 400',
135          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 2 & lheHT > 400',
136          'lheV_pt > 180 & lheNj == 2 & lheHT > 400',
137          
138          'lheV_pt < 50 & lheNj == 3 & lheHT > 400',
139          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 3 & lheHT > 400',
140          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 3 & lheHT > 400',
141          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 3 & lheHT > 400',
142          'lheV_pt > 180 & lheNj == 3 & lheHT > 400',
143          
144          'lheV_pt < 50 & lheNj == 4 & lheHT > 400',
145          'lheV_pt > 50 & lheV_pt < 70 & lheNj == 4 & lheHT > 400',
146          'lheV_pt > 70 & lheV_pt < 100 & lheNj == 4 & lheHT > 400',
147          'lheV_pt > 100 & lheV_pt < 180 & lheNj == 4 & lheHT > 400',
148          'lheV_pt > 180 & lheNj == 4 & lheHT > 400'
149
150
151    ]
152
153 eventList = {}
154 num = []
155 for file in fileList:
156    #file.append( 1 )
157    #continue
158    print file
159    infile = ROOT.TFile(file[0],"READ")
160    tree = getObj(infile, 'tree')
161    for bin in lheBin:
162        print bin
163        file[2].append( 1.*tree.GetEntries(bin) )
164 #    file[2] = updateEventArray( tree, lheBin, file[2] )
165    count = getObj( infile, 'CountWithPU' )
166    file.append( count.GetBinContent(1) )
167    print fileList
168
169 CountIncl = fileList[0][3]
170 print fileList
13  
14  
15 < #print num[fileList[1]]
15 > argv = sys.argv
16 > parser = OptionParser()
17  
18 < #total -> total numer of events in each lheBin
19 < print 'Calculating total'
20 < total = []
21 < weight= []
22 < for bin in range(0, len(lheBin) ):
23 <    #weight.append(1.)
24 <    #continue
25 <    total.append(getTotal(bin, fileList))
26 <    print bin
27 <        #to better stich we need the highest stat (that should correspond to the binned sample relative to the bin)
28 <    fileList.sort( key=lambda file: file[2][bin], reverse=True )
29 <    print 'After sorting'
30 <    print fileList
31 <    if total[bin] > 0.:
32 <        #the first is always the one with the highest N in the bin:
33 <        weight.append( (fileList[0][1]/fileList[0][3]) * (CountIncl/2950.0) * fileList[0][2][bin]/total[bin] )
34 <        print weight[bin]
18 > parser.add_option("-C", "--config", dest="config", default=[], action="append",
19 >                              help="configuration defining the plots to make")
20 > (opts, args) = parser.parse_args(argv)
21 > if opts.config =="":
22 >    opts.config = "config"
23 >
24 > from myutils import BetterConfigParser
25 >
26 > print opts.config
27 > config = BetterConfigParser()
28 > config.read(opts.config)
29 > prefix = config.get('LHEWeights','prefix')
30 > newprefix = config.get('LHEWeights','newprefix')
31 > inclusive = config.get('LHEWeights','inclusive')
32 > files = eval(config.get('LHEWeights','fileList'))
33 > lheBin = eval(config.get('LHEWeights','lheBin'))
34 > fileList = []
35 > for file in files:
36 >    new_entry = [prefix+file,files[file],[]]
37 >    if file == inclusive:
38 >        fileList.insert(0, new_entry)
39      else:
40 <        weight.append(1.)
40 >        fileList.append(new_entry)
41 > print fileList
42 > print lheBin
43  
44 < print weight
45 <    
46 < #now add the branch with the weight normalized to the inclusive
47 < for file in fileList:
48 <    infile = ROOT.TFile(file[0],"READ")
49 <    outfile = ROOT.TFile('lheWeight.'+file[0],'RECREATE')
50 <    histoInfile = ROOT.TFile(prefix+'DYJetsToLL_M-50_TuneZ2Star_8TeV-madgraph-tarball.root',"READ")
51 <    histoInfile.cd()
52 <    obj = ROOT.TObject
53 <    for key in ROOT.gDirectory.GetListOfKeys():
54 <        histoInfile.cd()
55 <        obj = key.ReadObj()
56 <        print obj.GetName()
208 <        if obj.GetName() == 'tree':
209 <            continue
210 <        outfile.cd()
211 <        print key.GetName()
212 <        obj.Write(key.GetName())
44 > def get_weights(fileList,lheBin):    
45 >    for file in fileList:
46 >        #file.append( 1 )
47 >        #continue
48 >        print file
49 >        infile = ROOT.TFile(file[0],"READ")
50 >        tree = infile.Get('tree')
51 >        for bin in lheBin:
52 >            file[2].append( float(tree.GetEntries(bin)) )
53 >        count = infile.Get('CountWithPU')
54 >        file.append( count.GetBinContent(1) )
55 >        infile.Close()
56 >        del tree
57  
58 <    infile.cd()
59 <    tree = infile.Get('tree')
216 <    outfile.cd()
217 <    newtree = tree.CloneTree(0)
218 <    lheWeight = array('f',[0])
219 <    newtree.Branch('lheWeight',lheWeight,'lheWeight/F')
58 >    CountIncl = fileList[0][3]
59 >    print fileList
60  
61 <    nEntries = tree.GetEntries()
62 <    theBinForms = []
61 >    #total -> total numer of events in each lheBin
62 >    print 'Calculating total'
63 >    total = []
64 >    weight= []
65      for bin in range(0, len(lheBin) ):
66 <        theBinForms.append(ROOT.TTreeFormula("Bin_formula_%s"%(bin),lheBin[bin],tree))
67 <    for entry in range(0,nEntries):
68 <        tree.GetEntry(entry)
69 <        i = -1
70 <        for bin in range(0, len(lheBin) ):
71 <            if theBinForms[bin].EvalInstance() > 0.:
72 <                i = bin
73 <        if i > -1:
74 <            lheWeight[0] = weight[i]
66 >        #weight.append(1.)
67 >        #continue
68 >        total.append(getTotal(bin, fileList))
69 >        #print lheBin[bin]
70 >        #to better stich we need the highest stat (that should correspond to the binned sample relative to the bin)
71 >        fileList.sort( key=lambda file: file[2][bin], reverse=True )
72 >        #print 'After sorting'
73 >        #print fileList
74 >        if total[bin] > 0.:
75 >            #the first is always the one with the highest N in the bin:
76 >            weight.append( (fileList[0][1]/fileList[0][3]) * (CountIncl/2950.0) * fileList[0][2][bin]/total[bin] )
77          else:
78 <            lheWeight[0] = 1.
78 >            weight.append(1.)
79  
80 <            
81 <        newtree.Fill()
82 <                  
83 <    newtree.AutoSave()
84 <    outfile.Write()
241 <    outfile.Close()
80 >    weight_map = {}
81 >    for bin in range(0, len(lheBin) ):
82 >        print '%s: %.2f' %(lheBin[bin], weight[bin])
83 >        weight_map[lheBin[bin]] = weight[bin]
84 >    return weight_map
85  
86 +
87 + def apply_weights(fileList,weight_map,inclusive,newprefix):    
88 +    #now add the branch with the weight normalized to the inclusive
89 +    for file in fileList:
90 +        outfile = ROOT.TFile(newprefix+file[0],'RECREATE')
91 +        infile = ROOT.TFile(file[0],"READ")
92 +        histoInfile = ROOT.TFile(inclusive,"READ")
93 +        histoInfile.cd()
94 +        obj = ROOT.TObject
95 +        for key in ROOT.gDirectory.GetListOfKeys():
96 +            histoInfile.cd()
97 +            obj = key.ReadObj()
98 +            #print obj.GetName()
99 +            if obj.GetName() == 'tree':
100 +                continue
101 +            outfile.cd()
102 +            #print key.GetName()
103 +            obj.Write(key.GetName())
104 +
105 +        infile.cd()
106 +        tree = infile.Get('tree')
107 +        outfile.cd()
108 +        newtree = tree.CloneTree(0)
109 +        lheWeight = array('f',[0])
110 +        newtree.Branch('lheWeight',lheWeight,'lheWeight/F')
111 +
112 +        nEntries = tree.GetEntries()
113 +        theBinForms = {}
114 +        for bin in weight_map:
115 +            theBinForms[bin] = ROOT.TTreeFormula("Bin_formula_%s"%(bin),bin,tree)
116 +        for entry in range(0,nEntries):
117 +            tree.GetEntry(entry)
118 +            match_bin = None
119 +            for bin in weight_map:
120 +                if theBinForms[bin].EvalInstance() > 0.:
121 +                    match_bin = bin
122 +            if match_bin:
123 +                lheWeight[0] = weight_map[match_bin]
124 +            else:
125 +                lheWeight[0] = 1.
126 +            newtree.Fill()
127 +                  
128 +        newtree.AutoSave()
129 +        outfile.Write()
130 +        outfile.Close()
131 +        infile.Close()
132 +        del tree
133 +        del newtree
134 +
135 + def do_validation(fileList,inclusive,newprefix):
136 +    histo = ROOT.TH1F("lheV_pt","lheV_pt",300,0,300)
137 +    histo1 = ROOT.TH1F("lheV_ptInc","lheV_ptInc",300,0,300)
138 +
139 +    for file in fileList:
140 +        h_name=((newprefix+file[0]).split("."))[-2]
141 +        print h_name
142 +        tfile = ROOT.TFile.Open(newprefix+file[0],"read")
143 +        tree = tfile.Get("tree")
144 +        h_tmp = ROOT.TH1F(h_name,h_name,300,0,300)
145 +        ROOT.gDirectory.ls()
146 +        tree.Draw("lheV_pt>>"+str(h_name),"(lheWeight)*(lheV_pt < 300.)","goff")
147 +        if inclusive in file[0]:
148 +            h_tmp1 = ROOT.TH1F(h_name+'Inc',h_name+'Inc',300,0,300)
149 +            tree.Draw("lheV_pt>>"+str(h_name+'Inc'),"(lheV_pt < 300.)","goff")
150 +            histo1.Add(h_tmp1)
151 +
152 +        histo.Add(h_tmp)
153 +
154 +    canvas = ROOT.TCanvas("lheV_pt","lheV_pt",600,600)
155 +    ROOT.gPad.SetLogy()
156 +    histo.Draw()
157 +    histo1.Draw('SAME')
158 +    print histo.Integral(180,300)
159 +    print histo1.Integral(180,300)
160 +    histo1.SetLineColor(ROOT.kRed)
161 +    canvas.Print("validation_lheV_pt.pdf","pdf")
162 +
163 + weight_map = get_weights(fileList,lheBin)
164 + config.set('LHEWeights', 'weights_per_bin', '%s' %weight_map)
165 + f = open('lheWeights', 'w')
166 + config.write(f)
167 + f.close()
168 + apply_weights(fileList,weight_map,prefix+inclusive,newprefix)
169 + do_validation(fileList,inclusive,newprefix)

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines