ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitHzz4l/LeptonSelection/src/bdtiface.cc
Revision: 1.3
Committed: Tue Apr 24 16:54:04 2012 UTC (13 years ago) by dkralph
Content type: text/plain
Branch: MAIN
CVS Tags: synched
Changes since 1.2: +12 -8 lines
Log Message:
bug fix in minima/maxima treatment

File Contents

# User Rev Content
1 khahn 1.1 #include "bdtiface.h"
2    
3 dkralph 1.2 bdtiface::bdtiface(TString weightDir, TString cutfile, TString method, TString varfile):weightdir_(weightDir),varfile_(varfile)
4 khahn 1.1 {
5 dkralph 1.2 if(weightDir != "") { // leave dir string empty if you just want root to make bdtiface_cc.so
6 khahn 1.1
7 dkralph 1.2 // get names of all the xml files for this method in weight directory
8     FILE *fdls = popen("ls "+weightDir+"/*.weights.xml","r");
9     char filestring[999];
10     while(fscanf(fdls,"%s",filestring) == 1 ) {
11     TString xmlname(filestring);
12     TString binname = xmlname(TRegexp("s[0-9]_pt[0-9]"));
13     if(binname=="") binname = xmlname(TRegexp("Cat[0-9]"));
14 dkralph 1.3 if(binname=="") binname = xmlname(TRegexp("[CET][enr][nda][tcn][ras][api][^_][^_]*"));
15 dkralph 1.2 if(binname=="") binname = "~";
16 dkralph 1.3 // everybody's got different names for each bin...
17     if(binname=="Cat1" || binname=="CentralPt5To10") binname = "s0_pt0";
18     if(binname=="Cat2" || binname=="TransitionPt5To10") binname = "s1_pt0";
19     if(binname=="Cat3" || binname=="EndcapPt5To10") binname = "s2_pt0";
20     if(binname=="Cat4" || binname=="CentralPt10ToInf") binname = "s0_pt1";
21     if(binname=="Cat5" || binname=="TransitionPt10ToInf") binname = "s1_pt1";
22     if(binname=="Cat6" || binname=="EndcapPt10ToInf") binname = "s2_pt1";
23 dkralph 1.2 binnamev_.push_back(binname);
24     xml_names_[binname] = xmlname;
25     }
26     fclose(fdls);
27    
28     // read bin info from txt file
29     readbins(varfile);
30     // if only one weight file is found, read the variables from the txt file (since they'll be listed multiple times in the weight file...)
31 dkralph 1.3 cout << "bdtiface: loading... -------------------------------------------------------" << endl;
32 dkralph 1.2 if(binnamev_.size()==1) {cout << "\tbdtiface: reading from txt file..." << endl; readvars(varfile); }
33     // if several weight files found, read variables from weight files
34     else { cout << "\tbdtiface: reading from xml..." << endl; getxmlvars(); }
35     readlimits(varfile);
36    
37     // push back default bins if they weren't read from the txt file
38     if(ptbin_highs_.size()==0) { ptbin_highs_.push_back(20); ptbin_highs_.push_back(7000); }
39     if(etabin_highs_.size()==0) { etabin_highs_.push_back(0.8); etabin_highs_.push_back(1.485); etabin_highs_.push_back(2.5); }
40    
41     // create a reader for each bin
42     for(UInt_t ibin=0; ibin<binnamev_.size(); ibin++)
43     readers_[binnamev_[ibin]] = new TMVA::Reader( "!Color!Silent" );
44    
45     map<TString,TMVA::Reader*>::iterator it;
46     for(it=readers_.begin(); it!=readers_.end(); it++) {
47     TString bin((*it).first);
48    
49     // add vars to the tmva reader for this bin
50     for(unsigned ivar=0; ivar<floatvar_expressions_[bin].size(); ivar++) {
51     (*it).second->AddVariable(floatvar_expressions_[bin][ivar],&floatvals_[floatvar_names_[bin][ivar]]);
52     }
53     for(unsigned ispec=0; ispec<floatspec_names_[bin].size(); ispec++) {
54     TString varname(floatspec_names_[bin][ispec]);
55     (*it).second->AddSpectator(varname,&floatvals_[varname]);
56     }
57    
58     // read in weight file for this bin
59     TString weightfilename(xml_names_[(*it).first]);
60     ifstream weightfs(weightfilename);
61     if(weightfs.is_open()) (*it).second->BookMVA(method,weightfilename);
62     else { cerr << "bdtiface: Cannot find weight file: " << weightfilename << "!" << endl; assert(0); }
63     }
64     } else cerr << "bdtiface: Warning: not loading any weight files!" << endl;
65    
66     if(cutfile != "") readBDTCuts(cutfile); // same
67     else cerr << "bdtiface: Warning: not loading a cut file!" << endl;
68 dkralph 1.3 cout << "bdtiface: loaded... ----------------------------------------------------------------------------------------" << endl;
69 dkralph 1.2 }
70     //----------------------------------------------------------------------------------------
71     void bdtiface::getxmlvars()
72     {
73     map<TString,TString>::iterator it;
74     // loop over xml files
75     max_nfloats = 0;
76     for(it=xml_names_.begin(); it!=xml_names_.end(); it++) {
77     TString bin((*it).first);
78     ifstream ifs;
79     ifs.open((*it).second);
80     string line;
81     int ivar=0,ispec=0;
82     while(getline(ifs,line)) {
83     TString rline(line.c_str());
84     TString expression(rline(TRegexp("Expression=\"[^\"][^\"]*")));
85     expression = expression(12,expression.Length());
86     TString vname(rline(TRegexp("Label=\"[^\"][^\"]*")));
87     vname = vname(7,vname.Length());
88     TString type(rline(TRegexp("Type=\"[a-zA-Z]")));
89     type = type(6,type.Length());
90     if(rline.Contains(TRegexp("Variable VarIndex"))) {
91     if(type=="F" || type=="I") { // tmva::reader only allows floats...
92     floatvar_names_[bin].push_back(vname);
93     floatvar_expressions_[bin].push_back(expression);
94     if(floatvals_.find(vname) == floatvals_.end()) { floatvals_[vname] = Float_t(0); max_nfloats++;}
95     } else assert(0);
96     ivar++;
97     } else if(rline.Contains(TRegexp("Spectator SpecIndex"))) {
98     if(type=="F" || type=="I") {
99     floatspec_names_[bin].push_back(vname);
100     if(floatvals_.find(vname) == floatvals_.end()) { floatvals_[vname] = Float_t(0); max_nfloats++;}
101     } else assert(0);
102     ispec++;
103     }
104     }
105     }
106     }
107     //----------------------------------------------------------------------------------------
108     void bdtiface::readbins(TString varfile)
109     {
110     // read binning information from txt file
111     ifstream ifs(varfile);
112     assert(ifs.is_open());
113     string line;
114    
115     while(getline(ifs,line)) {
116     if(line[0]=='#') continue;
117     stringstream ss(line);
118    
119     if(line[0]=='@') {
120     TString pteta;
121     ss >> pteta;
122     float high;
123     while(!ss.eof()) {
124     ss >> high;
125     if(pteta=="@pt") ptbin_highs_.push_back(float(high));
126     else if(pteta=="@eta") etabin_highs_.push_back(float(high));
127     else break;
128     }
129     }
130 khahn 1.1 }
131 dkralph 1.2 ifs.close();
132     }
133     //----------------------------------------------------------------------------------------
134     void bdtiface::readvars(TString varfile)
135     {
136     max_nfloats = 0;
137     // read in list of mva variables
138     ifstream ifs(varfile);
139     assert(ifs.is_open());
140     string line;
141 khahn 1.1
142 dkralph 1.2 vector<TString> expressionv;
143     vector<char> typev;
144     TString bin("~");
145     while(getline(ifs,line)) {
146     if(line[0]=='#') continue;
147     stringstream ss(line);
148    
149     if(line[0]=='@') {
150     continue;
151     } else {
152     TString type;
153     TString vname,use,expression;
154     ss >> type >> vname >> use >> expression;
155     assert(type=='F'||type=='I');
156     assert(use=="var"||use=="spec");
157     if(use=="var") {
158     if(type=="F" || type=="I") {
159     floatvar_names_[bin].push_back(vname);
160     floatvar_expressions_[bin].push_back(expression);
161     if(floatvals_.find(vname) == floatvals_.end()) {
162     floatvals_[vname] = Float_t(0);
163     max_nfloats++;
164     }
165     } else assert(0);
166     } else if(use=="spec") {
167     if(type=="F" || type=="I") {
168     floatspec_names_[bin].push_back(vname);
169     if(floatvals_.find(vname) == floatvals_.end()) {
170     floatvals_[vname] = Float_t(0);
171     max_nfloats++;
172     }
173     } else assert(0);
174     }
175     }
176     }
177     }
178     //----------------------------------------------------------------------------------------
179     TString bdtiface::getBinName(double pt, double eta, TString era)
180     {
181     // classify by eta and pt bins
182    
183     TString subdet("");
184     for(unsigned ibin=0; ibin<etabin_highs_.size(); ibin++) {
185     if(fabs(eta) < etabin_highs_[ibin]) {
186     stringstream ss;
187     ss << ibin;
188     subdet = ss.str().c_str();
189     ss.str() == "";
190     break;
191     }
192     }
193     if(subdet=="") { cerr << "ERROR! eta not found: " << eta << endl; assert(0); }
194    
195     TString ptbin("");
196     for(unsigned ibin=0; ibin<ptbin_highs_.size(); ibin++) {
197     if(pt < ptbin_highs_[ibin]) {
198     stringstream ss;
199     ss << ibin;
200     ptbin = ss.str().c_str();
201     ss.str() == "";
202     break;
203     }
204     }
205     if(ptbin=="") { cerr << "ERROR! pt not found: " << pt << endl; assert(0); }
206    
207     TString binname;
208     if(era=="")
209     binname = "s" + subdet + "_pt" + ptbin;
210     else
211     binname = "s" + subdet + "_pt" + ptbin + "_" + era;
212    
213     bool found=false;
214     for(unsigned ibin=0; ibin<binnamev_.size(); ibin++) {
215     if(binnamev_[ibin] == binname) found = true;
216     }
217    
218     if(found) return binname;
219     else {
220     cerr << "bdtiface: could not find bin " << binname << " for pt: " << pt << "\teta: " << eta << " in " << weightdir_ << "!" << endl;
221     for(unsigned ibin=0; ibin<binnamev_.size(); ibin++) {
222     cout << binnamev_[ibin] << endl;
223     }
224     assert(0);
225     }
226     }
227 khahn 1.1 //----------------------------------------------------------------------------------------
228 dkralph 1.2 void bdtiface::readlimits(TString varfile)
229 khahn 1.1 {
230 dkralph 1.2 // get the limits for each variable
231     ifstream ifs(varfile);
232     assert(ifs.is_open());
233     string line;
234    
235     vector<TString> expressionv;
236     vector<char> typev;
237     TString bin("~");
238     while(getline(ifs,line)) {
239     if(line[0]=='#') continue;
240     stringstream ss(line);
241 khahn 1.1
242 dkralph 1.2 if(line[0]=='@') {
243     continue;
244     } else {
245     TString type,vname,use,expression,etabin;
246     bool abs;
247     double min,max;
248     ss >> type >> vname >> use >> expression >> etabin >> abs >> min >> max;
249     minima_[vname] = (Float_t)min;
250     maxima_[vname] = (Float_t)max;
251     }
252     }
253 khahn 1.1 }
254     //----------------------------------------------------------------------------------------
255     void bdtiface::readBDTCuts(TString file)
256     {
257     // get bdt cut values from file
258     ifstream ifs;
259     ifs.open(file.Data());
260 dkralph 1.2 if(!ifs.is_open()) {
261     cerr << "bdtiface: Warning: bdtcut file does not exist!" << endl;
262     return;
263     }
264 khahn 1.1 string line;
265     while(getline(ifs,line)) {
266     stringstream ss(line);
267     string binname;
268     Double_t loosecut,mediumcut,tightcut;
269     ss >> binname >> loosecut >> mediumcut >> tightcut;
270 dkralph 1.2 loosecuts_[binname] = loosecut;
271     mediumcuts_[binname] = mediumcut;
272     tightcuts_[binname] = tightcut;
273 khahn 1.1 }
274     }
275     //----------------------------------------------------------------------------------------
276 dkralph 1.2 void bdtiface::setVals(map<TString,Float_t> floatvals)
277 khahn 1.1 {
278 dkralph 1.2 // pass in a map of the variable values for each event
279     if(floatvals.size()!=max_nfloats) {
280     cerr << "ERROR, passed vars: " << floatvals.size() << "\t" << "number required: " << max_nfloats << endl;
281     map<TString,Float_t>::iterator it;
282     if(floatvals.size() > floatvals_.size()) {
283     for(it=floatvals.begin(); it!=floatvals.end(); it++) {
284     if(floatvals_.find((*it).first) == floatvals_.end()) cerr << "\t" << (*it).first << " not read from xml/txt in " << weightdir_(0,weightdir_.Last('/')) << endl;
285     }
286     } else {
287     for(it=floatvals_.begin(); it!=floatvals_.end(); it++) {
288     if(floatvals.find((*it).first) == floatvals.end()) cerr << "\t" << (*it).first << " not found in map of passed floatvals" << endl;
289     }
290     }
291     assert(0);
292     }
293    
294     map<TString,Float_t>::iterator it_fl;
295     for(it_fl=floatvals.begin(); it_fl!=floatvals.end(); it_fl++) {
296     TString name((*it_fl).first);
297     if(floatvals_.find(name) == floatvals_.end()) { cerr << "bdtiface: " << name << " not found in float vals read from xml/txt" << endl; assert(0); }
298     Float_t value = floatvals[name];
299     assert(minima_.find(name) != minima_.end()); assert(maxima_.find(name) != maxima_.end());
300 dkralph 1.3 if(minima_[name] != -9999999 && value < minima_[name]) value = minima_[name];
301     if(maxima_[name] != 9999999 && value > maxima_[name]) value = maxima_[name];
302 dkralph 1.2 floatvals_[name] = value;
303     }
304     }
305     //----------------------------------------------------------------------------------------
306     double bdtiface::getBDTCut(double pt, double eta, TString tightness, TString era)
307     {
308     TString binname = getBinName(pt,eta,era);
309     if(tightness=="loose") return loosecuts_[binname];
310     else if(tightness=="medium") return mediumcuts_[binname];
311     else if(tightness=="tight") return tightcuts_[binname];
312     else {cout << "error! bad tightness" << endl; assert(0); return -1;}
313     }
314     //----------------------------------------------------------------------------------------
315     double bdtiface::getBDTCut(TString binname, TString tightness)
316     {
317     if(tightness=="loose") return loosecuts_[binname];
318     else if(tightness=="medium") return mediumcuts_[binname];
319     else if(tightness=="tight") return tightcuts_[binname];
320 khahn 1.1 else {cout << "error! bad tightness" << endl; assert(0); return -1;}
321     }
322     //----------------------------------------------------------------------------------------
323 dkralph 1.2 double bdtiface::getBDTval(Double_t pt, Double_t eta, TString era, TString method)
324     {
325     TString binname((readers_.size()>1) ? getBinName(pt,eta,era) : "~");
326     assert(readers_.find(binname) != readers_.end());
327     TMVA::Reader *reader = readers_[binname];
328     return reader->EvaluateMVA(method);
329     }
330     //----------------------------------------------------------------------------------------
331     Bool_t bdtiface::passBDT(Double_t pt, Double_t eta, TString tightness, TString era, TString method)
332 khahn 1.1 {
333 dkralph 1.2 double bdtcut = getBDTCut(pt,eta,tightness,era);
334     TString binname((readers_.size()>1) ? getBinName(pt,eta,era) : "~");
335     assert(readers_.find(binname) != readers_.end());
336     TMVA::Reader *reader = readers_[binname];
337     double bdtval = reader->EvaluateMVA(method);
338    
339     //cerr << "bdtcut: " << bdtcut << "\tbdtval: " << bdtval << endl;
340 khahn 1.1
341     return bdtval > bdtcut;
342     }
343 dkralph 1.2 //----------------------------------------------------------------------------------------
344     void bdtiface::print()
345     {
346     cout << "bdtiface -- weightdir: " << weightdir_ << endl;
347     }