ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitHzz4l/LeptonSelection/src/bdtiface.cc
Revision: 1.5
Committed: Sun May 6 14:56:25 2012 UTC (13 years ago) by dkralph
Content type: text/plain
Branch: MAIN
CVS Tags: compiled, synced_FSR_2, synced_FSR, synched2, HEAD
Changes since 1.4: +53 -26 lines
Error occurred while calculating annotation data.
Log Message:
*** empty log message ***

File Contents

# Content
1 #include "bdtiface.h"
2
3 bdtiface::bdtiface(TString weightDir, TString cutfile, TString method, TString varfile):weightdir_(weightDir),varfile_(varfile)
4 {
5 if(weightDir != "") { // leave dir string empty if you just want root to make bdtiface_cc.so
6
7 // get names of all the xml files for this method in weight directory
8 FILE *fdls = popen("ls "+weightDir+"/*.weights.xml","r");
9 char filestring[999];
10 while(fscanf(fdls,"%s",filestring) == 1 ) {
11 TString xmlname(filestring);
12 TString binname = xmlname(TRegexp("g[0-9]_t[0-9]_s[0-9][0-9]*_pt[0-9][0-9]*"));
13 if(binname=="") binname = xmlname(TRegexp("s[0-9]_pt[0-9]"));
14 if(binname=="") binname = xmlname(TRegexp("Cat[0-9]"));
15 if(binname=="") binname = xmlname(TRegexp("[CETB][enra][ndar][tcnr][rase][apil][^_][^_]*"));
16 if(binname=="") binname = "~";
17 // everybody's got different names for each bin...
18 // check it here: http://cmssw.cvs.cern.ch/cgi-bin/cmssw.cgi/UserCode/EGamma/EGammaAnalysisTools/src/EGammaMvaEleEstimator.cc?revision=1.2&view=markup
19 if(binname=="Cat1") binname = "s0_pt0";
20 if(binname=="Cat2") binname = "s1_pt0";
21 if(binname=="Cat3") binname = "s2_pt0";
22 if(binname=="Cat4") binname = "s0_pt1";
23 if(binname=="Cat5") binname = "s1_pt1";
24 if(binname=="Cat6") binname = "s2_pt1";
25 binnamev_.push_back(binname);
26 xml_names_[binname] = xmlname;
27 }
28 fclose(fdls);
29
30 // read bin info from txt file
31 readbins(varfile);
32 // if only one weight file is found, read the variables from the txt file (since they'll be listed multiple times in the weight file...)
33 cout << "bdtiface: loading... -------------------------------------------------------" << endl;
34 if(binnamev_.size()==1) {cout << "\tbdtiface: reading from txt file..." << endl; readvars(varfile); }
35 // if several weight files found, read variables from weight files
36 else { cout << "\tbdtiface: reading from xml..." << endl; getxmlvars(); }
37 readlimits(varfile);
38
39 // push back default bins if they weren't read from the txt file
40 // if(ptbin_highs_.size()==0) { ptbin_highs_.push_back(20); ptbin_highs_.push_back(7000); }
41 // if(etabin_highs_.size()==0) { etabin_highs_.push_back(0.8); etabin_highs_.push_back(1.485); etabin_highs_.push_back(2.5); }
42
43 // create a reader for each bin
44 for(UInt_t ibin=0; ibin<binnamev_.size(); ibin++)
45 readers_[binnamev_[ibin]] = new TMVA::Reader( "!Color!Silent" );
46
47 map<TString,TMVA::Reader*>::iterator it;
48 for(it=readers_.begin(); it!=readers_.end(); it++) {
49 TString bin((*it).first);
50
51 // add vars to the tmva reader for this bin
52 for(unsigned ivar=0; ivar<floatvar_expressions_[bin].size(); ivar++) {
53 (*it).second->AddVariable(floatvar_expressions_[bin][ivar],&floatvals_[floatvar_names_[bin][ivar]]);
54 }
55 for(unsigned ispec=0; ispec<floatspec_names_[bin].size(); ispec++) {
56 TString varname(floatspec_names_[bin][ispec]);
57 (*it).second->AddSpectator(varname,&floatvals_[varname]);
58 }
59
60 // read in weight file for this bin
61 TString weightfilename(xml_names_[(*it).first]);
62 ifstream weightfs(weightfilename);
63 if(weightfs.is_open()) (*it).second->BookMVA(method,weightfilename);
64 else { cerr << "bdtiface: Cannot find weight file: " << weightfilename << "!" << endl; assert(0); }
65 }
66 } else cerr << "bdtiface: Warning: not loading any weight files!" << endl;
67
68 if(cutfile == "") cerr << "bdtiface: Warning: not loading a cut file!" << endl;
69 else readBDTCuts(cutfile); // same
70 cout << "bdtiface: loaded... ----------------------------------------------------------------------------------------" << endl;
71 }
72 //----------------------------------------------------------------------------------------
73 void bdtiface::getxmlvars()
74 {
75 map<TString,TString>::iterator it;
76 // loop over xml files
77 max_nfloats = 0;
78 for(it=xml_names_.begin(); it!=xml_names_.end(); it++) {
79 TString bin((*it).first);
80 ifstream ifs;
81 ifs.open((*it).second);
82 string line;
83 int ivar=0,ispec=0;
84 while(getline(ifs,line)) {
85 TString rline(line.c_str());
86 TString expression(rline(TRegexp("Expression=\"[^\"][^\"]*")));
87 expression = expression(12,expression.Length());
88 TString vname(rline(TRegexp("Label=\"[^\"][^\"]*")));
89 vname = vname(7,vname.Length());
90 TString type(rline(TRegexp("Type=\"[a-zA-Z]")));
91 type = type(6,type.Length());
92 if(rline.Contains(TRegexp("Variable VarIndex"))) {
93 if(type=="F" || type=="I") { // tmva::reader only allows floats...
94 floatvar_names_[bin].push_back(vname);
95 floatvar_expressions_[bin].push_back(expression);
96 if(floatvals_.find(vname) == floatvals_.end()) { floatvals_[vname] = Float_t(0); max_nfloats++;}
97 } else assert(0);
98 ivar++;
99 } else if(rline.Contains(TRegexp("Spectator SpecIndex"))) {
100 if(type=="F" || type=="I") {
101 floatspec_names_[bin].push_back(vname);
102 if(floatvals_.find(vname) == floatvals_.end()) { floatvals_[vname] = Float_t(0); max_nfloats++;}
103 } else assert(0);
104 ispec++;
105 }
106 }
107 }
108 }
109 //----------------------------------------------------------------------------------------
110 void bdtiface::readbins(TString varfile)
111 {
112 // read binning information from txt file
113 ifstream ifs(varfile);
114 assert(ifs.is_open());
115 string line;
116
117 bool list=false; // just list all bins in the txt file
118 vector<float> tmp_pt_highs,tmp_eta_highs;
119 while(getline(ifs,line)) {
120 if(line[0]=='#') continue;
121 if(line[0]!='@') continue;
122 stringstream ss(line);
123 if(line=="@LIST") { list=true; continue; }
124
125 TString first_str;
126 ss >> first_str;
127 while(!ss.eof()) {
128 if(list) {
129 TString variable;
130 Float_t low,high;
131 ss >> variable >> low >> high;
132
133 TString binname(first_str(1,first_str.Length()));
134 // cout << binname << "\t" << variable << "\t" << low << "\t" << high << endl;
135 if(variable=="glb") {
136 glbbin_lows_[binname] = *(new Float_t(low));
137 glbbin_highs_[binname] = *(new Float_t(high));
138 } else if(variable=="trk") {
139 trkbin_lows_[binname] = *(new Float_t(low));
140 trkbin_highs_[binname] = *(new Float_t(high));
141 } else if(variable=="pt") {
142 ptbin_lows_[binname] = *(new Float_t(low));
143 ptbin_highs_[binname] = *(new Float_t(high));
144 } else if(variable=="eta") {
145 etabin_lows_[binname] = *(new Float_t(low));
146 etabin_highs_[binname] = *(new Float_t(high));
147 } else { cerr << "bad variable: " << variable << endl; assert(0); }
148
149 } else {
150 float high;
151 ss >> high;
152
153 if(first_str=="@pt") tmp_pt_highs.push_back(float(high));
154 else if(first_str=="@eta") tmp_eta_highs.push_back(float(high));
155 else break;
156 }
157 }
158 }
159
160 if(!list) {
161 for(unsigned ipt=0; ipt<tmp_pt_highs.size(); ipt++) {
162 for(unsigned ieta=0; ieta<tmp_eta_highs.size(); ieta++) {
163
164 stringstream ss_binname;
165 ss_binname << "s" << ieta << "_pt" << ipt;
166 TString binname(ss_binname.str().c_str());
167
168 ptbin_lows_[binname] = *(new Float_t((ipt==0) ? 0 : tmp_pt_highs[ipt-1]));
169 ptbin_highs_[binname] = *(new Float_t(tmp_pt_highs[ipt]));
170 etabin_lows_[binname] = *(new Float_t((ieta==0) ? 0 : tmp_eta_highs[ieta-1]));
171 etabin_highs_[binname] = *(new Float_t(tmp_eta_highs[ieta]));
172 }
173 }
174 }
175
176 ifs.close();
177 }
178 //----------------------------------------------------------------------------------------
179 void bdtiface::readvars(TString varfile)
180 {
181 max_nfloats = 0;
182 // read in list of mva variables
183 ifstream ifs(varfile);
184 assert(ifs.is_open());
185 string line;
186
187 vector<TString> expressionv;
188 vector<char> typev;
189 TString bin("~");
190 while(getline(ifs,line)) {
191 if(line[0]=='#') continue;
192 stringstream ss(line);
193
194 if(line[0]=='@') {
195 continue;
196 } else {
197 TString type;
198 TString vname,use,expression;
199 ss >> type >> vname >> use >> expression;
200 assert(type=='F'||type=='I');
201 assert(use=="var"||use=="spec");
202 if(use=="var") {
203 if(type=="F" || type=="I") {
204 floatvar_names_[bin].push_back(vname);
205 floatvar_expressions_[bin].push_back(expression);
206 if(floatvals_.find(vname) == floatvals_.end()) {
207 floatvals_[vname] = Float_t(0);
208 max_nfloats++;
209 }
210 } else assert(0);
211 } else if(use=="spec") {
212 if(type=="F" || type=="I") {
213 floatspec_names_[bin].push_back(vname);
214 if(floatvals_.find(vname) == floatvals_.end()) {
215 floatvals_[vname] = Float_t(0);
216 max_nfloats++;
217 }
218 } else assert(0);
219 }
220 }
221 }
222 }
223 //----------------------------------------------------------------------------------------
224 TString bdtiface::getBinName(double pt, double eta, int isglobal, int istracker)
225 {
226 // classify by eta and pt bins
227 for(unsigned ibin=0; ibin<binnamev_.size(); ibin++) {
228 TString binname(binnamev_[ibin]);
229 // cout << binname << "\t"
230 // << ptbin_lows_[binname] << "\t"
231 // << ptbin_highs_[binname] << "\t"
232 // << etabin_lows_[binname] << "\t"
233 // << etabin_highs_[binname] << "\t"
234 // << endl;
235 bool thisptbin=false,thisetabin=false,thisglobalbin=false,thistrackerbin=false;
236 if( pt >= ptbin_lows_[binname] && pt < ptbin_highs_[binname]) thisptbin = true;
237 if(fabs(eta) >= etabin_lows_[binname] && fabs(eta) < etabin_highs_[binname]) thisetabin = true;
238 if(isglobal != -1)
239 if(isglobal == glbbin_lows_[binname] && isglobal == glbbin_highs_[binname]) thisglobalbin = true;
240 if(istracker != -1)
241 if(istracker == trkbin_lows_[binname] && istracker == trkbin_highs_[binname]) thistrackerbin = true;
242
243 if(thisptbin && thisetabin && (isglobal==-1 || thisglobalbin) && (istracker==-1 || thistrackerbin)) return binname;
244 }
245
246 // assert(0);
247 cerr << "BIN NOT FOUND: " << pt << "\t" << eta << endl;
248 for(unsigned ibin=0; ibin<binnamev_.size(); ibin++) {
249 TString binname(binnamev_[ibin]);
250 cout << binnamev_[ibin] << "\t" << ptbin_lows_[binname] << "," << ptbin_highs_[binname] << "\t" << etabin_lows_[binname] << "," << etabin_highs_[binname] << endl;
251 }
252
253 assert(0);
254 }
255 //----------------------------------------------------------------------------------------
256 void bdtiface::readlimits(TString varfile)
257 {
258 // get the limits for each variable
259 ifstream ifs(varfile);
260 assert(ifs.is_open());
261 string line;
262
263 vector<TString> expressionv;
264 vector<char> typev;
265 TString bin("~");
266 while(getline(ifs,line)) {
267 if(line[0]=='#') continue;
268 stringstream ss(line);
269
270 if(line[0]=='@') {
271 continue;
272 } else {
273 TString type,vname,use,expression,etabin;
274 bool abs;
275 double min,max;
276 ss >> type >> vname >> use >> expression >> etabin >> abs >> min >> max;
277 minima_[vname] = (Float_t)min;
278 maxima_[vname] = (Float_t)max;
279 }
280 }
281 }
282 //----------------------------------------------------------------------------------------
283 void bdtiface::readBDTCuts(TString file)
284 {
285
286 // get bdt cut values from file
287 ifstream ifs;
288 ifs.open(file.Data());
289 if(!ifs.is_open()) {
290 cerr << "bdtiface: Warning: bdtcut file does not exist!" << endl;
291 // for(unsigned ibin=0; ibin<binnamev_.size(); ibin++) {
292 // TString binname(binnamev_[ibin]);
293 // loosecuts_[binname] = 999;
294 // mediumcuts_[binname] = 999;
295 // tightcuts_[binname] = 999;
296 // }
297 return;
298 }
299 string line;
300 vector<TString> wpnames;
301 while(getline(ifs,line)) {
302 stringstream ss(line);
303 // push back the working point names
304 if(line[0]=='@') {
305 TString atsign;
306 ss >> atsign;
307 while(!ss.eof()) {
308 TString wpname;
309 ss >> wpname;
310 wpname.ReplaceAll("@","");
311 wpnames.push_back(wpname);
312 }
313 continue;
314 }
315
316 if(wpnames.size()==0) { wpnames.push_back("loose"); wpnames.push_back("medium"); wpnames.push_back("tight"); }
317
318 string binname;
319 ss >> binname;
320 for(unsigned iwp=0; iwp<wpnames.size(); iwp++) {
321 double cutval;
322 ss >> cutval;
323 cuts_[binname+"_"+wpnames[iwp]] = *(new double(cutval));
324 // cout << binname << "\t" << wpnames[iwp] << cutval << endl;
325 }
326 // // Double_t loosecut,mediumcut,tightcut;
327 // // ss >> binname >> loosecut >> mediumcut >> tightcut;
328 // loosecuts_[binname] = cuts_[binname+"_loose"];
329 // mediumcuts_[binname] = cuts_[binname+"_medium"];
330 // tightcuts_[binname] = cuts_[binname+"_tight"];
331 }
332 }
333 //----------------------------------------------------------------------------------------
334 void bdtiface::setVals(map<TString,Float_t> floatvals)
335 {
336 // pass in a map of the variable values for each event
337 if(floatvals.size()!=max_nfloats) {
338 cerr << "ERROR, passed vars: " << floatvals.size() << "\t" << "number required: " << max_nfloats << endl;
339 map<TString,Float_t>::iterator it;
340 if(floatvals.size() > floatvals_.size()) {
341 for(it=floatvals.begin(); it!=floatvals.end(); it++) {
342 if(floatvals_.find((*it).first) == floatvals_.end()) cerr << "\t" << (*it).first << " not read from xml/txt in " << weightdir_(0,weightdir_.Last('/')) << endl;
343 }
344 } else {
345 for(it=floatvals_.begin(); it!=floatvals_.end(); it++) {
346 if(floatvals.find((*it).first) == floatvals.end()) cerr << "\t" << (*it).first << " not found in map of passed floatvals" << endl;
347 }
348 }
349 assert(0);
350 }
351
352 map<TString,Float_t>::iterator it_fl;
353 for(it_fl=floatvals.begin(); it_fl!=floatvals.end(); it_fl++) {
354 TString name((*it_fl).first);
355 if(floatvals_.find(name) == floatvals_.end()) { cerr << "bdtiface: " << name << " not found in float vals read from xml/txt" << endl; assert(0); }
356 Float_t value = floatvals[name];
357 assert(minima_.find(name) != minima_.end()); assert(maxima_.find(name) != maxima_.end());
358 if(minima_[name] != -9999999 && value < minima_[name]) value = minima_[name];
359 if(maxima_[name] != 9999999 && value > maxima_[name]) value = maxima_[name];
360 floatvals_[name] = value;
361 }
362 }
363 //----------------------------------------------------------------------------------------
364 double bdtiface::getBDTCut(double pt, double eta, TString tightness)
365 {
366 TString binname = getBinName(pt,eta);
367 return getBDTCut(binname,tightness);
368 }
369 //----------------------------------------------------------------------------------------
370 double bdtiface::getBDTCut(TString binname, TString tightness)
371 {
372 TString cutlabel(binname+"_"+tightness);
373 if(cuts_.find(binname+"_"+tightness) != cuts_.end()) return cuts_[binname+"_"+tightness];
374 cout << cutlabel << " not found!" << endl;
375 map<TString,double>::iterator it;
376 for(it=cuts_.begin(); it!=cuts_.end(); it++) {
377 cout << "\t" << (*it).first << "\t" << (*it).second << endl;
378 }
379 assert(0);
380 // if(tightness=="loose") return loosecuts_[binname];
381 // else if(tightness=="medium") return mediumcuts_[binname];
382 // else if(tightness=="tight") return tightcuts_[binname];
383 // else {cout << "error! bad tightness" << endl; assert(0); return -1;}
384 }
385 //----------------------------------------------------------------------------------------
386 double bdtiface::getBDTval(Double_t pt, Double_t eta, TString method)
387 {
388 TString binname((readers_.size()>1) ? getBinName(pt,eta) : "~");
389 assert(readers_.find(binname) != readers_.end());
390 TMVA::Reader *reader = readers_[binname];
391 return reader->EvaluateMVA(method);
392 }
393 //----------------------------------------------------------------------------------------
394 Bool_t bdtiface::passBDT(Double_t pt, Double_t eta, TString tightness, TString method)
395 {
396 double bdtcut = getBDTCut(pt,eta,tightness);
397 TString binname((readers_.size()>1) ? getBinName(pt,eta) : "~");
398 assert(readers_.find(binname) != readers_.end());
399 TMVA::Reader *reader = readers_[binname];
400 double bdtval = reader->EvaluateMVA(method);
401
402 //cerr << "bdtcut: " << bdtcut << "\tbdtval: " << bdtval << endl;
403
404 return bdtval > bdtcut;
405 }
406 //----------------------------------------------------------------------------------------
407 void bdtiface::print()
408 {
409 cout << "bdtiface -- weightdir: " << weightdir_ << endl;
410 }
411