1 |
#include "bdtiface.h"
|
2 |
|
3 |
bdtiface::bdtiface(TString weightDir, TString cutfile, TString method, TString varfile):weightdir_(weightDir),varfile_(varfile)
|
4 |
{
|
5 |
if(weightDir != "") { // leave dir string empty if you just want root to make bdtiface_cc.so
|
6 |
|
7 |
// get names of all the xml files for this method in weight directory
|
8 |
FILE *fdls = popen("ls "+weightDir+"/*.weights.xml","r");
|
9 |
char filestring[999];
|
10 |
while(fscanf(fdls,"%s",filestring) == 1 ) {
|
11 |
TString xmlname(filestring);
|
12 |
TString binname = xmlname(TRegexp("g[0-9]_t[0-9]_s[0-9][0-9]*_pt[0-9][0-9]*"));
|
13 |
if(binname=="") binname = xmlname(TRegexp("s[0-9]_pt[0-9]"));
|
14 |
if(binname=="") binname = xmlname(TRegexp("Cat[0-9]"));
|
15 |
if(binname=="") binname = xmlname(TRegexp("[CETB][enra][ndar][tcnr][rase][apil][^_][^_]*"));
|
16 |
if(binname=="") binname = "~";
|
17 |
// everybody's got different names for each bin...
|
18 |
// check it here: http://cmssw.cvs.cern.ch/cgi-bin/cmssw.cgi/UserCode/EGamma/EGammaAnalysisTools/src/EGammaMvaEleEstimator.cc?revision=1.2&view=markup
|
19 |
if(binname=="Cat1") binname = "s0_pt0";
|
20 |
if(binname=="Cat2") binname = "s1_pt0";
|
21 |
if(binname=="Cat3") binname = "s2_pt0";
|
22 |
if(binname=="Cat4") binname = "s0_pt1";
|
23 |
if(binname=="Cat5") binname = "s1_pt1";
|
24 |
if(binname=="Cat6") binname = "s2_pt1";
|
25 |
binnamev_.push_back(binname);
|
26 |
xml_names_[binname] = xmlname;
|
27 |
}
|
28 |
fclose(fdls);
|
29 |
|
30 |
// read bin info from txt file
|
31 |
readbins(varfile);
|
32 |
// if only one weight file is found, read the variables from the txt file (since they'll be listed multiple times in the weight file...)
|
33 |
cout << "bdtiface: loading... -------------------------------------------------------" << endl;
|
34 |
if(binnamev_.size()==1) {cout << "\tbdtiface: reading from txt file..." << endl; readvars(varfile); }
|
35 |
// if several weight files found, read variables from weight files
|
36 |
else { cout << "\tbdtiface: reading from xml..." << endl; getxmlvars(); }
|
37 |
readlimits(varfile);
|
38 |
|
39 |
// push back default bins if they weren't read from the txt file
|
40 |
// if(ptbin_highs_.size()==0) { ptbin_highs_.push_back(20); ptbin_highs_.push_back(7000); }
|
41 |
// if(etabin_highs_.size()==0) { etabin_highs_.push_back(0.8); etabin_highs_.push_back(1.485); etabin_highs_.push_back(2.5); }
|
42 |
|
43 |
// create a reader for each bin
|
44 |
for(UInt_t ibin=0; ibin<binnamev_.size(); ibin++)
|
45 |
readers_[binnamev_[ibin]] = new TMVA::Reader( "!Color!Silent" );
|
46 |
|
47 |
map<TString,TMVA::Reader*>::iterator it;
|
48 |
for(it=readers_.begin(); it!=readers_.end(); it++) {
|
49 |
TString bin((*it).first);
|
50 |
|
51 |
// add vars to the tmva reader for this bin
|
52 |
for(unsigned ivar=0; ivar<floatvar_expressions_[bin].size(); ivar++) {
|
53 |
(*it).second->AddVariable(floatvar_expressions_[bin][ivar],&floatvals_[floatvar_names_[bin][ivar]]);
|
54 |
}
|
55 |
for(unsigned ispec=0; ispec<floatspec_names_[bin].size(); ispec++) {
|
56 |
TString varname(floatspec_names_[bin][ispec]);
|
57 |
(*it).second->AddSpectator(varname,&floatvals_[varname]);
|
58 |
}
|
59 |
|
60 |
// read in weight file for this bin
|
61 |
TString weightfilename(xml_names_[(*it).first]);
|
62 |
ifstream weightfs(weightfilename);
|
63 |
if(weightfs.is_open()) (*it).second->BookMVA(method,weightfilename);
|
64 |
else { cerr << "bdtiface: Cannot find weight file: " << weightfilename << "!" << endl; assert(0); }
|
65 |
}
|
66 |
} else cerr << "bdtiface: Warning: not loading any weight files!" << endl;
|
67 |
|
68 |
if(cutfile == "") cerr << "bdtiface: Warning: not loading a cut file!" << endl;
|
69 |
else readBDTCuts(cutfile); // same
|
70 |
cout << "bdtiface: loaded... ----------------------------------------------------------------------------------------" << endl;
|
71 |
}
|
72 |
//----------------------------------------------------------------------------------------
|
73 |
void bdtiface::getxmlvars()
|
74 |
{
|
75 |
map<TString,TString>::iterator it;
|
76 |
// loop over xml files
|
77 |
max_nfloats = 0;
|
78 |
for(it=xml_names_.begin(); it!=xml_names_.end(); it++) {
|
79 |
TString bin((*it).first);
|
80 |
ifstream ifs;
|
81 |
ifs.open((*it).second);
|
82 |
string line;
|
83 |
int ivar=0,ispec=0;
|
84 |
while(getline(ifs,line)) {
|
85 |
TString rline(line.c_str());
|
86 |
TString expression(rline(TRegexp("Expression=\"[^\"][^\"]*")));
|
87 |
expression = expression(12,expression.Length());
|
88 |
TString vname(rline(TRegexp("Label=\"[^\"][^\"]*")));
|
89 |
vname = vname(7,vname.Length());
|
90 |
TString type(rline(TRegexp("Type=\"[a-zA-Z]")));
|
91 |
type = type(6,type.Length());
|
92 |
if(rline.Contains(TRegexp("Variable VarIndex"))) {
|
93 |
if(type=="F" || type=="I") { // tmva::reader only allows floats...
|
94 |
floatvar_names_[bin].push_back(vname);
|
95 |
floatvar_expressions_[bin].push_back(expression);
|
96 |
if(floatvals_.find(vname) == floatvals_.end()) { floatvals_[vname] = Float_t(0); max_nfloats++;}
|
97 |
} else assert(0);
|
98 |
ivar++;
|
99 |
} else if(rline.Contains(TRegexp("Spectator SpecIndex"))) {
|
100 |
if(type=="F" || type=="I") {
|
101 |
floatspec_names_[bin].push_back(vname);
|
102 |
if(floatvals_.find(vname) == floatvals_.end()) { floatvals_[vname] = Float_t(0); max_nfloats++;}
|
103 |
} else assert(0);
|
104 |
ispec++;
|
105 |
}
|
106 |
}
|
107 |
}
|
108 |
}
|
109 |
//----------------------------------------------------------------------------------------
|
110 |
void bdtiface::readbins(TString varfile)
|
111 |
{
|
112 |
// read binning information from txt file
|
113 |
ifstream ifs(varfile);
|
114 |
assert(ifs.is_open());
|
115 |
string line;
|
116 |
|
117 |
bool list=false; // just list all bins in the txt file
|
118 |
vector<float> tmp_pt_highs,tmp_eta_highs;
|
119 |
while(getline(ifs,line)) {
|
120 |
if(line[0]=='#') continue;
|
121 |
if(line[0]!='@') continue;
|
122 |
stringstream ss(line);
|
123 |
if(line=="@LIST") { list=true; continue; }
|
124 |
|
125 |
TString first_str;
|
126 |
ss >> first_str;
|
127 |
while(!ss.eof()) {
|
128 |
if(list) {
|
129 |
TString variable;
|
130 |
Float_t low,high;
|
131 |
ss >> variable >> low >> high;
|
132 |
|
133 |
TString binname(first_str(1,first_str.Length()));
|
134 |
// cout << binname << "\t" << variable << "\t" << low << "\t" << high << endl;
|
135 |
if(variable=="glb") {
|
136 |
glbbin_lows_[binname] = *(new Float_t(low));
|
137 |
glbbin_highs_[binname] = *(new Float_t(high));
|
138 |
} else if(variable=="trk") {
|
139 |
trkbin_lows_[binname] = *(new Float_t(low));
|
140 |
trkbin_highs_[binname] = *(new Float_t(high));
|
141 |
} else if(variable=="pt") {
|
142 |
ptbin_lows_[binname] = *(new Float_t(low));
|
143 |
ptbin_highs_[binname] = *(new Float_t(high));
|
144 |
} else if(variable=="eta") {
|
145 |
etabin_lows_[binname] = *(new Float_t(low));
|
146 |
etabin_highs_[binname] = *(new Float_t(high));
|
147 |
} else { cerr << "bad variable: " << variable << endl; assert(0); }
|
148 |
|
149 |
} else {
|
150 |
float high;
|
151 |
ss >> high;
|
152 |
|
153 |
if(first_str=="@pt") tmp_pt_highs.push_back(float(high));
|
154 |
else if(first_str=="@eta") tmp_eta_highs.push_back(float(high));
|
155 |
else break;
|
156 |
}
|
157 |
}
|
158 |
}
|
159 |
|
160 |
if(!list) {
|
161 |
for(unsigned ipt=0; ipt<tmp_pt_highs.size(); ipt++) {
|
162 |
for(unsigned ieta=0; ieta<tmp_eta_highs.size(); ieta++) {
|
163 |
|
164 |
stringstream ss_binname;
|
165 |
ss_binname << "s" << ieta << "_pt" << ipt;
|
166 |
TString binname(ss_binname.str().c_str());
|
167 |
|
168 |
ptbin_lows_[binname] = *(new Float_t((ipt==0) ? 0 : tmp_pt_highs[ipt-1]));
|
169 |
ptbin_highs_[binname] = *(new Float_t(tmp_pt_highs[ipt]));
|
170 |
etabin_lows_[binname] = *(new Float_t((ieta==0) ? 0 : tmp_eta_highs[ieta-1]));
|
171 |
etabin_highs_[binname] = *(new Float_t(tmp_eta_highs[ieta]));
|
172 |
}
|
173 |
}
|
174 |
}
|
175 |
|
176 |
ifs.close();
|
177 |
}
|
178 |
//----------------------------------------------------------------------------------------
|
179 |
void bdtiface::readvars(TString varfile)
|
180 |
{
|
181 |
max_nfloats = 0;
|
182 |
// read in list of mva variables
|
183 |
ifstream ifs(varfile);
|
184 |
assert(ifs.is_open());
|
185 |
string line;
|
186 |
|
187 |
vector<TString> expressionv;
|
188 |
vector<char> typev;
|
189 |
TString bin("~");
|
190 |
while(getline(ifs,line)) {
|
191 |
if(line[0]=='#') continue;
|
192 |
stringstream ss(line);
|
193 |
|
194 |
if(line[0]=='@') {
|
195 |
continue;
|
196 |
} else {
|
197 |
TString type;
|
198 |
TString vname,use,expression;
|
199 |
ss >> type >> vname >> use >> expression;
|
200 |
assert(type=='F'||type=='I');
|
201 |
assert(use=="var"||use=="spec");
|
202 |
if(use=="var") {
|
203 |
if(type=="F" || type=="I") {
|
204 |
floatvar_names_[bin].push_back(vname);
|
205 |
floatvar_expressions_[bin].push_back(expression);
|
206 |
if(floatvals_.find(vname) == floatvals_.end()) {
|
207 |
floatvals_[vname] = Float_t(0);
|
208 |
max_nfloats++;
|
209 |
}
|
210 |
} else assert(0);
|
211 |
} else if(use=="spec") {
|
212 |
if(type=="F" || type=="I") {
|
213 |
floatspec_names_[bin].push_back(vname);
|
214 |
if(floatvals_.find(vname) == floatvals_.end()) {
|
215 |
floatvals_[vname] = Float_t(0);
|
216 |
max_nfloats++;
|
217 |
}
|
218 |
} else assert(0);
|
219 |
}
|
220 |
}
|
221 |
}
|
222 |
}
|
223 |
//----------------------------------------------------------------------------------------
|
224 |
TString bdtiface::getBinName(double pt, double eta, int isglobal, int istracker)
|
225 |
{
|
226 |
// classify by eta and pt bins
|
227 |
for(unsigned ibin=0; ibin<binnamev_.size(); ibin++) {
|
228 |
TString binname(binnamev_[ibin]);
|
229 |
// cout << binname << "\t"
|
230 |
// << ptbin_lows_[binname] << "\t"
|
231 |
// << ptbin_highs_[binname] << "\t"
|
232 |
// << etabin_lows_[binname] << "\t"
|
233 |
// << etabin_highs_[binname] << "\t"
|
234 |
// << endl;
|
235 |
bool thisptbin=false,thisetabin=false,thisglobalbin=false,thistrackerbin=false;
|
236 |
if( pt >= ptbin_lows_[binname] && pt < ptbin_highs_[binname]) thisptbin = true;
|
237 |
if(fabs(eta) >= etabin_lows_[binname] && fabs(eta) < etabin_highs_[binname]) thisetabin = true;
|
238 |
if(isglobal != -1)
|
239 |
if(isglobal == glbbin_lows_[binname] && isglobal == glbbin_highs_[binname]) thisglobalbin = true;
|
240 |
if(istracker != -1)
|
241 |
if(istracker == trkbin_lows_[binname] && istracker == trkbin_highs_[binname]) thistrackerbin = true;
|
242 |
|
243 |
if(thisptbin && thisetabin && (isglobal==-1 || thisglobalbin) && (istracker==-1 || thistrackerbin)) return binname;
|
244 |
}
|
245 |
|
246 |
// assert(0);
|
247 |
cerr << "BIN NOT FOUND: " << pt << "\t" << eta << endl;
|
248 |
for(unsigned ibin=0; ibin<binnamev_.size(); ibin++) {
|
249 |
TString binname(binnamev_[ibin]);
|
250 |
cout << binnamev_[ibin] << "\t" << ptbin_lows_[binname] << "," << ptbin_highs_[binname] << "\t" << etabin_lows_[binname] << "," << etabin_highs_[binname] << endl;
|
251 |
}
|
252 |
|
253 |
assert(0);
|
254 |
}
|
255 |
//----------------------------------------------------------------------------------------
|
256 |
void bdtiface::readlimits(TString varfile)
|
257 |
{
|
258 |
// get the limits for each variable
|
259 |
ifstream ifs(varfile);
|
260 |
assert(ifs.is_open());
|
261 |
string line;
|
262 |
|
263 |
vector<TString> expressionv;
|
264 |
vector<char> typev;
|
265 |
TString bin("~");
|
266 |
while(getline(ifs,line)) {
|
267 |
if(line[0]=='#') continue;
|
268 |
stringstream ss(line);
|
269 |
|
270 |
if(line[0]=='@') {
|
271 |
continue;
|
272 |
} else {
|
273 |
TString type,vname,use,expression,etabin;
|
274 |
bool abs;
|
275 |
double min,max;
|
276 |
ss >> type >> vname >> use >> expression >> etabin >> abs >> min >> max;
|
277 |
minima_[vname] = (Float_t)min;
|
278 |
maxima_[vname] = (Float_t)max;
|
279 |
}
|
280 |
}
|
281 |
}
|
282 |
//----------------------------------------------------------------------------------------
|
283 |
void bdtiface::readBDTCuts(TString file)
|
284 |
{
|
285 |
|
286 |
// get bdt cut values from file
|
287 |
ifstream ifs;
|
288 |
ifs.open(file.Data());
|
289 |
if(!ifs.is_open()) {
|
290 |
cerr << "bdtiface: Warning: bdtcut file does not exist!" << endl;
|
291 |
// for(unsigned ibin=0; ibin<binnamev_.size(); ibin++) {
|
292 |
// TString binname(binnamev_[ibin]);
|
293 |
// loosecuts_[binname] = 999;
|
294 |
// mediumcuts_[binname] = 999;
|
295 |
// tightcuts_[binname] = 999;
|
296 |
// }
|
297 |
return;
|
298 |
}
|
299 |
string line;
|
300 |
vector<TString> wpnames;
|
301 |
while(getline(ifs,line)) {
|
302 |
stringstream ss(line);
|
303 |
// push back the working point names
|
304 |
if(line[0]=='@') {
|
305 |
TString atsign;
|
306 |
ss >> atsign;
|
307 |
while(!ss.eof()) {
|
308 |
TString wpname;
|
309 |
ss >> wpname;
|
310 |
wpname.ReplaceAll("@","");
|
311 |
wpnames.push_back(wpname);
|
312 |
}
|
313 |
continue;
|
314 |
}
|
315 |
|
316 |
if(wpnames.size()==0) { wpnames.push_back("loose"); wpnames.push_back("medium"); wpnames.push_back("tight"); }
|
317 |
|
318 |
string binname;
|
319 |
ss >> binname;
|
320 |
for(unsigned iwp=0; iwp<wpnames.size(); iwp++) {
|
321 |
double cutval;
|
322 |
ss >> cutval;
|
323 |
cuts_[binname+"_"+wpnames[iwp]] = *(new double(cutval));
|
324 |
// cout << binname << "\t" << wpnames[iwp] << cutval << endl;
|
325 |
}
|
326 |
// // Double_t loosecut,mediumcut,tightcut;
|
327 |
// // ss >> binname >> loosecut >> mediumcut >> tightcut;
|
328 |
// loosecuts_[binname] = cuts_[binname+"_loose"];
|
329 |
// mediumcuts_[binname] = cuts_[binname+"_medium"];
|
330 |
// tightcuts_[binname] = cuts_[binname+"_tight"];
|
331 |
}
|
332 |
}
|
333 |
//----------------------------------------------------------------------------------------
|
334 |
void bdtiface::setVals(map<TString,Float_t> floatvals)
|
335 |
{
|
336 |
// pass in a map of the variable values for each event
|
337 |
if(floatvals.size()!=max_nfloats) {
|
338 |
cerr << "ERROR, passed vars: " << floatvals.size() << "\t" << "number required: " << max_nfloats << endl;
|
339 |
map<TString,Float_t>::iterator it;
|
340 |
if(floatvals.size() > floatvals_.size()) {
|
341 |
for(it=floatvals.begin(); it!=floatvals.end(); it++) {
|
342 |
if(floatvals_.find((*it).first) == floatvals_.end()) cerr << "\t" << (*it).first << " not read from xml/txt in " << weightdir_(0,weightdir_.Last('/')) << endl;
|
343 |
}
|
344 |
} else {
|
345 |
for(it=floatvals_.begin(); it!=floatvals_.end(); it++) {
|
346 |
if(floatvals.find((*it).first) == floatvals.end()) cerr << "\t" << (*it).first << " not found in map of passed floatvals" << endl;
|
347 |
}
|
348 |
}
|
349 |
assert(0);
|
350 |
}
|
351 |
|
352 |
map<TString,Float_t>::iterator it_fl;
|
353 |
for(it_fl=floatvals.begin(); it_fl!=floatvals.end(); it_fl++) {
|
354 |
TString name((*it_fl).first);
|
355 |
if(floatvals_.find(name) == floatvals_.end()) { cerr << "bdtiface: " << name << " not found in float vals read from xml/txt" << endl; assert(0); }
|
356 |
Float_t value = floatvals[name];
|
357 |
assert(minima_.find(name) != minima_.end()); assert(maxima_.find(name) != maxima_.end());
|
358 |
if(minima_[name] != -9999999 && value < minima_[name]) value = minima_[name];
|
359 |
if(maxima_[name] != 9999999 && value > maxima_[name]) value = maxima_[name];
|
360 |
floatvals_[name] = value;
|
361 |
}
|
362 |
}
|
363 |
//----------------------------------------------------------------------------------------
|
364 |
double bdtiface::getBDTCut(double pt, double eta, TString tightness)
|
365 |
{
|
366 |
TString binname = getBinName(pt,eta);
|
367 |
return getBDTCut(binname,tightness);
|
368 |
}
|
369 |
//----------------------------------------------------------------------------------------
|
370 |
double bdtiface::getBDTCut(TString binname, TString tightness)
|
371 |
{
|
372 |
TString cutlabel(binname+"_"+tightness);
|
373 |
if(cuts_.find(binname+"_"+tightness) != cuts_.end()) return cuts_[binname+"_"+tightness];
|
374 |
cout << cutlabel << " not found!" << endl;
|
375 |
map<TString,double>::iterator it;
|
376 |
for(it=cuts_.begin(); it!=cuts_.end(); it++) {
|
377 |
cout << "\t" << (*it).first << "\t" << (*it).second << endl;
|
378 |
}
|
379 |
assert(0);
|
380 |
// if(tightness=="loose") return loosecuts_[binname];
|
381 |
// else if(tightness=="medium") return mediumcuts_[binname];
|
382 |
// else if(tightness=="tight") return tightcuts_[binname];
|
383 |
// else {cout << "error! bad tightness" << endl; assert(0); return -1;}
|
384 |
}
|
385 |
//----------------------------------------------------------------------------------------
|
386 |
double bdtiface::getBDTval(Double_t pt, Double_t eta, TString method)
|
387 |
{
|
388 |
TString binname((readers_.size()>1) ? getBinName(pt,eta) : "~");
|
389 |
assert(readers_.find(binname) != readers_.end());
|
390 |
TMVA::Reader *reader = readers_[binname];
|
391 |
return reader->EvaluateMVA(method);
|
392 |
}
|
393 |
//----------------------------------------------------------------------------------------
|
394 |
Bool_t bdtiface::passBDT(Double_t pt, Double_t eta, TString tightness, TString method)
|
395 |
{
|
396 |
double bdtcut = getBDTCut(pt,eta,tightness);
|
397 |
TString binname((readers_.size()>1) ? getBinName(pt,eta) : "~");
|
398 |
assert(readers_.find(binname) != readers_.end());
|
399 |
TMVA::Reader *reader = readers_[binname];
|
400 |
double bdtval = reader->EvaluateMVA(method);
|
401 |
|
402 |
//cerr << "bdtcut: " << bdtcut << "\tbdtval: " << bdtval << endl;
|
403 |
|
404 |
return bdtval > bdtcut;
|
405 |
}
|
406 |
//----------------------------------------------------------------------------------------
|
407 |
void bdtiface::print()
|
408 |
{
|
409 |
cout << "bdtiface -- weightdir: " << weightdir_ << endl;
|
410 |
}
|
411 |
|