15 |
|
|
16 |
|
#include "KinematicsStruct.h" |
17 |
|
#include "HistHeaders.h" |
18 |
+ |
#include "filestuff.h" |
19 |
+ |
|
20 |
+ |
#include "ZZMatrixElement/MELA/interface/Mela.h" |
21 |
+ |
#include "ZZMatrixElement/MELA/interface/PseudoMELA.h" |
22 |
+ |
#include "ZZMatrixElement/MELA/src/computeAngles.h" |
23 |
|
|
24 |
|
using namespace std; |
25 |
|
|
26 |
+ |
void setTrainingVal(TString name, double *val, filestuff *fs); |
27 |
|
void setFractions(float mH, float mH_lo, float mH_hi, float &frac_lo, float &frac_hi); |
28 |
|
void setNevents(int nEvtMax, int nSigTrainMax, int nSigTestMax, int nBkgTrainMax, int nBkgTestMax, TString &nSig, TString &nBkg); |
29 |
|
|
42 |
|
cout << "config: " << config << endl; |
43 |
|
|
44 |
|
// TString ntupledir; |
45 |
< |
vector<TString> classes,fileNames,types; |
45 |
> |
vector<TString> classes,fileNames,types,indirs,dsets,wgtStrs; |
46 |
|
vector<double> fileFracs,fileMasses; |
47 |
|
vector<TFile*> inFiles; |
48 |
|
vector<TTree*> inTrees; |
49 |
< |
bool multiclass=false; |
50 |
< |
bool multisigs=false; |
51 |
< |
bool noFakeTest=false; |
52 |
< |
bool useWeightExpressions=false; |
49 |
> |
vector<filestuff*> fsv; |
50 |
> |
bool multiclass(false); |
51 |
> |
bool multisigs(false); |
52 |
> |
bool noFakeTest(false); |
53 |
> |
bool useMela(false); |
54 |
|
TString NTrees(""),NNodesMax(""),Shrinkage(""),nCuts(""),NormMode(""); |
55 |
|
int nMax=1000000; |
56 |
|
ifstream ifs(config); |
62 |
|
if(line[0]=='^') { |
63 |
|
TString dummy; |
64 |
|
// if(TString(line).Contains("^ntupledir")) ss >> dummy >> ntupledir; |
65 |
< |
if(TString(line).Contains("^multiclass")) { cout << "multiclass: true" << endl; multiclass = true; } |
66 |
< |
if(TString(line).Contains("^multisigs")) { cout << "multisigs: true" << endl; multisigs = true; } |
67 |
< |
if(TString(line).Contains("^noFakeTest")) { cout << "noFakeTest: true" << endl; noFakeTest = true; } // use all the events for training... we have an orthogonal sample for testing, anyway, and we need the stat |
68 |
< |
if(TString(line).Contains("^NTrees")) ss >> dummy >> NTrees; |
69 |
< |
if(TString(line).Contains("^NNodesMax")) ss >> dummy >> NNodesMax; |
70 |
< |
if(TString(line).Contains("^Shrinkage")) ss >> dummy >> Shrinkage; |
71 |
< |
if(TString(line).Contains("^nCuts")) ss >> dummy >> nCuts; |
72 |
< |
if(TString(line).Contains("^NormMode")) { ss >> dummy >> NormMode; assert(NormMode=="NumEvents" || NormMode=="EqualNumEvents"); } |
73 |
< |
if(TString(line).Contains("^nMax")) ss >> dummy >> nMax; |
74 |
< |
if(TString(line).Contains("^useWeightExpressions")) { cout << "useWeightExpressions: true" << endl; useWeightExpressions = true; } |
65 |
> |
if(TString(line).Contains("^multiclass")){ multiclass = true; cout << "multiclass" << multiclass << endl; } |
66 |
> |
if(TString(line).Contains("^multisigs" )){ multisigs = true; cout << "multisigs" << multisigs << endl; } |
67 |
> |
if(TString(line).Contains("^noFakeTest")){ noFakeTest = true; cout << "noFakeTest" << noFakeTest << endl; } |
68 |
> |
if(TString(line).Contains("^NTrees" )){ ss >> dummy >> NTrees; cout << "NTrees" << NTrees << endl; } |
69 |
> |
if(TString(line).Contains("^NNodesMax" )){ ss >> dummy >> NNodesMax; cout << "NNodesMax" << NNodesMax << endl; } |
70 |
> |
if(TString(line).Contains("^Shrinkage" )){ ss >> dummy >> Shrinkage; cout << "Shrinkage" << Shrinkage << endl; } |
71 |
> |
if(TString(line).Contains("^nCuts" )){ ss >> dummy >> nCuts; cout << "nCuts" << nCuts << endl; } |
72 |
> |
if(TString(line).Contains("^NormMode" )){ ss >> dummy >> NormMode; cout << "NormMode" << NormMode << endl; } |
73 |
> |
if(TString(line).Contains("^nMax" )){ ss >> dummy >> nMax; cout << "nMax" << nMax << endl; } |
74 |
> |
if(TString(line).Contains("^useMela" )){ useMela = true; cout << "useMela" << useMela << endl; } |
75 |
|
continue; |
76 |
|
} |
77 |
|
|
78 |
< |
TString cls,fname,type; |
78 |
> |
TString cls,indir,type,dset,wgtStr; |
79 |
|
double frac,fileMass; |
80 |
< |
ss >> cls >> type >> frac >> fname >> fileMass; |
80 |
> |
bool isdata; |
81 |
> |
ss >> cls >> type >> frac >> wgtStr >> indir >> dset >> fileMass >> isdata; |
82 |
|
// assert(cls=="sig" || cls=="sig2" || cls=="sig3" || cls=="bkg"); |
83 |
|
classes.push_back(cls); |
84 |
|
types.push_back(type); |
85 |
+ |
indirs.push_back(indir); |
86 |
+ |
dsets.push_back(dset); |
87 |
+ |
TString fname(indir+"/"+dset+"/merged.root"); |
88 |
+ |
fsv.push_back(new filestuff(dset, fname, dset, isdata, 2012)); |
89 |
|
fileNames.push_back(/*ntupledir+"/"+*/fname); |
90 |
|
fileFracs.push_back(frac); |
91 |
+ |
wgtStrs.push_back(wgtStr); |
92 |
|
fileMasses.push_back(fileMass); |
93 |
|
cout << " pushing back: " << cls << " " << setw(12) << frac << " " << fname << " " << fileMass << endl; |
94 |
|
inFiles.push_back(TFile::Open(fileNames.back())); |
143 |
|
<< " frac_hi: " << frac_hi << endl |
144 |
|
<< endl; |
145 |
|
|
146 |
< |
TFile * outputFile = new TFile(outputrootfile, "RECREATE"); |
147 |
< |
|
148 |
< |
map<string,bool> varmap; |
149 |
< |
char * dovar = strtok(varstring, ":"); |
150 |
< |
if( dovar != NULL ) varmap[string(dovar)]=true; |
151 |
< |
while( (dovar = strtok(NULL, ":")) != NULL ) { |
152 |
< |
varmap[string(dovar)]=true; |
146 |
> |
// get the list of variables we're going to use |
147 |
> |
vector<double> varVals; |
148 |
> |
TString varStr(varstring); |
149 |
> |
varStr.ReplaceAll(":"," "); |
150 |
> |
vector<TString> vars,specs; |
151 |
> |
stringstream ss(varStr.Data()); |
152 |
> |
while(!ss.eof()) { |
153 |
> |
TString var; |
154 |
> |
ss >> var; |
155 |
> |
vars.push_back(var); |
156 |
> |
cout << "VAR: " << var << endl; |
157 |
> |
varVals.push_back(double(0)); |
158 |
|
} |
159 |
+ |
specs.push_back("m4l"); varVals.push_back(double(0)); |
160 |
+ |
specs.push_back("run"); varVals.push_back(double(0)); |
161 |
+ |
specs.push_back("lumi"); varVals.push_back(double(0)); |
162 |
+ |
specs.push_back("evt"); varVals.push_back(double(0)); |
163 |
+ |
assert(varVals.size() == (vars.size() + specs.size())); |
164 |
|
|
165 |
< |
char name[256]; |
143 |
< |
sprintf(name, weightname ); |
165 |
> |
TFile *outputFile = new TFile(outputrootfile, "RECREATE"); |
166 |
|
TMVA::Tools::Instance(); |
167 |
|
TString facStr("!V:!Silent:!DrawProgressBar"); |
168 |
|
if(multiclass) facStr = facStr+":AnalysisType=multiclass"; |
169 |
< |
TMVA::Factory* factory = new TMVA::Factory( name, outputFile, facStr ); |
170 |
< |
// if( varmap[string("costheta1" )] ) factory->AddVariable( "costheta1", 'F',-1, 1); |
171 |
< |
// if( varmap[string("costheta2" )] ) factory->AddVariable( "costheta2", 'F',-1, 1); |
172 |
< |
// if( varmap[string("costhetastar" )] ) factory->AddVariable( "costhetastar", 'F',-1, 1 ); |
151 |
< |
// if( varmap[string("Phi" )] ) factory->AddVariable( "Phi", 'F',-TMath::Pi(), TMath::Pi() ); |
152 |
< |
// if( varmap[string("Phi1" )] ) factory->AddVariable( "Phi1", 'F',-TMath::Pi(), TMath::Pi() ); |
153 |
< |
if( varmap[string("costheta1" )] ) factory->AddVariable("costheta1", 'F'); |
154 |
< |
if( varmap[string("costheta2" )] ) factory->AddVariable("costheta2", 'F'); |
155 |
< |
if( varmap[string("costhetastar" )] ) factory->AddVariable("costhetastar", 'F'); |
156 |
< |
if( varmap[string("Phi" )] ) factory->AddVariable("Phi", 'F'); |
157 |
< |
if( varmap[string("Phi1" )] ) factory->AddVariable("Phi1", 'F'); |
158 |
< |
if( varmap[string("mZ1" )] ) factory->AddVariable("mZ1", 'F'); |
159 |
< |
if( varmap[string("mZ2" )] ) factory->AddVariable("mZ2", 'F'); |
160 |
< |
if( varmap[string("pt4l" )] ) factory->AddVariable("ZZpt/m4l", 'F'); |
161 |
< |
if( varmap[string("zzdotz1" )] ) factory->AddVariable("ZZdotZ1/(m4l*mZ1)", 'F'); |
162 |
< |
if( varmap[string("zzdotz2" )] ) factory->AddVariable("ZZdotZ2/(m4l*mZ2)", 'F'); |
163 |
< |
// if( varmap[string("dphi1" )] ) factory->AddVariable("ZZptCosDphiZ1pt", 'F', -TMath::Pi(), TMath::Pi() ); |
164 |
< |
// if( varmap[string("dphi2" )] ) factory->AddVariable("ZZptCosDphiZ2pt", 'F', -TMath::Pi(), TMath::Pi() ); |
165 |
< |
if( varmap[string("dphi1" )] ) factory->AddVariable("ZZptCosDphiZ1pt", 'F'); |
166 |
< |
if( varmap[string("dphi2" )] ) factory->AddVariable("ZZptCosDphiZ2pt", 'F'); |
167 |
< |
if( varmap[string("Z1pt" )] ) factory->AddVariable("Z1pt/m4l", 'F'); |
168 |
< |
if( varmap[string("Z2pt" )] ) factory->AddVariable("Z2pt/m4l", 'F'); |
169 |
< |
if( varmap[string("y4l" )] ) factory->AddVariable("ZZy", 'F'); |
170 |
< |
if( varmap[string("nJets" )] ) factory->AddVariable("nJets", 'F'); |
171 |
< |
if( varmap[string("mjj" )] ) factory->AddVariable("mjj", 'F'); |
172 |
< |
if( varmap[string("dEta" )] ) factory->AddVariable("dEta", 'F'); |
173 |
< |
if( varmap[string("etaProd" )] ) factory->AddVariable("etaProd", 'F'); |
174 |
< |
if( varmap[string("dphiJ1HiPtZ" )] ) factory->AddVariable("dphiJ1HiPtZ", 'F'); |
175 |
< |
if( varmap[string("dphiJ1LoPtZ" )] ) factory->AddVariable("dphiJ1LoPtZ", 'F'); |
176 |
< |
if( varmap[string("dEtaJ1HiPtZ" )] ) factory->AddVariable("dEtaJ1HiPtZ", 'F'); |
177 |
< |
if( varmap[string("dEtaJ1LoPtZ" )] ) factory->AddVariable("dEtaJ1LoPtZ", 'F'); |
178 |
< |
if( varmap[string("J1dotHiPtZ" )] ) factory->AddVariable("J1dotHiPtZ", 'F'); |
179 |
< |
if( varmap[string("J1dotLoPtZ" )] ) factory->AddVariable("J1dotLoPtZ", 'F'); |
180 |
< |
if( varmap[string("dphiJ2HiPtZ" )] ) factory->AddVariable("dphiJ2HiPtZ", 'F'); |
181 |
< |
if( varmap[string("dphiJ2LoPtZ" )] ) factory->AddVariable("dphiJ2LoPtZ", 'F'); |
182 |
< |
if( varmap[string("dEtaJ2HiPtZ" )] ) factory->AddVariable("dEtaJ2HiPtZ", 'F'); |
183 |
< |
if( varmap[string("dEtaJ2LoPtZ" )] ) factory->AddVariable("dEtaJ2LoPtZ", 'F'); |
184 |
< |
if( varmap[string("J2dotHiPtZ" )] ) factory->AddVariable("J2dotHiPtZ", 'F'); |
185 |
< |
if( varmap[string("J2dotLoPtZ" )] ) factory->AddVariable("J2dotLoPtZ", 'F'); |
186 |
< |
|
187 |
< |
if( varmap[string("m4l")] ) factory->AddVariable( "m4l", 'F' ); |
188 |
< |
else factory->AddSpectator("m4l", 'F' ); |
189 |
< |
factory->AddSpectator("run", 'i' ); |
190 |
< |
factory->AddSpectator("lumi", 'i' ); |
191 |
< |
factory->AddSpectator("evt", 'i' ); |
192 |
< |
|
169 |
> |
TMVA::Factory* factory = new TMVA::Factory(weightname, outputFile, facStr); |
170 |
> |
for(unsigned ivar=0; ivar<vars.size(); ivar++) factory->AddVariable( vars[ivar], 'F'); |
171 |
> |
for(unsigned ispec=0; ispec<specs.size(); ispec++) factory->AddSpectator(specs[ispec], 'F'); |
172 |
> |
|
173 |
|
char buf[256]; |
174 |
< |
sprintf( buf, "(m4l>%f&&m4l<%f)", (float)wL, (float)wH ); |
195 |
< |
TCut mycuts(buf); |
174 |
> |
sprintf( buf, "(m4l>%f && m4l<%f)", (float)wL, (float)wH ); |
175 |
|
TString cutStr(buf); |
176 |
|
cout << "cutStr: " << cutStr << endl; |
177 |
|
|
178 |
< |
int NqqZZ(0),Nlljj(0); |
179 |
< |
vector<TString> sigFiles,bkgFiles; |
180 |
< |
vector<double> passFracs,sigFileFracs,bkgFileFracs,nPassCutv,nSigPassCutv,nBkgPassCutv; |
178 |
> |
bool useWgts(true); |
179 |
> |
vector<vector<TString> > files; |
180 |
> |
vector<double> passFracs,nPassCutv,baseWgts; |
181 |
> |
cout << setw(13) << "class" << setw(6) << "type" << setw(12) << " nPass " << setw(8) << "nTot" << setw(10) << " frac" << setw(12) << "baseWgt" << endl; |
182 |
|
for(unsigned ifile=0; ifile<fileNames.size(); ifile++) { |
183 |
< |
double nPassCut = double(inTrees[ifile]->GetEntries(cutStr)); |
184 |
< |
if(fileFracs[ifile] == -1) |
185 |
< |
fileFracs[ifile] = mH_fracs[fileMasses[ifile]]; |
186 |
< |
passFracs.push_back( nPassCut / inTrees[ifile]->GetEntries() ); |
187 |
< |
nPassCutv.push_back( nPassCut ); |
188 |
< |
cout << classes[ifile] << ": " |
189 |
< |
<< setw(8) << nPassCut << " / " << setw(8) << inTrees[ifile]->GetEntries() << " events pass cut in file: " << fileNames[ifile] << ", filefrac: " << fileFracs[ifile] << endl; |
190 |
< |
|
191 |
< |
if(types[ifile]=="qqZZ") NqqZZ += nPassCut; |
192 |
< |
if(types[ifile]=="lljj") Nlljj += nPassCut; |
193 |
< |
|
194 |
< |
// if(classes[ifile].Contains("sig",TString::kIgnoreCase)) { |
195 |
< |
// sigFiles.push_back(fileNames[ifile]); |
196 |
< |
// sigFileFracs.push_back(fileFracs[ifile]); |
197 |
< |
// nSigPassCutv.push_back( nPassCut ); |
198 |
< |
// } else if(classes[ifile].Contains("bkg",TString::kIgnoreCase)) { |
199 |
< |
// bkgFiles.push_back(fileNames[ifile]); |
200 |
< |
// bkgFileFracs.push_back(fileFracs[ifile]); |
201 |
< |
// nBkgPassCutv.push_back( nPassCut ); |
202 |
< |
// } else assert(0); |
183 |
> |
if(fileFracs[ifile] == -1) fileFracs[ifile] = mH_fracs[fileMasses[ifile]]; |
184 |
> |
double nPass(0),nTot(0),passFrac(0); |
185 |
> |
if(useWgts) { |
186 |
> |
TH1D hPass("hPass","",100,0,1000); |
187 |
> |
TH1D hTot("hTot","",100,0,1000); |
188 |
> |
TString wgtStr(wgtStrs[ifile].Contains("wInterf") ? "1" : wgtStrs[ifile]); // tree doesn't have a wInterf leaf |
189 |
> |
inTrees[ifile]->Draw("m4l>>hPass",wgtStr+"*"+cutStr); |
190 |
> |
inTrees[ifile]->Draw("m4l>>hTot",wgtStr); |
191 |
> |
passFrac = hPass.Integral() / hTot.Integral(); |
192 |
> |
nPass = hPass.Integral(); |
193 |
> |
nTot = hTot.Integral(); |
194 |
> |
} else { |
195 |
> |
nPass = double(inTrees[ifile]->GetEntries(cutStr)); |
196 |
> |
passFrac = nPass / inTrees[ifile]->GetEntries(); |
197 |
> |
nTot = inTrees[ifile]->GetEntries(); |
198 |
> |
} |
199 |
> |
passFracs.push_back(passFrac); |
200 |
> |
nPassCutv.push_back(nPass); |
201 |
> |
double baseWgt(fileFracs[ifile] / nPass); |
202 |
> |
// if(fsv[ifile]->dataset_.Contains("fakes",TString::kIgnoreCase) && noFakeTest) |
203 |
> |
// baseWgt /= 2; // if we're putting all the fakes into training, divide weights by two |
204 |
> |
baseWgts.push_back(baseWgt); |
205 |
> |
cout << setw(13) << classes[ifile] << "(" << setw(5) << types[ifile] << "): " << setw(10) << nPass << " / " << setw(12) << nTot |
206 |
> |
<< setw(6) << setprecision(5) << fileFracs[ifile] << setw(15) << baseWgt |
207 |
> |
<< setw(30) << dsets[ifile] << " " << indirs[ifile] |
208 |
> |
<< endl; |
209 |
|
} |
210 |
|
|
211 |
< |
// double qqZZoverlljj(double(NqqZZ)/Nlljj); |
212 |
< |
// cout << "found " << NqqZZ << " qqZZ and " << Nlljj << " lljj, so setting ratio to " << qqZZoverlljj << endl; |
211 |
> |
Mela *mela(0); |
212 |
> |
if(useMela) mela = new Mela(false, 8); |
213 |
|
|
214 |
< |
// double nMin=999999; |
215 |
< |
// double nSig=0; |
216 |
< |
// if(sigFiles.size()==2) { |
217 |
< |
// cout << " signal comparison: " << nSigPassCutv[0]*sigFileFracs[1] << " " << nSigPassCutv[1]*sigFileFracs[0] << endl; |
218 |
< |
// if(nSigPassCutv[0]*sigFileFracs[1] > nSigPassCutv[1]*sigFileFracs[0]) { // statistics are limited by second signal file |
219 |
< |
// cout << " use second signal as min signal" << endl; |
220 |
< |
// nMin = nSig = nSigPassCutv[1] * ( 1 + sigFileFracs[0] / sigFileFracs[1] ); |
221 |
< |
// } else { // limited by first one |
222 |
< |
// cout << " use first signal as min signal" << endl; |
223 |
< |
// nMin = nSig = nSigPassCutv[0] * ( 1 + sigFileFracs[1] / sigFileFracs[0] ); |
224 |
< |
// } |
225 |
< |
// } else if(sigFiles.size()==1 && mH_lo==0 && mH_hi==0) { // not interpolating |
226 |
< |
// nMin = nSig = nSigPassCutv[0]; |
227 |
< |
// } else assert(multisigs); |
228 |
< |
// cout << "nMin after signal check: " << nMin << endl; |
229 |
< |
|
230 |
< |
// long nBkg=0,nBkg2=0; |
231 |
< |
// for(unsigned ibkg=0; ibkg<bkgFiles.size(); ibkg++) { |
232 |
< |
// if(multiclass && bkgFiles[ibkg].Contains("fakes")) |
233 |
< |
// nBkg2 += nBkgPassCutv[ibkg]; |
234 |
< |
// else |
235 |
< |
// nBkg += nBkgPassCutv[ibkg]; |
236 |
< |
// } |
237 |
< |
// cout << "nBkg: " << nBkg << "\tnBkg2: " << nBkg2 << endl; |
238 |
< |
// if(!multisigs && nBkg < nMin) nMin = nBkg; // don't include fakes in this comparison. Also, for multisigs we train two signal samples against each other, so we don't care if there is less bkg |
239 |
< |
// cout << "nMin after bkg check: " << nMin << endl; |
240 |
< |
|
241 |
< |
// double nTarget = min(nMin,double(nMax)); |
242 |
< |
// cout << "nTarget: " << nTarget << endl; |
257 |
< |
|
258 |
< |
// map<TString,double> fracUsed; |
259 |
< |
// if(nTarget < nSig) fracUsed["sig"] = double(nTarget) / nSig; else fracUsed["sig"] = 1; |
260 |
< |
// if(nTarget < nBkg) fracUsed["bkg"] = double(nTarget) / nBkg; else fracUsed["bkg"] = 1; |
261 |
< |
// cout << "frac Used sig: " << fracUsed["sig"] << ", bkg: " << fracUsed["bkg"] << endl; |
262 |
< |
// if(multisigs) { |
263 |
< |
// fracUsed["sig2"] = fracUsed["sig3"] = fracUsed["sig"]; |
264 |
< |
// } |
265 |
< |
|
266 |
< |
// vector<TTree*> TreeCopies; // copies of the input trees, but with only the number of events we want |
267 |
< |
// TList *sigList = new TList; |
268 |
< |
// TList *bkgList = new TList; |
269 |
< |
// TList *bkg2List = new TList; |
270 |
< |
// TList *sig2List = new TList; |
271 |
< |
// TList *sig3List = new TList; |
272 |
< |
// cout << "making tree copies: " << endl; |
273 |
< |
// for(unsigned ifile=0; ifile<fileNames.size(); ifile++) { |
274 |
< |
// // if(mH_fracs.find(fileMasses[ifile]) == mH_fracs.end()) { cout << "filemasses[ifile]: " << fileMasses[ifile] << endl; assert(0); } |
275 |
< |
|
276 |
< |
// long nToRequest; |
277 |
< |
// nToRequest = inTrees[ifile]->GetEntries(); |
278 |
< |
// if(!(multiclass && classes[ifile].Contains("bkg") && fileNames[ifile].Contains("fakes"))) |
279 |
< |
// nToRequest *= fracUsed[classes[ifile]]; |
280 |
< |
// cout << " adding " << setw(12) << passFracs[ifile]*nToRequest << " from file " << fileNames[ifile] |
281 |
< |
// << ", which means asking for: " << nToRequest << endl; |
282 |
< |
// TreeCopies.push_back(inTrees[ifile]->CopyTree(cutStr,"",nToRequest)); |
283 |
< |
// if(classes[ifile].Contains("sig",TString::kIgnoreCase)) { |
284 |
< |
// if(!multisigs) { |
285 |
< |
// sigList->Add(TreeCopies.back()); |
286 |
< |
// } else { |
287 |
< |
// if(classes[ifile]=="sig") sigList->Add(TreeCopies.back()); |
288 |
< |
// else if(classes[ifile]=="sig2") { cout << "added to sig2list" << endl; sig2List->Add(TreeCopies.back()); } |
289 |
< |
// else if(classes[ifile]=="sig3") { cout << "added to sig3list" << endl; sig3List->Add(TreeCopies.back()); } |
290 |
< |
// else assert(0); |
291 |
< |
// } |
292 |
< |
// } |
293 |
< |
// if(classes[ifile].Contains("bkg")) { |
294 |
< |
// if(multiclass && fileNames[ifile].Contains("fakes")) |
295 |
< |
// bkg2List->Add(TreeCopies.back()); |
296 |
< |
// else |
297 |
< |
// bkgList->Add(TreeCopies.back()); |
298 |
< |
// } |
299 |
< |
// } |
300 |
< |
|
301 |
< |
// TTree *sigTreeAll = TTree::MergeTrees(sigList); |
302 |
< |
// TTree *sig2TreeAll = TTree::MergeTrees(sig2List); |
303 |
< |
// TTree *sig3TreeAll = sig3List->GetSize() ? TTree::MergeTrees(sig3List) : 0; |
304 |
< |
// TTree *bkgTreeAll = TTree::MergeTrees(bkgList); |
305 |
< |
// TTree *bkg2TreeAll = multiclass ? TTree::MergeTrees(bkg2List) : 0; |
306 |
< |
// cout << "sigTreeAll entries: " << sigTreeAll->GetEntries() << endl; |
307 |
< |
// cout << "bkgTreeAll entries: " << bkgTreeAll->GetEntries() << endl; |
308 |
< |
// if(multiclass) { |
309 |
< |
// cout << "bkg2TreeAll entries: " << bkg2TreeAll->GetEntries() << endl; |
310 |
< |
// if(multisigs) { |
311 |
< |
// cout << "sig2TreeAll entries: " << sig2TreeAll->GetEntries() << endl; |
312 |
< |
// if(sig3TreeAll) |
313 |
< |
// cout << "sig3TreeAll entries: " << sig3TreeAll->GetEntries() << endl; |
314 |
< |
// } |
315 |
< |
// } |
316 |
< |
|
317 |
< |
for(unsigned ifile=0; ifile<classes.size(); ifile++) { |
318 |
< |
// double xtraWgt(1); |
319 |
< |
// if(types[ifile]=="lljj") xtraWgt = qqZZoverlljj; |
320 |
< |
factory->AddTree( inTrees[ifile], classes[ifile], 1, TCut(cutStr + (noFakeTest ? "" : "*((evt%2)==0)")), TMVA::Types::kTraining); |
321 |
< |
factory->AddTree( inTrees[ifile], classes[ifile], 1, TCut(cutStr + (noFakeTest ? "" : "*((evt%2)!=0)")), TMVA::Types::kTesting); |
322 |
< |
} |
214 |
> |
for(unsigned ifs=0; ifs<fsv.size(); ifs++) { |
215 |
> |
filestuff *fs = fsv[ifs]; |
216 |
> |
cout << "starting: " << fs->dataset_ << endl; |
217 |
> |
double tot(0); |
218 |
> |
for(unsigned ientry=0; ientry<fs->getentries("zznt"); ientry++) { |
219 |
> |
fs->getentry(ientry,"kinematics","zznt"); |
220 |
> |
if(fs->kine->m4l < wL || fs->kine->m4l > wH) continue; |
221 |
> |
fs->getentry(ientry,"","zznt"); |
222 |
> |
if(ientry<100) |
223 |
> |
cout << fs->ji->nJets << endl; |
224 |
> |
|
225 |
> |
for(unsigned ivar=0; ivar<vars.size(); ivar++) |
226 |
> |
setTrainingVal(vars[ivar], &varVals[ivar], fs); |
227 |
> |
for(unsigned ispec=vars.size(); ispec<varVals.size(); ispec++) |
228 |
> |
setTrainingVal(specs[ispec-vars.size()], &varVals[ispec], fs); |
229 |
> |
|
230 |
> |
// train on even events |
231 |
> |
// if noFakeTest train on *all* the lljj events since we don't have many to start with |
232 |
> |
double wgt(baseWgts[ifs]); |
233 |
> |
if(wgtStrs[ifs] == "w") wgt *= fs->weights->w; |
234 |
> |
else if(wgtStrs[ifs] == "1") wgt *= 1; |
235 |
> |
else if(wgtStrs[ifs] == "wInterf") wgt *= getInterferenceWeight(fs,mela); |
236 |
> |
else assert(0); |
237 |
> |
if(wgt<.000000001 || wgt>5) { cout << "bad wgt?" << wgt << endl;} |
238 |
> |
|
239 |
> |
if((ientry%2)==0 || (fs->dataset_.Contains("fakes",TString::kIgnoreCase) && noFakeTest)) |
240 |
> |
factory->AddEvent(classes[ifs], TMVA::Types::kTraining, varVals, wgt); |
241 |
> |
if((ientry%2)!=0) |
242 |
> |
factory->AddEvent(classes[ifs], TMVA::Types::kTesting, varVals, wgt); |
243 |
|
|
244 |
< |
if(useWeightExpressions) { |
245 |
< |
TString lumi("17000"); |
246 |
< |
factory->SetWeightExpression(lumi+"*w","Signal"); |
327 |
< |
factory->SetWeightExpression(lumi+"*w","Signal2"); |
328 |
< |
factory->SetWeightExpression("w*(1 + ("+lumi+" - 1)*(run==1 || lumi==1))","Background"); // for fakes, don't apply lumi (for mc, either run is 1 or lumi is 1 |
244 |
> |
tot += wgt; |
245 |
> |
} |
246 |
> |
cout << " " << tot << endl; |
247 |
|
} |
330 |
– |
// factory->AddTree( sigTreeAll, "Signal", 1, "((evt%2)==0)", TMVA::Types::kTraining); |
331 |
– |
// factory->AddTree( sigTreeAll, "Signal", 1, "((evt%2)!=0)", TMVA::Types::kTesting); |
332 |
– |
// if(multisigs) { |
333 |
– |
// if(multiclass) { |
334 |
– |
// factory->AddTree( sig2TreeAll, "Signal2", 1, "((evt%2)==0)", TMVA::Types::kTraining); |
335 |
– |
// factory->AddTree( sig2TreeAll, "Signal2", 1, "((evt%2)!=0)", TMVA::Types::kTesting); |
336 |
– |
// } else { |
337 |
– |
// factory->AddTree( sig2TreeAll, "Background", 1, "((evt%2)==0)", TMVA::Types::kTraining); |
338 |
– |
// factory->AddTree( sig2TreeAll, "Background", 1, "((evt%2)!=0)", TMVA::Types::kTesting); |
339 |
– |
// } |
340 |
– |
// if(sig3TreeAll) { |
341 |
– |
// factory->AddTree( sig3TreeAll, "Signal3", 1, "((evt%2)==0)", TMVA::Types::kTraining); |
342 |
– |
// factory->AddTree( sig3TreeAll, "Signal3", 1, "((evt%2)!=0)", TMVA::Types::kTesting); |
343 |
– |
// } |
344 |
– |
// } else { |
345 |
– |
// factory->AddTree( bkgTreeAll, "Background", 1, "((evt%2)==0)", TMVA::Types::kTraining); |
346 |
– |
// factory->AddTree( bkgTreeAll, "Background", 1, "((evt%2)!=0)", TMVA::Types::kTesting); |
347 |
– |
// if(multiclass) { |
348 |
– |
// if(noFakeTest) { |
349 |
– |
// factory->AddTree( bkg2TreeAll, "Background2", 1, "", TMVA::Types::kTraining); |
350 |
– |
// factory->AddTree( bkg2TreeAll, "Background2", 1, "", TMVA::Types::kTesting); // just make 'em the same, should work... |
351 |
– |
// } else { |
352 |
– |
// factory->AddTree( bkg2TreeAll, "Background2", 1, "((evt%2)==0)", TMVA::Types::kTraining); |
353 |
– |
// factory->AddTree( bkg2TreeAll, "Background2", 1, "((evt%2)!=0)", TMVA::Types::kTesting); |
354 |
– |
// } |
355 |
– |
// } |
356 |
– |
// } |
248 |
|
|
249 |
|
if(NTrees=="") NTrees = "10000"; |
250 |
|
if(NNodesMax=="") NNodesMax = "10"; |
251 |
|
if(Shrinkage=="") Shrinkage = "0.05"; |
252 |
|
if(nCuts=="") nCuts = "20"; |
253 |
< |
if(NormMode=="") NormMode = "NumEvents"; |
253 |
> |
if(NormMode=="") NormMode = "EqualNumEvents"; |
254 |
> |
assert(NormMode=="NumEvents" || NormMode=="EqualNumEvents"); |
255 |
|
factory->PrepareTrainingAndTestTree( "", "SplitMode=Random:V:NormMode="+NormMode); |
256 |
|
factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:V:NTrees="+NTrees |
257 |
|
+":NNodesMax="+NNodesMax |
260 |
|
// +":nTrain_Signal=100:nTest_Signal=100:nTrain_Background=100:nTest_Background=100:nTrain_Signal2=100:nTest_Signal2=100" |
261 |
|
+":IgnoreNegWeights"); |
262 |
|
|
371 |
– |
// KH params: |
372 |
– |
// factory->BookMethod( TMVA::Types::kBDT, "BDTG","!H:V:NTrees=2000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:UseYesNoLeaf=False");//NNodesMax=8 |
373 |
– |
// decorrelate them first: |
374 |
– |
// factory->BookMethod( TMVA::Types::kBDT, "BDTG","!H:V:NTrees=2000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:UseYesNoLeaf=False:VarTransform=D");//NNodesMax=8 |
375 |
– |
|
376 |
– |
// josh: |
377 |
– |
// factory->BookMethod( TMVA::Types::kBDT, "BDTG_josh" , "!H:V:NTrees=2000:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad=F:nCuts=2000:MaxDepth=3:NNodesMax=100000:UseYesNoLeaf=F:nEventsMin=10000:"); |
378 |
– |
// factory->BookMethod( TMVA::Types::kBDT, "BDTG_josh" , "!H:V:NTrees=2000:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad=F:nCuts=2000:MaxDepth=3:NNodesMax=100000:UseYesNoLeaf=F:nEventsMin=1000:"); |
379 |
– |
// 'phil' params: |
380 |
– |
// factory->BookMethod( TMVA::Types::kBDT, "BDTG_newparams","!H:V:NTrees=200:BoostType=Grad:Shrinkage=0.01:!UseBaggedGrad:nCuts=2000:nEventsMin=100:NNodesMax=5:MaxDepth=100"); |
381 |
– |
// small number of very dep trees |
382 |
– |
// ntrees should be smaller (200-500) |
383 |
– |
// max depth 100 |
384 |
– |
// shrink 0.01 |
385 |
– |
|
263 |
|
factory->TrainAllMethods(); |
264 |
|
factory->TestAllMethods(); |
265 |
|
factory->EvaluateAllMethods(); |
296 |
|
// ss << min(nSigTrainMax,nSigTestMax) << " " << min(nBkgTrainMax,nBkgTestMax); |
297 |
|
// ss >> nSig >> nBkg; |
298 |
|
} |
299 |
+ |
//---------------------------------------------------------------------------------------- |
300 |
+ |
// void setTrainingVals(vector<TString> vars, vector<double> &varVals, vector<TString> specs, vector<double> &specVals, filestuff *fs) |
301 |
+ |
// Look up the training / spectator variable called `name` in the structures
// held by `fs` and store it, converted to double, in `*val`.
//
//   name : variable label as it appears in the variable list handed to TMVA
//   val  : destination for the value; left untouched when `name` is unknown
//   fs   : per-file helper whose angles / kine / vk / info members are read
//
// An unrecognised name prints "<name> not found!" and trips assert(0).
void setTrainingVal(TString name, double *val, filestuff *fs)
{
  // Jet variables are all fetched by name through fs->vk->getval(), so they
  // are handled with a single table instead of one branch per variable.
  static const char* const jetVarNames[] = {
    "nJets",       "mjj",         "dEta",        "etaProd",
    "dphiJ1HiPtZ", "dphiJ1LoPtZ", "dEtaJ1HiPtZ", "dEtaJ1LoPtZ",
    "J1dotHiPtZ",  "J1dotLoPtZ",
    "dphiJ2HiPtZ", "dphiJ2LoPtZ", "dEtaJ2HiPtZ", "dEtaJ2LoPtZ",
    "J2dotHiPtZ",  "J2dotLoPtZ"
  };
  const unsigned nJetVarNames = sizeof(jetVarNames) / sizeof(jetVarNames[0]);
  for(unsigned ijet=0; ijet<nJetVarNames; ijet++) {
    if(name == jetVarNames[ijet]) {
      *val = fs->vk->getval(jetVarNames[ijet]);
      return;
    }
  }

  // decay angles
  if(name == "costheta1")       { *val = fs->angles->costheta1;    return; }
  if(name == "costheta2")       { *val = fs->angles->costheta2;    return; }
  if(name == "costhetastar")    { *val = fs->angles->costhetastar; return; }
  if(name == "Phi")             { *val = fs->angles->Phi;          return; }
  if(name == "Phi1")            { *val = fs->angles->Phi1;         return; }

  // Z masses
  if(name == "mZ1")             { *val = fs->kine->mZ1;            return; }
  if(name == "mZ2")             { *val = fs->kine->mZ2;            return; }

  // pt-type variables: the plain labels below map onto quantities that are
  // scaled by m4l (and by the Z mass for the dot products)
  if(name == "ZZpt")            { *val = fs->kine->ZZpt/fs->kine->m4l;                       return; }
  if(name == "ZZdotZ1")         { *val = fs->kine->ZZdotZ1/(fs->kine->m4l*fs->kine->mZ1);    return; }
  if(name == "ZZdotZ2")         { *val = fs->kine->ZZdotZ2/(fs->kine->m4l*fs->kine->mZ2);    return; }
  if(name == "ZZptCosDphiZ1pt") { *val = fs->kine->ZZptZ1ptCosDphi;                          return; }
  if(name == "ZZptCosDphiZ2pt") { *val = fs->kine->ZZptZ2ptCosDphi;                          return; }
  if(name == "Z1pt")            { *val = fs->kine->Z1pt/fs->kine->m4l;                       return; }
  if(name == "Z2pt")            { *val = fs->kine->Z2pt/fs->kine->m4l;                       return; }
  if(name == "ZZy")             { *val = fs->kine->ZZy;                                      return; }

  // spectators
  if(name == "m4l")             { *val = fs->kine->m4l;            return; }
  if(name == "run")             { *val = fs->info->run;            return; }
  if(name == "lumi")            { *val = fs->info->lumi;           return; }
  if(name == "evt")             { *val = fs->info->evt;            return; }

  // nothing matched: report the offending label and stop (debug builds)
  std::cout << name << " not found!" << std::endl;
  assert(0);
}