ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitHzz4l/Angles/src/trainAngles.cc
(Generate patch)

Comparing UserCode/MitHzz4l/Angles/src/trainAngles.cc (file contents):
Revision 1.3 by dkralph, Mon Dec 17 12:35:34 2012 UTC vs.
Revision 1.4 by dkralph, Tue Jan 15 09:40:41 2013 UTC

# Line 15 | Line 15
15  
16   #include "KinematicsStruct.h"
17   #include "HistHeaders.h"
18 + #include "filestuff.h"
19 +
20 + #include "ZZMatrixElement/MELA/interface/Mela.h"
21 + #include "ZZMatrixElement/MELA/interface/PseudoMELA.h"
22 + #include "ZZMatrixElement/MELA/src/computeAngles.h"
23  
24   using namespace std;
25  
26 + void setTrainingVal(TString name, double *val, filestuff *fs);
27   void setFractions(float mH, float mH_lo, float mH_hi, float &frac_lo, float &frac_hi);
28   void setNevents(int nEvtMax, int nSigTrainMax, int nSigTestMax, int nBkgTrainMax, int nBkgTestMax, TString &nSig, TString &nBkg);
29  
# Line 36 | Line 42 | int main(int argc, char** argv) {
42    cout << "config: " << config << endl;
43  
44    // TString ntupledir;
45 <  vector<TString> classes,fileNames,types;
45 >  vector<TString> classes,fileNames,types,indirs,dsets,wgtStrs;
46    vector<double> fileFracs,fileMasses;
47    vector<TFile*> inFiles;
48    vector<TTree*> inTrees;
49 <  bool multiclass=false;
50 <  bool multisigs=false;
51 <  bool noFakeTest=false;
52 <  bool useWeightExpressions=false;
49 >  vector<filestuff*> fsv;
50 >  bool multiclass(false);
51 >  bool multisigs(false);
52 >  bool noFakeTest(false);
53 >  bool useMela(false);
54    TString NTrees(""),NNodesMax(""),Shrinkage(""),nCuts(""),NormMode("");
55    int nMax=1000000;
56    ifstream ifs(config);
# Line 55 | Line 62 | int main(int argc, char** argv) {
62      if(line[0]=='^') {
63        TString dummy;
64        // if(TString(line).Contains("^ntupledir")) ss >> dummy >> ntupledir;
65 <      if(TString(line).Contains("^multiclass")) { cout << "multiclass: true" << endl; multiclass = true; }
66 <      if(TString(line).Contains("^multisigs"))  { cout << "multisigs: true" << endl; multisigs = true; }
67 <      if(TString(line).Contains("^noFakeTest"))  { cout << "noFakeTest: true" << endl; noFakeTest = true; } // use all the events for training... we have an orthogonal sample for testing, anyway, and we need the stat
68 <      if(TString(line).Contains("^NTrees"))     ss >> dummy >> NTrees;
69 <      if(TString(line).Contains("^NNodesMax"))  ss >> dummy >> NNodesMax;
70 <      if(TString(line).Contains("^Shrinkage"))  ss >> dummy >> Shrinkage;
71 <      if(TString(line).Contains("^nCuts"))      ss >> dummy >> nCuts;
72 <      if(TString(line).Contains("^NormMode")) { ss >> dummy >> NormMode; assert(NormMode=="NumEvents" || NormMode=="EqualNumEvents"); }
73 <      if(TString(line).Contains("^nMax"))       ss >> dummy >> nMax;
74 <      if(TString(line).Contains("^useWeightExpressions")) { cout << "useWeightExpressions: true" << endl; useWeightExpressions = true; }
65 >      if(TString(line).Contains("^multiclass")){ multiclass = true;        cout << "multiclass" << multiclass << endl; }
66 >      if(TString(line).Contains("^multisigs" )){ multisigs = true;         cout << "multisigs"  << multisigs  << endl; }
67 >      if(TString(line).Contains("^noFakeTest")){ noFakeTest = true;        cout << "noFakeTest" << noFakeTest << endl; }
68 >      if(TString(line).Contains("^NTrees"    )){ ss >> dummy >> NTrees;    cout << "NTrees"     << NTrees     << endl; }
69 >      if(TString(line).Contains("^NNodesMax" )){ ss >> dummy >> NNodesMax; cout << "NNodesMax"  << NNodesMax  << endl; }
70 >      if(TString(line).Contains("^Shrinkage" )){ ss >> dummy >> Shrinkage; cout << "Shrinkage"  << Shrinkage  << endl; }
71 >      if(TString(line).Contains("^nCuts"     )){ ss >> dummy >> nCuts;     cout << "nCuts"      << nCuts      << endl; }
72 >      if(TString(line).Contains("^NormMode"  )){ ss >> dummy >> NormMode;  cout << "NormMode"   << NormMode   << endl; }
73 >      if(TString(line).Contains("^nMax"      )){ ss >> dummy >> nMax;      cout << "nMax"       << nMax       << endl; }
74 >      if(TString(line).Contains("^useMela"   )){ useMela = true;           cout << "useMela"    << useMela    << endl; }
75        continue;
76      }
77  
78 <    TString cls,fname,type;
78 >    TString cls,indir,type,dset,wgtStr;
79      double frac,fileMass;
80 <    ss >> cls >> type >> frac >> fname >> fileMass;
80 >    bool isdata;
81 >    ss >> cls >> type >> frac >> wgtStr >> indir >> dset >> fileMass >> isdata;
82      // assert(cls=="sig" || cls=="sig2" || cls=="sig3" || cls=="bkg");
83      classes.push_back(cls);
84      types.push_back(type);
85 +    indirs.push_back(indir);
86 +    dsets.push_back(dset);
87 +    TString fname(indir+"/"+dset+"/merged.root");
88 +    fsv.push_back(new filestuff(dset, fname, dset, isdata, 2012));
89      fileNames.push_back(/*ntupledir+"/"+*/fname);
90      fileFracs.push_back(frac);
91 +    wgtStrs.push_back(wgtStr);
92      fileMasses.push_back(fileMass);
93      cout << "    pushing back: " << cls << " " << setw(12) << frac << " " << fname << " " << fileMass << endl;
94      inFiles.push_back(TFile::Open(fileNames.back()));
# Line 130 | Line 143 | int main(int argc, char** argv) {
143      << "   frac_hi:      " << frac_hi << endl
144      << endl;
145  
146 <  TFile * outputFile = new TFile(outputrootfile, "RECREATE");
147 <
148 <  map<string,bool> varmap;
149 <  char * dovar = strtok(varstring, ":");
150 <  if( dovar != NULL ) varmap[string(dovar)]=true;
151 <  while( (dovar = strtok(NULL, ":")) != NULL ) {
152 <    varmap[string(dovar)]=true;
146 >  // get the list of variables we're going to use
147 >  vector<double> varVals;
148 >  TString varStr(varstring);
149 >  varStr.ReplaceAll(":"," ");
150 >  vector<TString> vars,specs;
151 >  stringstream ss(varStr.Data());
152 >  while(!ss.eof()) {
153 >    TString var;
154 >    ss >> var;
155 >    vars.push_back(var);
156 >    cout << "VAR: " << var << endl;
157 >    varVals.push_back(double(0));
158    }
159 +  specs.push_back("m4l");   varVals.push_back(double(0));
160 +  specs.push_back("run");   varVals.push_back(double(0));
161 +  specs.push_back("lumi");  varVals.push_back(double(0));
162 +  specs.push_back("evt");   varVals.push_back(double(0));
163 +  assert(varVals.size() == (vars.size() + specs.size()));
164    
165 <  char name[256];
143 <  sprintf(name, weightname );
165 >  TFile *outputFile = new TFile(outputrootfile, "RECREATE");
166    TMVA::Tools::Instance();
167    TString facStr("!V:!Silent:!DrawProgressBar");
168    if(multiclass) facStr = facStr+":AnalysisType=multiclass";
169 <  TMVA::Factory* factory = new TMVA::Factory( name, outputFile, facStr );
170 <  // if( varmap[string("costheta1"         )] ) factory->AddVariable( "costheta1",              'F',-1, 1);  
171 <  // if( varmap[string("costheta2"         )] ) factory->AddVariable( "costheta2",              'F',-1, 1);
172 <  // if( varmap[string("costhetastar" )] ) factory->AddVariable( "costhetastar",                'F',-1, 1 );
151 <  // if( varmap[string("Phi"       )] ) factory->AddVariable( "Phi",                    'F',-TMath::Pi(), TMath::Pi() );
152 <  // if( varmap[string("Phi1"      )] ) factory->AddVariable( "Phi1",                   'F',-TMath::Pi(), TMath::Pi() );
153 <  if( varmap[string("costheta1"    )] ) factory->AddVariable("costheta1",               'F');  
154 <  if( varmap[string("costheta2"    )] ) factory->AddVariable("costheta2",               'F');
155 <  if( varmap[string("costhetastar" )] ) factory->AddVariable("costhetastar",            'F');
156 <  if( varmap[string("Phi"          )] ) factory->AddVariable("Phi",                     'F');
157 <  if( varmap[string("Phi1"         )] ) factory->AddVariable("Phi1",                    'F');
158 <  if( varmap[string("mZ1"          )] ) factory->AddVariable("mZ1",                     'F');
159 <  if( varmap[string("mZ2"          )] ) factory->AddVariable("mZ2",                     'F');
160 <  if( varmap[string("pt4l"         )] ) factory->AddVariable("ZZpt/m4l",                'F');
161 <  if( varmap[string("zzdotz1"      )] ) factory->AddVariable("ZZdotZ1/(m4l*mZ1)",       'F');
162 <  if( varmap[string("zzdotz2"      )] ) factory->AddVariable("ZZdotZ2/(m4l*mZ2)",       'F');
163 <  // if( varmap[string("dphi1"     )] ) factory->AddVariable("ZZptCosDphiZ1pt", 'F', -TMath::Pi(), TMath::Pi() );
164 <  // if( varmap[string("dphi2"     )] ) factory->AddVariable("ZZptCosDphiZ2pt", 'F', -TMath::Pi(), TMath::Pi() );
165 <  if( varmap[string("dphi1"        )] ) factory->AddVariable("ZZptCosDphiZ1pt",         'F');
166 <  if( varmap[string("dphi2"        )] ) factory->AddVariable("ZZptCosDphiZ2pt",         'F');
167 <  if( varmap[string("Z1pt"         )] ) factory->AddVariable("Z1pt/m4l",                'F');
168 <  if( varmap[string("Z2pt"         )] ) factory->AddVariable("Z2pt/m4l",                'F');
169 <  if( varmap[string("y4l"          )] ) factory->AddVariable("ZZy",                     'F');
170 <  if( varmap[string("nJets"        )] ) factory->AddVariable("nJets",                   'F');
171 <  if( varmap[string("mjj"          )] ) factory->AddVariable("mjj",                     'F');
172 <  if( varmap[string("dEta"         )] ) factory->AddVariable("dEta",                    'F');
173 <  if( varmap[string("etaProd"      )] ) factory->AddVariable("etaProd",                 'F');
174 <  if( varmap[string("dphiJ1HiPtZ"  )] ) factory->AddVariable("dphiJ1HiPtZ",             'F');
175 <  if( varmap[string("dphiJ1LoPtZ"  )] ) factory->AddVariable("dphiJ1LoPtZ",             'F');
176 <  if( varmap[string("dEtaJ1HiPtZ"  )] ) factory->AddVariable("dEtaJ1HiPtZ",             'F');
177 <  if( varmap[string("dEtaJ1LoPtZ"  )] ) factory->AddVariable("dEtaJ1LoPtZ",             'F');
178 <  if( varmap[string("J1dotHiPtZ"   )] ) factory->AddVariable("J1dotHiPtZ",              'F');
179 <  if( varmap[string("J1dotLoPtZ"   )] ) factory->AddVariable("J1dotLoPtZ",              'F');
180 <  if( varmap[string("dphiJ2HiPtZ"  )] ) factory->AddVariable("dphiJ2HiPtZ",             'F');
181 <  if( varmap[string("dphiJ2LoPtZ"  )] ) factory->AddVariable("dphiJ2LoPtZ",             'F');
182 <  if( varmap[string("dEtaJ2HiPtZ"  )] ) factory->AddVariable("dEtaJ2HiPtZ",             'F');
183 <  if( varmap[string("dEtaJ2LoPtZ"  )] ) factory->AddVariable("dEtaJ2LoPtZ",             'F');
184 <  if( varmap[string("J2dotHiPtZ"   )] ) factory->AddVariable("J2dotHiPtZ",              'F');
185 <  if( varmap[string("J2dotLoPtZ"   )] ) factory->AddVariable("J2dotLoPtZ",              'F');
186 <
187 <  if( varmap[string("m4l")] )          factory->AddVariable( "m4l",              'F' );
188 <  else                                 factory->AddSpectator("m4l",              'F' );
189 <  factory->AddSpectator("run",              'i' );
190 <  factory->AddSpectator("lumi",             'i' );
191 <  factory->AddSpectator("evt",              'i' );
192 <
169 >  TMVA::Factory* factory = new TMVA::Factory(weightname, outputFile, facStr);
170 >  for(unsigned ivar=0; ivar<vars.size(); ivar++)      factory->AddVariable( vars[ivar],   'F');
171 >  for(unsigned ispec=0; ispec<specs.size(); ispec++)  factory->AddSpectator(specs[ispec], 'F');
172 >    
173    char buf[256];
174 <  sprintf( buf, "(m4l>%f&&m4l<%f)", (float)wL, (float)wH );
195 <  TCut mycuts(buf);
174 >  sprintf( buf, "(m4l>%f && m4l<%f)", (float)wL, (float)wH );
175    TString cutStr(buf);
176    cout << "cutStr: " << cutStr << endl;
177  
178 <  int NqqZZ(0),Nlljj(0);
179 <  vector<TString> sigFiles,bkgFiles;
180 <  vector<double> passFracs,sigFileFracs,bkgFileFracs,nPassCutv,nSigPassCutv,nBkgPassCutv;
178 >  bool useWgts(true);
179 >  vector<vector<TString> > files;
180 >  vector<double> passFracs,nPassCutv,baseWgts;
181 >  cout << setw(13) << "class" << setw(6) << "type" << setw(12) << "       nPass  " << setw(8) << "nTot" << setw(10) << "  frac" << setw(12) << "baseWgt" << endl;
182    for(unsigned ifile=0; ifile<fileNames.size(); ifile++) {
183 <    double nPassCut = double(inTrees[ifile]->GetEntries(cutStr));
184 <    if(fileFracs[ifile] == -1)
185 <      fileFracs[ifile] = mH_fracs[fileMasses[ifile]];
186 <    passFracs.push_back( nPassCut / inTrees[ifile]->GetEntries() );
187 <    nPassCutv.push_back( nPassCut );
188 <    cout << classes[ifile] << ":  "
189 <         << setw(8) << nPassCut << " / " << setw(8) << inTrees[ifile]->GetEntries() << " events pass cut in file: " << fileNames[ifile] << ", filefrac: " << fileFracs[ifile] << endl;
190 <
191 <    if(types[ifile]=="qqZZ") NqqZZ += nPassCut;
192 <    if(types[ifile]=="lljj") Nlljj += nPassCut;
193 <    
194 <    // if(classes[ifile].Contains("sig",TString::kIgnoreCase)) {
195 <    //   sigFiles.push_back(fileNames[ifile]);
196 <    //   sigFileFracs.push_back(fileFracs[ifile]);
197 <    //   nSigPassCutv.push_back( nPassCut );
198 <    // } else if(classes[ifile].Contains("bkg",TString::kIgnoreCase)) {
199 <    //   bkgFiles.push_back(fileNames[ifile]);
200 <    //   bkgFileFracs.push_back(fileFracs[ifile]);
201 <    //   nBkgPassCutv.push_back( nPassCut );
202 <    // } else assert(0);
183 >    if(fileFracs[ifile] == -1) fileFracs[ifile] = mH_fracs[fileMasses[ifile]];
184 >    double nPass(0),nTot(0),passFrac(0);
185 >    if(useWgts) {
186 >      TH1D hPass("hPass","",100,0,1000);
187 >      TH1D hTot("hTot","",100,0,1000);
188 >      TString wgtStr(wgtStrs[ifile].Contains("wInterf") ? "1" : wgtStrs[ifile]); // tree doesn't have a wInterf leaf
189 >      inTrees[ifile]->Draw("m4l>>hPass",wgtStr+"*"+cutStr);
190 >      inTrees[ifile]->Draw("m4l>>hTot",wgtStr);
191 >      passFrac = hPass.Integral() / hTot.Integral();
192 >      nPass  = hPass.Integral();
193 >      nTot   = hTot.Integral();
194 >    } else {
195 >      nPass = double(inTrees[ifile]->GetEntries(cutStr));
196 >      passFrac = nPass / inTrees[ifile]->GetEntries();
197 >      nTot = inTrees[ifile]->GetEntries();
198 >    }
199 >    passFracs.push_back(passFrac);
200 >    nPassCutv.push_back(nPass);
201 >    double baseWgt(fileFracs[ifile] / nPass);
202 >    // if(fsv[ifile]->dataset_.Contains("fakes",TString::kIgnoreCase) && noFakeTest)
203 >    //   baseWgt /= 2; // if we're putting all the fakes into training, divide weights by two
204 >    baseWgts.push_back(baseWgt);
205 >    cout << setw(13) << classes[ifile] << "(" << setw(5) << types[ifile] << "):  " << setw(10) << nPass << " / " << setw(12) << nTot
206 >         << setw(6) << setprecision(5) << fileFracs[ifile] << setw(15) << baseWgt
207 >         << setw(30) << dsets[ifile] << "   " << indirs[ifile]
208 >         << endl;
209    }
210  
211 <  // double qqZZoverlljj(double(NqqZZ)/Nlljj);
212 <  // cout << "found " << NqqZZ << " qqZZ and " << Nlljj << " lljj, so setting ratio to " << qqZZoverlljj << endl;
211 >  Mela *mela(0);
212 >  if(useMela) mela = new Mela(false, 8);
213  
214 <  // double nMin=999999;
215 <  // double nSig=0;
216 <  // if(sigFiles.size()==2) {
217 <  //   cout << " signal comparison: " << nSigPassCutv[0]*sigFileFracs[1] << " " <<  nSigPassCutv[1]*sigFileFracs[0] << endl;
218 <  //   if(nSigPassCutv[0]*sigFileFracs[1] > nSigPassCutv[1]*sigFileFracs[0]) { // statistics are limited by second signal file
219 <  //     cout << " use second signal as min signal" << endl;
220 <  //     nMin = nSig = nSigPassCutv[1] * ( 1 + sigFileFracs[0] / sigFileFracs[1] );
221 <  //   } else { // limited by first one
222 <  //     cout << " use first signal as min signal" << endl;
223 <  //     nMin = nSig = nSigPassCutv[0] * ( 1 + sigFileFracs[1] / sigFileFracs[0] );
224 <  //   }
225 <  // } else if(sigFiles.size()==1 && mH_lo==0 && mH_hi==0) { // not interpolating
226 <  //   nMin = nSig = nSigPassCutv[0];
227 <  // } else assert(multisigs);
228 <  // cout << "nMin after signal check: " << nMin << endl;
229 <
230 <  // long nBkg=0,nBkg2=0;
231 <  // for(unsigned ibkg=0; ibkg<bkgFiles.size(); ibkg++) {
232 <  //   if(multiclass && bkgFiles[ibkg].Contains("fakes"))
233 <  //     nBkg2 += nBkgPassCutv[ibkg];
234 <  //   else
235 <  //     nBkg += nBkgPassCutv[ibkg];
236 <  // }
237 <  // cout << "nBkg: " << nBkg << "\tnBkg2: " << nBkg2 << endl;
238 <  // if(!multisigs && nBkg < nMin) nMin = nBkg; // don't include fakes in this comparison. Also, for multisigs we train two signal samples against each other, so we don't care if there is less bkg
239 <  // cout << "nMin after bkg check: " << nMin << endl;
240 <
241 <  // double nTarget = min(nMin,double(nMax));
242 <  // cout << "nTarget: " << nTarget << endl;
257 <
258 <  // map<TString,double> fracUsed;
259 <  // if(nTarget < nSig) fracUsed["sig"] = double(nTarget) / nSig; else fracUsed["sig"] = 1;
260 <  // if(nTarget < nBkg) fracUsed["bkg"] = double(nTarget) / nBkg; else fracUsed["bkg"] = 1;
261 <  // cout << "frac Used sig: " << fracUsed["sig"] << ", bkg: " << fracUsed["bkg"] << endl;
262 <  // if(multisigs) {
263 <  //   fracUsed["sig2"] = fracUsed["sig3"] = fracUsed["sig"];
264 <  // }
265 <
266 <  // vector<TTree*> TreeCopies; // copies of the input trees, but with only the number of events we want
267 <  // TList *sigList  = new TList;
268 <  // TList *bkgList  = new TList;
269 <  // TList *bkg2List = new TList;
270 <  // TList *sig2List = new TList;
271 <  // TList *sig3List = new TList;
272 <  // cout << "making tree copies: " << endl;
273 <  // for(unsigned ifile=0; ifile<fileNames.size(); ifile++) {
274 <  //   // if(mH_fracs.find(fileMasses[ifile]) == mH_fracs.end()) { cout << "filemasses[ifile]: " << fileMasses[ifile] << endl; assert(0); }
275 <
276 <  //   long nToRequest;
277 <  //   nToRequest = inTrees[ifile]->GetEntries();
278 <  //   if(!(multiclass && classes[ifile].Contains("bkg") && fileNames[ifile].Contains("fakes")))
279 <  //     nToRequest *= fracUsed[classes[ifile]];
280 <  //   cout << "  adding " << setw(12) << passFracs[ifile]*nToRequest << " from file " << fileNames[ifile]
281 <  //     << ", which means asking for: " << nToRequest << endl;
282 <  //   TreeCopies.push_back(inTrees[ifile]->CopyTree(cutStr,"",nToRequest));
283 <  //   if(classes[ifile].Contains("sig",TString::kIgnoreCase)) {
284 <  //     if(!multisigs) {
285 <  //    sigList->Add(TreeCopies.back());
286 <  //     } else {
287 <  //    if(classes[ifile]=="sig")        sigList->Add(TreeCopies.back());
288 <  //    else if(classes[ifile]=="sig2")  { cout << "added to sig2list" << endl; sig2List->Add(TreeCopies.back()); }
289 <  //    else if(classes[ifile]=="sig3")  { cout << "added to sig3list" << endl; sig3List->Add(TreeCopies.back()); }
290 <  //    else assert(0);
291 <  //     }
292 <  //   }
293 <  //   if(classes[ifile].Contains("bkg")) {
294 <  //     if(multiclass && fileNames[ifile].Contains("fakes"))
295 <  //    bkg2List->Add(TreeCopies.back());
296 <  //     else
297 <  //    bkgList->Add(TreeCopies.back());
298 <  //   }
299 <  // }
300 <
301 <  // TTree *sigTreeAll  = TTree::MergeTrees(sigList);
302 <  // TTree *sig2TreeAll = TTree::MergeTrees(sig2List);
303 <  // TTree *sig3TreeAll = sig3List->GetSize() ? TTree::MergeTrees(sig3List) : 0;
304 <  // TTree *bkgTreeAll  = TTree::MergeTrees(bkgList);
305 <  // TTree *bkg2TreeAll = multiclass ? TTree::MergeTrees(bkg2List) : 0;
306 <  // cout << "sigTreeAll entries:  " << sigTreeAll->GetEntries() << endl;
307 <  // cout << "bkgTreeAll entries:  " << bkgTreeAll->GetEntries() << endl;
308 <  // if(multiclass) {
309 <  //   cout << "bkg2TreeAll entries: " << bkg2TreeAll->GetEntries() << endl;
310 <  //   if(multisigs) {
311 <  //     cout << "sig2TreeAll entries: " << sig2TreeAll->GetEntries() << endl;
312 <  //     if(sig3TreeAll)
313 <  //    cout << "sig3TreeAll entries: " << sig3TreeAll->GetEntries() << endl;
314 <  //   }
315 <  // }
316 <
317 <  for(unsigned ifile=0; ifile<classes.size(); ifile++) {
318 <    // double xtraWgt(1);
319 <    // if(types[ifile]=="lljj") xtraWgt = qqZZoverlljj;
320 <    factory->AddTree( inTrees[ifile], classes[ifile],   1,      TCut(cutStr + (noFakeTest ? "" : "*((evt%2)==0)")),  TMVA::Types::kTraining);
321 <    factory->AddTree( inTrees[ifile], classes[ifile],   1,      TCut(cutStr + (noFakeTest ? "" : "*((evt%2)!=0)")),  TMVA::Types::kTesting);
322 <  }
214 >  for(unsigned ifs=0; ifs<fsv.size(); ifs++) {
215 >    filestuff *fs = fsv[ifs];
216 >    cout << "starting: " << fs->dataset_ << endl;
217 >    double tot(0);
218 >    for(unsigned ientry=0; ientry<fs->getentries("zznt"); ientry++) {
219 >      fs->getentry(ientry,"kinematics","zznt");
220 >      if(fs->kine->m4l < wL || fs->kine->m4l > wH) continue;
221 >      fs->getentry(ientry,"","zznt");
222 >      if(ientry<100)
223 >        cout << fs->ji->nJets << endl;
224 >      
225 >      for(unsigned ivar=0; ivar<vars.size(); ivar++)
226 >        setTrainingVal(vars[ivar],   &varVals[ivar],  fs);
227 >      for(unsigned ispec=vars.size(); ispec<varVals.size(); ispec++)
228 >        setTrainingVal(specs[ispec-vars.size()], &varVals[ispec], fs);
229 >      
230 >      // train on even events
231 >      // if noFakeTest train on *all* the lljj events since we don't have many to start with
232 >      double wgt(baseWgts[ifs]);
233 >      if(wgtStrs[ifs] == "w")            wgt *= fs->weights->w;
234 >      else if(wgtStrs[ifs] == "1")       wgt *= 1;
235 >      else if(wgtStrs[ifs] == "wInterf") wgt *= getInterferenceWeight(fs,mela);
236 >      else assert(0);
237 >      if(wgt<.000000001 || wgt>5) { cout << "bad wgt?" << wgt << endl;}
238 >
239 >      if((ientry%2)==0 || (fs->dataset_.Contains("fakes",TString::kIgnoreCase) && noFakeTest))
240 >        factory->AddEvent(classes[ifs], TMVA::Types::kTraining, varVals, wgt);
241 >      if((ientry%2)!=0)
242 >        factory->AddEvent(classes[ifs], TMVA::Types::kTesting,  varVals, wgt);
243  
244 <  if(useWeightExpressions) {
245 <    TString lumi("17000");
246 <    factory->SetWeightExpression(lumi+"*w","Signal");
327 <    factory->SetWeightExpression(lumi+"*w","Signal2");
328 <    factory->SetWeightExpression("w*(1 + ("+lumi+" - 1)*(run==1 || lumi==1))","Background"); // for fakes, don't apply lumi (for mc, either run is 1 or lumi is 1
244 >      tot += wgt;
245 >    }
246 >    cout << "  " << tot << endl;
247    }
330  // factory->AddTree( sigTreeAll, "Signal",    1,      "((evt%2)==0)",  TMVA::Types::kTraining);
331  // factory->AddTree( sigTreeAll, "Signal",    1,      "((evt%2)!=0)",  TMVA::Types::kTesting);
332  // if(multisigs) {
333  //   if(multiclass) {
334  //     factory->AddTree( sig2TreeAll, "Signal2",      1,      "((evt%2)==0)",  TMVA::Types::kTraining);
335  //     factory->AddTree( sig2TreeAll, "Signal2",      1,      "((evt%2)!=0)",  TMVA::Types::kTesting);
336  //   } else {
337  //     factory->AddTree( sig2TreeAll, "Background",   1,      "((evt%2)==0)",  TMVA::Types::kTraining);
338  //     factory->AddTree( sig2TreeAll, "Background",   1,      "((evt%2)!=0)",  TMVA::Types::kTesting);
339  //   }
340  //   if(sig3TreeAll) {
341  //     factory->AddTree( sig3TreeAll, "Signal3",      1,      "((evt%2)==0)",  TMVA::Types::kTraining);
342  //     factory->AddTree( sig3TreeAll, "Signal3",      1,      "((evt%2)!=0)",  TMVA::Types::kTesting);
343  //   }
344  // } else {
345  //   factory->AddTree( bkgTreeAll, "Background",      1,      "((evt%2)==0)",  TMVA::Types::kTraining);
346  //   factory->AddTree( bkgTreeAll, "Background",      1,      "((evt%2)!=0)",  TMVA::Types::kTesting);
347  //   if(multiclass) {
348  //     if(noFakeTest) {
349  //    factory->AddTree( bkg2TreeAll, "Background2",   1,      "",  TMVA::Types::kTraining);
350  //    factory->AddTree( bkg2TreeAll, "Background2",   1,      "",  TMVA::Types::kTesting); // just make 'em the same, should work...
351  //     } else {
352  //    factory->AddTree( bkg2TreeAll, "Background2",   1,      "((evt%2)==0)",  TMVA::Types::kTraining);
353  //    factory->AddTree( bkg2TreeAll, "Background2",   1,      "((evt%2)!=0)",  TMVA::Types::kTesting);
354  //     }
355  //   }
356  // }
248  
249    if(NTrees=="")    NTrees    = "10000";
250    if(NNodesMax=="") NNodesMax = "10";
251    if(Shrinkage=="") Shrinkage = "0.05";
252    if(nCuts=="")     nCuts     = "20";
253 <  if(NormMode=="")  NormMode  = "NumEvents";
253 >  if(NormMode=="")  NormMode  = "EqualNumEvents";
254 >  assert(NormMode=="NumEvents" || NormMode=="EqualNumEvents");
255    factory->PrepareTrainingAndTestTree( "", "SplitMode=Random:V:NormMode="+NormMode);
256    factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:V:NTrees="+NTrees
257                         +":NNodesMax="+NNodesMax
# Line 368 | Line 260 | int main(int argc, char** argv) {
260                         // +":nTrain_Signal=100:nTest_Signal=100:nTrain_Background=100:nTest_Background=100:nTrain_Signal2=100:nTest_Signal2=100"
261                         +":IgnoreNegWeights");
262  
371  // KH params:
372  // factory->BookMethod( TMVA::Types::kBDT, "BDTG","!H:V:NTrees=2000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:UseYesNoLeaf=False");//NNodesMax=8
373  // decorrelate them first:
374  // factory->BookMethod( TMVA::Types::kBDT, "BDTG","!H:V:NTrees=2000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:UseYesNoLeaf=False:VarTransform=D");//NNodesMax=8
375
376  // josh:
377  // factory->BookMethod( TMVA::Types::kBDT, "BDTG_josh" ,  "!H:V:NTrees=2000:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad=F:nCuts=2000:MaxDepth=3:NNodesMax=100000:UseYesNoLeaf=F:nEventsMin=10000:");
378  // factory->BookMethod( TMVA::Types::kBDT, "BDTG_josh" ,  "!H:V:NTrees=2000:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad=F:nCuts=2000:MaxDepth=3:NNodesMax=100000:UseYesNoLeaf=F:nEventsMin=1000:");
379  // 'phil' params:
380  // factory->BookMethod( TMVA::Types::kBDT, "BDTG_newparams","!H:V:NTrees=200:BoostType=Grad:Shrinkage=0.01:!UseBaggedGrad:nCuts=2000:nEventsMin=100:NNodesMax=5:MaxDepth=100");
381  // small number of very dep trees
382  // ntrees should be smaller (200-500)
383  //   max depth 100
384  // shrink 0.01
385
263    factory->TrainAllMethods();
264    factory->TestAllMethods();
265    factory->EvaluateAllMethods();
# Line 419 | Line 296 | void setNevents(int nEvtMax, int nSigTra
296    // ss << min(nSigTrainMax,nSigTestMax) << " " << min(nBkgTrainMax,nBkgTestMax);
297    // ss >> nSig >> nBkg;
298   }
299 + //----------------------------------------------------------------------------------------
300 + // void setTrainingVals(vector<TString> vars, vector<double> &varVals, vector<TString> specs, vector<double> &specVals, filestuff *fs)
// setTrainingVal: store into *val the quantity called `name`, read from the
// structures reachable through `fs` (fs->angles, fs->kine, fs->vk, fs->info).
//   name : variable or spectator name; matched by exact string equality
//          against the fixed list below.
//   val  : output location; written only when `name` matches an entry.
//   fs   : per-file reader object. NOTE(review): presumably the wanted entry
//          must already be loaded (the caller in main calls fs->getentry
//          first) -- confirm against filestuff.
// An unrecognized name prints "<name> not found!" and hits assert(0); if the
// assert is compiled out (NDEBUG), *val is left unmodified.
301 + void setTrainingVal(TString name, double *val, filestuff *fs)
302 + {
303 +    if(name=="costheta1")               *val = fs->angles->costheta1;
304 +    else if(name=="costheta2")          *val = fs->angles->costheta2;
305 +    else if(name=="costhetastar")       *val = fs->angles->costhetastar;
306 +    else if(name=="Phi")                *val = fs->angles->Phi;
307 +    else if(name=="Phi1")               *val = fs->angles->Phi1;
308 +    else if(name=="mZ1")                *val = fs->kine->mZ1;
309 +    else if(name=="mZ2")                *val = fs->kine->mZ2;
310 +    // pt variables
    // The short names below do NOT return the raw member of the same name:
    // ZZpt, Z1pt and Z2pt are divided by m4l, and ZZdotZ1/ZZdotZ2 are divided
    // by m4l*mZ1 / m4l*mZ2. The commented-out branches show the same
    // computations keyed by the full expression string instead.
311 +    // else if(name=="ZZpt/m4l")                *val = fs->kine->ZZpt/fs->kine->m4l;
312 +    // else if(name=="ZZdotZ1/(m4l*mZ1)")       *val = fs->kine->ZZdotZ1/(fs->kine->m4l*fs->kine->mZ1);
313 +    // else if(name=="ZZdotZ2/(m4l*mZ2)")       *val = fs->kine->ZZdotZ2/(fs->kine->m4l*fs->kine->mZ2);
314 +    else if(name=="ZZpt")               *val = fs->kine->ZZpt/fs->kine->m4l;
315 +    else if(name=="ZZdotZ1")            *val = fs->kine->ZZdotZ1/(fs->kine->m4l*fs->kine->mZ1);
316 +    else if(name=="ZZdotZ2")            *val = fs->kine->ZZdotZ2/(fs->kine->m4l*fs->kine->mZ2);
    // Name and member spell the word order differently here:
    // "ZZptCosDphiZ1pt" reads kine->ZZptZ1ptCosDphi (likewise for Z2).
317 +    else if(name=="ZZptCosDphiZ1pt")    *val = fs->kine->ZZptZ1ptCosDphi;
318 +    else if(name=="ZZptCosDphiZ2pt")    *val = fs->kine->ZZptZ2ptCosDphi;
319 +    // else if(name=="Z1pt/m4l")                *val = fs->kine->Z1pt/fs->kine->m4l;
320 +    // else if(name=="Z2pt/m4l")                *val = fs->kine->Z2pt/fs->kine->m4l;
321 +    else if(name=="Z1pt")               *val = fs->kine->Z1pt/fs->kine->m4l;
322 +    else if(name=="Z2pt")               *val = fs->kine->Z2pt/fs->kine->m4l;
323 +    else if(name=="ZZy")                *val = fs->kine->ZZy;
324 +    // jet variables
    // Each jet quantity is fetched through fs->vk->getval() using the same
    // string as `name`.
325 +    else if(name=="nJets"       )       *val = fs->vk->getval("nJets");
326 +    else if(name=="mjj"         )       *val = fs->vk->getval("mjj");          
327 +    else if(name=="dEta"        )       *val = fs->vk->getval("dEta");  
328 +    else if(name=="etaProd"     )       *val = fs->vk->getval("etaProd");      
329 +    else if(name=="dphiJ1HiPtZ" )       *val = fs->vk->getval("dphiJ1HiPtZ");  
330 +    else if(name=="dphiJ1LoPtZ" )       *val = fs->vk->getval("dphiJ1LoPtZ");  
331 +    else if(name=="dEtaJ1HiPtZ" )       *val = fs->vk->getval("dEtaJ1HiPtZ");  
332 +    else if(name=="dEtaJ1LoPtZ" )       *val = fs->vk->getval("dEtaJ1LoPtZ");  
333 +    else if(name=="J1dotHiPtZ"  )       *val = fs->vk->getval("J1dotHiPtZ");    
334 +    else if(name=="J1dotLoPtZ"  )       *val = fs->vk->getval("J1dotLoPtZ");    
335 +    else if(name=="dphiJ2HiPtZ" )       *val = fs->vk->getval("dphiJ2HiPtZ");  
336 +    else if(name=="dphiJ2LoPtZ" )       *val = fs->vk->getval("dphiJ2LoPtZ");  
337 +    else if(name=="dEtaJ2HiPtZ" )       *val = fs->vk->getval("dEtaJ2HiPtZ");  
338 +    else if(name=="dEtaJ2LoPtZ" )       *val = fs->vk->getval("dEtaJ2LoPtZ");  
339 +    else if(name=="J2dotHiPtZ"  )       *val = fs->vk->getval("J2dotHiPtZ");    
340 +    else if(name=="J2dotLoPtZ"  )       *val = fs->vk->getval("J2dotLoPtZ");
341 +    // spectators
    // m4l is returned unnormalized here; run/lumi/evt are stored into a
    // double like every other value.
342 +    else if(name=="m4l")                *val = fs->kine->m4l;
343 +    else if(name=="run")                *val = fs->info->run;
344 +    else if(name=="lumi")               *val = fs->info->lumi;
345 +    else if(name=="evt")                *val = fs->info->evt;
    // No match: report the offending name, then abort via assert.
346 +    else { cout << name << " not found!" << endl; assert(0); }
347 + }

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines