1 |
|
/* jetfit.cpp - Package to fit multi-Gaussian distributions to histograms |
2 |
– |
* rewrite after accidental deletion 07-24-09 |
2 |
|
* Author: David Nisson |
3 |
|
*/ |
4 |
|
|
26 |
|
#include <Rtypes.h> |
27 |
|
#include <TFormula.h> |
28 |
|
#include <TSystem.h> |
29 |
+ |
#include <TMath.h> |
30 |
|
|
31 |
|
#include "UserCode/JetFitAnalyzer/interface/jetfit.h" |
32 |
|
|
40 |
|
// configurable options |
41 |
|
bool ignorezero = false; // ignore zero bins when fitting |
42 |
|
|
43 |
– |
int ngauss; // number of Gaussians in current model |
44 |
– |
|
45 |
– |
struct histo { |
46 |
– |
vector< vector<double> > bins; |
47 |
– |
double Xlo, Xhi, Ylo, Yhi; |
48 |
– |
} energy; |
49 |
– |
|
50 |
– |
|
43 |
|
model_def *mdef; |
44 |
|
|
45 |
|
model_def& curr_model_def() { |
50 |
|
mdef = _mdef; |
51 |
|
} |
52 |
|
|
53 |
< |
int get_ngauss() { |
53 |
> |
int model_def::get_ngauss() { |
54 |
|
return ngauss; |
55 |
|
} |
56 |
|
|
57 |
< |
void set_ngauss(int _ngauss) { |
57 |
> |
void model_def::set_ngauss(int _ngauss) { |
58 |
|
ngauss = _ngauss; |
59 |
|
} |
60 |
|
|
61 |
|
// fit function |
62 |
< |
double fit_fcn(double x, double y, double *xval) { |
63 |
< |
int npar_indiv = mdef->get_formula()->GetNpar(); |
62 |
> |
double model_def::fit_fcn(double x, double y, double *xval) { |
63 |
> |
int npar_indiv = get_formula()->GetNpar(); |
64 |
|
double val = 0.0; |
65 |
|
for (int i = 0; i < ngauss; i++) { |
66 |
< |
mdef->get_formula()->SetParameters(xval + i*npar_indiv); |
66 |
> |
get_formula()->SetParameters(xval + i*npar_indiv); |
67 |
|
val += mdef->get_formula()->Eval(x, y); |
68 |
|
} |
69 |
|
return val; |
71 |
|
|
72 |
|
// TF2-compatible fit function |
73 |
|
double fit_fcn_TF2(double *x, double *pval) { |
74 |
< |
double val = fit_fcn(x[0], x[1], pval); |
74 |
> |
double val = mdef->fit_fcn(x[0], x[1], pval); |
75 |
|
return val; |
76 |
|
} |
77 |
|
|
78 |
|
// Integral of (model formula)^2 / chisquare sigma |
79 |
< |
double formula_int(double xlo, double xhi, double ylo, double yhi, |
79 |
> |
double model_def::formula_int(double xlo, double xhi, double ylo, double yhi, |
80 |
|
double *pval, double XbinSize, double YbinSize, |
81 |
|
double *xval) { |
82 |
|
double xstep = (xhi - xlo) / static_cast<double>(20); |
84 |
|
|
85 |
|
double fsum = 0.0; |
86 |
|
double pval_old[256]; |
87 |
< |
int npar = mdef->get_formula()->GetNpar(); |
87 |
> |
int npar = get_formula()->GetNpar(); |
88 |
|
if (npar > 256) { |
89 |
|
cerr << "Parameter overload" << endl; |
90 |
|
return 0.0; |
91 |
|
} |
92 |
< |
mdef->get_formula()->GetParameters(pval_old); |
93 |
< |
mdef->get_formula()->SetParameters(pval); |
92 |
> |
get_formula()->GetParameters(pval_old); |
93 |
> |
get_formula()->SetParameters(pval); |
94 |
|
|
95 |
|
for (int i = 0; i < 20; i++) { |
96 |
|
for (int j = 0; j < 20; j++) { |
98 |
|
double y = (static_cast<double>(j) + 0.5)*ystep + ylo; |
99 |
|
double sig_cut = 1.0e-5; |
100 |
|
double nu = XbinSize * YbinSize * fit_fcn(x, y, xval); |
101 |
< |
double sigma = fabs(mdef->chisquare_error(0.0)) < sig_cut |
102 |
< |
? sig_cut : mdef->chisquare_error(0.0); |
101 |
> |
double sigma = fabs(chisquare_error(0.0)) < sig_cut |
102 |
> |
? sig_cut : chisquare_error(0.0); |
103 |
|
|
104 |
< |
fsum += pow(xstep * ystep * mdef->get_formula()->Eval(x, y), 2.0) |
104 |
> |
fsum += pow(xstep * ystep * get_formula()->Eval(x, y), 2.0) |
105 |
|
/ sigma / sigma; |
106 |
|
} |
107 |
|
} |
108 |
|
|
109 |
< |
mdef->get_formula()->SetParameters(pval_old); |
109 |
> |
get_formula()->SetParameters(pval_old); |
110 |
|
return fsum; |
111 |
|
} |
112 |
|
|
126 |
|
/ static_cast<double>(energy.bins.size()) + energy.Xlo; |
127 |
|
double y = (static_cast<double>(j) + 0.5) * (energy.Yhi - energy.Ylo) |
128 |
|
/ static_cast<double>(energy.bins.at(i).size()) + energy.Ylo; |
129 |
< |
double nu = fit_fcn(x, y, xval) * XbinSize * YbinSize; |
129 |
> |
double nu = mdef->fit_fcn(x, y, xval) * XbinSize * YbinSize; |
130 |
|
double sig_cut = 1.0e-5; |
131 |
|
if (fabs(mdef->chisquare_error(energy.bins.at(i).at(j))) > sig_cut || !ignorezero) { |
132 |
|
chisquare += pow(energy.bins.at(i).at(j) - nu, 2.0) |
141 |
|
|
142 |
|
// add errors due to Gaussians outside histogram |
143 |
|
double eps = 0.01; // accuracy set for this function |
144 |
< |
for (int i = 0; i < ngauss; i++) { |
144 |
> |
for (int i = 0; i < mdef->get_ngauss(); i++) { |
145 |
|
double *pval = xval+i*(mdef->get_formula()->GetNpar()); |
146 |
|
double par_x = pval[mdef->get_indiv_max_x()]; |
147 |
|
double par_y = pval[mdef->get_indiv_max_y()]; |
159 |
|
double xhi = energy.Xlo; |
160 |
|
double ylo = energy.Ylo; |
161 |
|
double yhi = energy.Yhi; |
162 |
< |
chisquare += 2.0*formula_int(xlo, xhi, ylo, yhi, pval, |
162 |
> |
chisquare += 2.0*mdef->formula_int(xlo, xhi, ylo, yhi, pval, |
163 |
|
XbinSize, YbinSize, xval); |
164 |
|
} |
165 |
|
if (right) { |
167 |
|
double xhi = par_x + cutoff_rad; |
168 |
|
double ylo = energy.Ylo; |
169 |
|
double yhi = energy.Yhi; |
170 |
< |
chisquare += 2.0*formula_int(xlo, xhi, ylo, yhi, pval, |
170 |
> |
chisquare += 2.0*mdef->formula_int(xlo, xhi, ylo, yhi, pval, |
171 |
|
XbinSize, YbinSize, xval); |
172 |
|
} |
173 |
|
if (top) { |
175 |
|
double xhi = right ? par_x + cutoff_rad : energy.Xhi; |
176 |
|
double ylo = energy.Yhi; |
177 |
|
double yhi = par_y + cutoff_rad; |
178 |
< |
chisquare += 2.0*formula_int(xlo, xhi, ylo, yhi, pval, |
178 |
> |
chisquare += 2.0*mdef->formula_int(xlo, xhi, ylo, yhi, pval, |
179 |
|
XbinSize, YbinSize, xval); |
180 |
|
} |
181 |
|
if (bottom) { |
183 |
|
double xhi = right ? par_x + cutoff_rad : energy.Xhi; |
184 |
|
double ylo = par_y - cutoff_rad; |
185 |
|
double yhi = energy.Ylo; |
186 |
< |
chisquare += 2.0*formula_int(xlo, xhi, ylo, yhi, pval, |
186 |
> |
chisquare += 2.0*mdef->formula_int(xlo, xhi, ylo, yhi, pval, |
187 |
|
XbinSize, YbinSize, xval); |
188 |
|
} |
189 |
|
} |
312 |
|
} |
313 |
|
} |
314 |
|
|
323 |
– |
// translation of CERN's ERFC function |
324 |
– |
double erfc(double x) { |
325 |
– |
bool lef = false; |
326 |
– |
double p2[5], q2[5]; |
327 |
– |
|
328 |
– |
const double z1 = 1.0, hf = z1/2.0, c1 = 0.56418958; |
329 |
– |
const double vmax = 7.0; |
330 |
– |
const double switch_ = 4.0; |
331 |
– |
|
332 |
– |
double p10 = 3.6767877, q10 = 3.2584593, p11 = -9.7970465e-2; |
333 |
– |
p2[0] = 7.3738883; |
334 |
– |
q2[0] = 7.3739609; |
335 |
– |
p2[1] = 6.8650185; |
336 |
– |
q2[1] = 1.5184908e1; |
337 |
– |
p2[2] = 3.0317993; |
338 |
– |
q2[2] = 1.2795530e1; |
339 |
– |
p2[3] = 5.6316962e-1; |
340 |
– |
q2[3] = 5.3542168; |
341 |
– |
p2[4] = 4.3187787e-5; |
342 |
– |
q2[4] = 1.0; |
343 |
– |
|
344 |
– |
double p30 = -1.2436854e-1, q30 = 4.4091706e-1, p31 = -9.6821036e-2; |
345 |
– |
|
346 |
– |
double v = fabs(x); |
347 |
– |
double y, h, hc, ap, aq; |
348 |
– |
if (v < hf) { |
349 |
– |
y = v*v; |
350 |
– |
h = x*(p10 + p11*y)/(q10 + y); |
351 |
– |
hc = 1.0 - h; |
352 |
– |
} |
353 |
– |
else { |
354 |
– |
if (v < switch_) { |
355 |
– |
ap = p2[4]; |
356 |
– |
aq = q2[4]; |
357 |
– |
for (int i = 3; i >= 0; i--) { |
358 |
– |
ap = p2[i] + v*ap; |
359 |
– |
aq = q2[i] + v*aq; |
360 |
– |
} |
361 |
– |
hc = exp(-v*v)*ap/aq; |
362 |
– |
h = 1.0 - hc; |
363 |
– |
} |
364 |
– |
else if (v < vmax) { |
365 |
– |
y = 1/v/v; |
366 |
– |
hc = exp(-v*v)*(c1 + y*(p30 + p31*y)/(q30 + y))/v; |
367 |
– |
h = 1.0 - hc; |
368 |
– |
} |
369 |
– |
// for very big values we can save us any calculation |
370 |
– |
// and the FP-exceptions from exp |
371 |
– |
else { |
372 |
– |
h = 1.0; |
373 |
– |
hc = 0.0; |
374 |
– |
} |
375 |
– |
if (x < 0) { |
376 |
– |
h = -h; |
377 |
– |
hc = 2.0 - hc; |
378 |
– |
} |
379 |
– |
} |
380 |
– |
if (lef) { |
381 |
– |
return h; |
382 |
– |
} |
383 |
– |
else { |
384 |
– |
return hc; |
385 |
– |
} |
386 |
– |
} |
387 |
– |
|
388 |
– |
// translation of CERN's PROB function |
389 |
– |
double prob(double x, int n) { |
390 |
– |
const char *name = "PROB"; |
391 |
– |
char errtxt[80]; |
392 |
– |
|
393 |
– |
const double r1 = 1.0, |
394 |
– |
hf = r1/2.0, th = r1/3.0, f1 = 2.0*r1/9.0; |
395 |
– |
const double c1 = 1.128379167095513; |
396 |
– |
const int nmax = 300; |
397 |
– |
// maximum chi2 per df for df >= 2., if chi2/df > chipdf prob=0 |
398 |
– |
const double chipdf = 100.0; |
399 |
– |
const double xmax = 174.673, xmax2 = 2.0*xmax; |
400 |
– |
const double xlim = 24.0; |
401 |
– |
const double eps = 1e-30; |
402 |
– |
|
403 |
– |
double y = x; |
404 |
– |
double u = hf*y; |
405 |
– |
double h, w, s, t, fi, e; |
406 |
– |
int m; |
407 |
– |
if (n < 0) { |
408 |
– |
h = 0.0; |
409 |
– |
sprintf(errtxt, "n = %d < 1", n); |
410 |
– |
cerr << "PROB: G100.1: "<<errtxt; |
411 |
– |
} |
412 |
– |
else if (y < 0.0) { |
413 |
– |
h = 0.0; |
414 |
– |
sprintf(errtxt, "x = %f < 0", n); |
415 |
– |
cerr << "PROB: G100.2: "<<errtxt; |
416 |
– |
} |
417 |
– |
else if (y == 0.0 || n/20 > y) { |
418 |
– |
h = 1.0; |
419 |
– |
} |
420 |
– |
else if (n == 1) { |
421 |
– |
w = sqrt(u); |
422 |
– |
if (w < xlim) { |
423 |
– |
h = erfc(w); |
424 |
– |
} |
425 |
– |
else { |
426 |
– |
h = 0.0; |
427 |
– |
} |
428 |
– |
} |
429 |
– |
else if (n > nmax) { |
430 |
– |
s = r1 / ((double)n); |
431 |
– |
t = f1 * s; |
432 |
– |
w = (pow(y*s, th) - (1.0 - t)) / sqrt(2.0*t); |
433 |
– |
if (w < -xlim) { |
434 |
– |
h = 1.0; |
435 |
– |
} |
436 |
– |
else if (w < xlim) { |
437 |
– |
h = hf * erfc(w); |
438 |
– |
} |
439 |
– |
else { |
440 |
– |
h = 0.0; |
441 |
– |
} |
442 |
– |
} |
443 |
– |
else { |
444 |
– |
m = n/2; |
445 |
– |
if (u < xmax2 && (y/n) <= chipdf) { |
446 |
– |
s = exp(-hf*u); |
447 |
– |
t = s; |
448 |
– |
e = s; |
449 |
– |
if (2*m == n) { |
450 |
– |
fi = 0.0; |
451 |
– |
for (int i = 1; i < m; i++) { |
452 |
– |
fi += 1.0; |
453 |
– |
t = u*t/fi; |
454 |
– |
s += t; |
455 |
– |
} |
456 |
– |
h = s*e; |
457 |
– |
} |
458 |
– |
else { |
459 |
– |
fi = 1.0; |
460 |
– |
for (int i = 1; i < m; i++) { |
461 |
– |
fi += 2.0; |
462 |
– |
t = t*y/fi; |
463 |
– |
s += t; |
464 |
– |
} |
465 |
– |
w = sqrt(u); |
466 |
– |
if (w < xlim) { |
467 |
– |
h = c1*w*s*e + erfc(w); |
468 |
– |
} |
469 |
– |
else { |
470 |
– |
h = 0.0; |
471 |
– |
} |
472 |
– |
} |
473 |
– |
} |
474 |
– |
else { |
475 |
– |
h = 0.0; |
476 |
– |
} |
477 |
– |
} |
478 |
– |
if (h > eps) { |
479 |
– |
return h; |
480 |
– |
} |
481 |
– |
else { |
482 |
– |
return 0.0; |
483 |
– |
} |
484 |
– |
} |
485 |
– |
|
315 |
|
void fill_histo_with_vec(TH2 *hist, vector< vector<double> > &vec) { |
316 |
|
for (int i = 0; i < vec.size(); i++) { |
317 |
|
for (int j = 0; j < vec[i].size(); j++) { |
320 |
|
} |
321 |
|
} |
322 |
|
|
323 |
< |
void par_init(TH2 *hist, TMinuit *gMinuit, vector<TString> &pars, |
323 |
> |
void model_def::par_init(TH2 *hist, TMinuit *gMinuit, vector<TString> &pars, |
324 |
|
double *pval, double *perr, double *plo, double *phi, |
325 |
< |
int npar, results r) { |
326 |
< |
int npar1 = npar - mdef->get_formula()->GetNpar(); |
325 |
> |
int npar, FitResults r) { |
326 |
> |
int npar1 = npar - get_formula()->GetNpar(); |
327 |
|
bool init_spec_pars = false; |
328 |
< |
if (ngauss <= mdef->get_n_special_par_sets()) { |
328 |
> |
if (ngauss <= get_n_special_par_sets()) { |
329 |
|
init_spec_pars = true; |
330 |
|
for (int k = 0; k < ngauss; k++) { |
331 |
< |
int ipar0 = k*mdef->get_formula()->GetNpar(); // index of indiv par 0 |
332 |
< |
for (int l = 0; l < mdef->get_formula()->GetNpar(); l++) { |
331 |
> |
int ipar0 = k*get_formula()->GetNpar(); // index of indiv par 0 |
332 |
> |
for (int l = 0; l < get_formula()->GetNpar(); l++) { |
333 |
|
int ipar = ipar0 + l; |
334 |
|
if (ipar < npar) |
335 |
< |
mdef->get_special_par(k, l, pval[ipar], perr[ipar], plo[ipar], |
335 |
> |
get_special_par(k, l, pval[ipar], perr[ipar], plo[ipar], |
336 |
|
phi[ipar]); |
337 |
|
else |
338 |
|
cerr << "WARNING: Attempt to set parameter out of index range!" |
394 |
|
for (int j = 1; j <= hist_sub->GetYaxis()->GetNbins(); j++) { |
395 |
|
double nu = energy.bins[i-1][j-1] - sub_energy[i-1][j-1]; |
396 |
|
if (hist_sub->GetBinContent(i, j) |
397 |
< |
- 3.0*pow(mdef->chisquare_error(nu), 2.0) > max_sub) { |
397 |
> |
- 3.0*pow(chisquare_error(nu), 2.0) > max_sub) { |
398 |
|
max_sub = hist_sub->GetBinContent(i, j); |
399 |
|
max_x = static_cast<double>(i-1)*XbinSize |
400 |
|
+ hist_sub->GetXaxis()->GetXmin(); |
405 |
|
} |
406 |
|
|
407 |
|
for (int i = npar1; i < npar; i++) { |
408 |
< |
if (mdef->get_indiv_max_E() == i - npar1) { |
408 |
> |
if (get_indiv_max_E() == i - npar1) { |
409 |
|
pval[i] = max_sub / XbinSize / YbinSize; |
410 |
|
perr[i] = mdef->chisquare_error(pval[i])*0.5; |
411 |
|
plo[i] = 0.0; |
412 |
|
phi[i] = 1.0e6; |
413 |
|
} |
414 |
< |
else if (mdef->get_indiv_max_x() == i - npar1) { |
414 |
> |
else if (get_indiv_max_x() == i - npar1) { |
415 |
|
pval[i] = max_x; |
416 |
|
perr[i] = (energy.Xhi - energy.Xlo) / static_cast<double>(hist->GetXaxis()->GetNbins()); |
417 |
|
plo[i] = 0.0; |
418 |
|
phi[i] = 0.0; |
419 |
|
} |
420 |
< |
else if (mdef->get_indiv_max_y() == i - npar1) { |
420 |
> |
else if (get_indiv_max_y() == i - npar1) { |
421 |
|
pval[i] = max_y; |
422 |
|
perr[i] = (energy.Yhi - energy.Ylo) / static_cast<double>(hist->GetYaxis()->GetNbins()); |
423 |
|
plo[i] = 0.0; |
425 |
|
} |
426 |
|
else { |
427 |
|
string dummy; |
428 |
< |
mdef->get_indiv_par(i-npar1, dummy, pval[i], perr[i], plo[i], phi[i]); |
428 |
> |
get_indiv_par(i-npar1, dummy, pval[i], perr[i], plo[i], phi[i]); |
429 |
|
} |
430 |
|
} |
431 |
|
} |
432 |
< |
int n_pars_to_set = init_spec_pars ? ngauss*mdef->get_formula()->GetNpar() |
432 |
> |
int n_pars_to_set = init_spec_pars ? ngauss*get_formula()->GetNpar() |
433 |
|
: npar - npar1; |
434 |
|
int init_par_to_set = init_spec_pars ? 0 : npar1; |
435 |
|
for (int i = 0; i < n_pars_to_set; i++) { |
436 |
|
double dummy; |
437 |
|
string s; |
438 |
|
int ipar = i + init_par_to_set; |
439 |
< |
mdef->get_indiv_par(i % mdef->get_formula()->GetNpar(), s, |
439 |
> |
get_indiv_par(i % get_formula()->GetNpar(), s, |
440 |
|
dummy, dummy, dummy, dummy); |
441 |
|
ostringstream par_oss; |
442 |
< |
par_oss << s << ipar / (mdef->get_formula()->GetNpar()) << flush; |
442 |
> |
par_oss << s << ipar / (get_formula()->GetNpar()) << flush; |
443 |
|
try { |
444 |
|
pars.at(ipar) = TString(par_oss.str()); |
445 |
|
} |
456 |
|
} |
457 |
|
} |
458 |
|
|
459 |
< |
results fit_histo(TH2 *hist, vector<trouble> &t_vec, |
460 |
< |
void (*cc_minuit)(TMinuit *, TH2 *, int), |
461 |
< |
int start_ngauss, |
462 |
< |
int rebinX, int rebinY, |
463 |
< |
double P_cutoff_val) { |
459 |
> |
FitResults fit_histo(TH2 *hist, vector<trouble> &t_vec, |
460 |
> |
void (*cc_minuit)(TMinuit *, TH2 *, int), |
461 |
> |
int start_ngauss, |
462 |
> |
int rebinX, int rebinY, |
463 |
> |
double P_cutoff_val) { |
464 |
|
TMinuit *gMinuit = new TMinuit(); |
465 |
|
int npar_indiv = mdef->get_formula()->GetNpar(); |
466 |
|
int istat, erflg; |
467 |
< |
results r; |
467 |
> |
FitResults r; |
468 |
|
|
469 |
|
gMinuit->SetFCN(fcn); |
470 |
|
gMinuit->mninit(5, 6, 7); |
501 |
|
start_ngauss = 1; |
502 |
|
} |
503 |
|
|
504 |
< |
for (ngauss = start_ngauss; |
505 |
< |
ngauss <= (MAX_GAUSS > start_ngauss ? MAX_GAUSS : start_ngauss); |
506 |
< |
ngauss++) { |
504 |
> |
for (mdef->set_ngauss(start_ngauss); |
505 |
> |
mdef->get_ngauss() <= |
506 |
> |
(MAX_GAUSS > start_ngauss ? MAX_GAUSS : start_ngauss); |
507 |
> |
mdef->set_ngauss(mdef->get_ngauss()+1)) { |
508 |
> |
|
509 |
> |
int ngauss = mdef->get_ngauss(); |
510 |
|
t_vec.resize(t_vec.size() + 1); |
511 |
< |
int npar = npar_indiv*ngauss; |
511 |
> |
int npar = npar_indiv*mdef->get_ngauss(); |
512 |
|
double pval[256], perr[256], plo[256], phi[256]; |
513 |
|
if (npar > 256) { |
514 |
|
cerr << "Parameter overload" << endl; |
517 |
|
vector<TString> pars(npar); |
518 |
|
|
519 |
|
// initialize parameters |
520 |
< |
par_init(hist, gMinuit, pars, pval, perr, plo, phi, npar, r); |
520 |
> |
mdef->par_init(hist, gMinuit, pars, pval, perr, plo, phi, npar, r); |
521 |
|
|
522 |
|
// minimize |
523 |
|
double chisquare, edm, errdef; |
598 |
|
ndof -= npar; |
599 |
|
|
600 |
|
r.chisquare.push_back(chisquare); |
601 |
< |
double P = prob(chisquare, ndof); |
601 |
> |
double P = TMath::Prob(chisquare, ndof); |
602 |
|
if (P > P_cutoff_val) { |
603 |
|
break; |
604 |
|
} |