ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/claudioc/OSNote2010/datadriven.tex
(Generate patch)

Comparing UserCode/claudioc/OSNote2010/datadriven.tex (file contents):
Revision 1.10 by benhoob, Mon Nov 8 11:06:03 2010 UTC vs.
Revision 1.25 by benhoob, Sat Nov 20 15:26:05 2010 UTC

# Line 3 | Line 3
3   We have developed two data-driven methods to
4   estimate the background in the signal region.
5   The first one exploits the fact that
6 < \met and \met$/\sqrt{\rm SumJetPt}$ are nearly
6 > SumJetPt and \met$/\sqrt{\rm SumJetPt}$ are nearly
7   uncorrelated for the $t\bar{t}$ background
8   (Section~\ref{sec:abcd});  the second one
9   is based on the fact that in $t\bar{t}$ the
# Line 12 | Line 12 | nearly the same as the $P_T$ of the pair
12   from $W$-decays, which is reconstructed as \met in the
13   detector.
14  
15 < In 30 pb$^{-1}$ we expect $\approx$ 1 SM event in
16 < the signal region.  The expectations from the LM0
17 < and LM1 SUSY benchmark points are 5.6 and
18 < 2.2 events respectively.
15 >
16   %{\color{red} I took these
17   %numbers from the twiki, rescaling from 11.06 to 30/pb.
18   %They seem too large...are they really right?}
# Line 24 | Line 21 | and LM1 SUSY benchmark points are 5.6 an
21   \subsection{ABCD method}
22   \label{sec:abcd}
23  
24 < We find that in $t\bar{t}$ events \met and
25 < \met$/\sqrt{\rm SumJetPt}$ are nearly uncorrelated.
26 < This is demonstrated in Figure~\ref{fig:uncor}.
24 > We find that in $t\bar{t}$ events SumJetPt and
25 > \met$/\sqrt{\rm SumJetPt}$ are nearly uncorrelated,
26 > as demonstrated in Figure~\ref{fig:uncor}.
27   Thus, we can use an ABCD method in the \met$/\sqrt{\rm SumJetPt}$ vs
28   SumJetPt plane to estimate the background in a data-driven way.
29  
30 < \begin{figure}[tb]
30 > %\begin{figure}[bht]
31 > %\begin{center}
32 > %\includegraphics[width=0.75\linewidth]{uncorrelated.pdf}
33 > %\caption{\label{fig:uncor}\protect Distributions of SumJetPt
34 > %in MC $t\bar{t}$ events for different intervals of
35 > %MET$/\sqrt{\rm SumJetPt}$.}
36 > %\end{center}
37 > %\end{figure}
38 >
39 > \begin{figure}[bht]
40   \begin{center}
41 < \includegraphics[width=0.75\linewidth]{uncorrelated.pdf}
41 > \includegraphics[width=0.75\linewidth]{uncor.png}
42   \caption{\label{fig:uncor}\protect Distributions of SumJetPt
43   in MC $t\bar{t}$ events for different intervals of
44 < MET$/\sqrt{\rm SumJetPt}$.}
44 > MET$/\sqrt{\rm SumJetPt}$. h1, h2, and h3 refer to the MET$/\sqrt{\rm SumJetPt}$
45 > intervals 4.5--6.5, 6.5--8.5 and $>$8.5, respectively.}
46   \end{center}
47   \end{figure}
48  
49 < \begin{figure}[bt]
49 > \begin{figure}[tb]
50   \begin{center}
51   \includegraphics[width=0.5\linewidth, angle=90]{abcdMC.pdf}
52 < \caption{\label{fig:abcdMC}\protect Distributions of SumJetPt
53 < vs. MET$/\sqrt{\rm SumJetPt}$ for SM Monte Carlo.  Here we also
47 < show our choice of ABCD regions.}
52 > \caption{\label{fig:abcdMC}\protect Distributions of MET$/\sqrt{\rm SumJetPt}$ vs.
53 > SumJetPt for SM Monte Carlo.  Here we also show our choice of ABCD regions.}
54   \end{center}
55   \end{figure}
56  
# Line 53 | Line 59 | Our choice of ABCD regions is shown in F
59   The signal region is region D.  The expected number of events
60   in the four regions for the SM Monte Carlo, as well as the BG
61   prediction AC/B are given in Table~\ref{tab:abcdMC} for an integrated
62 < luminosity of 30 pb$^{-1}$.  The ABCD method is accurate
63 < to about 10\%.
62 > luminosity of 35 pb$^{-1}$.  The ABCD method with the chosen boundaries is accurate
63 > to about 20\%. As shown in Table~\ref{tab:abcdsyst}, we assess systematic uncertainties
64 > by varying the boundaries by an amount consistent with the hadronic energy scale uncertainty,
65 > which we take as $\pm$5\% for SumJetPt and $\pm$2.5\% for MET/$\sqrt{\rm SumJetPt}$, since the
66 > uncertainty on this quantity partially cancels due to the fact that it is a ratio of correlated
67 > quantities. Based on these studies we assess a correction factor $k_{ABCD} = 1.2 \pm 0.2$ to the
68 > predicted yield using the ABCD method.
69 >
70 >
71   %{\color{red} Avi wants some statement about stability
72   %wrt changes in regions.  I am not sure that we have done it and
73   %I am not sure it is necessary (Claudio).}
74  
75 < \begin{table}[htb]
75 > \begin{table}[ht]
76   \begin{center}
77   \caption{\label{tab:abcdMC} Expected SM Monte Carlo yields for
78 < 30 pb$^{-1}$ in the ABCD regions.}
79 < \begin{tabular}{|l|c|c|c|c||c|}
78 > 35 pb$^{-1}$ in the ABCD regions, as well as the predicted yield in
79 > the signal region given by A $\times$ C / B. Here `SM other' is the sum
80 > of non-dileptonic $t\bar{t}$ decays, $W^{\pm}$+jets, $W^+W^-$,
81 > $W^{\pm}Z^0$, $Z^0Z^0$ and single top.}
82 > \begin{tabular}{lccccc}
83 > \hline
84 >         sample                          &              A   &              B   &              C   &              D   &    A $\times$ C / B \\
85 > \hline
86 >
87 >
88 > \hline
89 >              sample   &                   A   &                   B   &                   C   &                   D   &                PRED  \\
90 > \hline
91 > $t\bar{t}\rightarrow \ell^{+}\ell^{-}$         &   7.96  $\pm$  0.17   &  33.07  $\pm$  0.35   &   4.81  $\pm$  0.13   &   1.20  $\pm$  0.07   &   1.16  $\pm$  0.04  \\
92 > $Z^0 \rightarrow \ell^{+}\ell^{-}$             &   0.03  $\pm$  0.03   &   1.47  $\pm$  0.38   &   0.10  $\pm$  0.10   &   0.10  $\pm$  0.10   &   0.00  $\pm$  0.00  \\
93 >            SM other                           &   0.65  $\pm$  0.06   &   2.31  $\pm$  0.13   &   0.17  $\pm$  0.03   &   0.14  $\pm$  0.03   &   0.05  $\pm$  0.01  \\
94 > \hline
95 >         total SM MC                           &   8.63  $\pm$  0.18   &  36.85  $\pm$  0.53   &   5.07  $\pm$  0.17   &   1.43  $\pm$  0.12   &   1.19  $\pm$  0.05  \\
96 > \hline
97 > \end{tabular}
98 > \end{center}
99 > \end{table}
100 >
101 >
102 >
103 > \begin{table}[ht]
104 > \begin{center}
105 > \caption{\label{tab:abcdsyst} Results of the systematic study of the ABCD method by varying the boundaries
106 > between the ABCD regions shown in Fig.~\ref{fig:abcdMC}. Here $x_1$ is the lower SumJetPt boundary and
107 > $x_2$ is the boundary separating regions A and B from C and D; their nominal values are 125 and 300~GeV,
108 > respectively. $y_1$ is the lower MET/$\sqrt{\rm SumJetPt}$ boundary and
109 > $y_2$ is the boundary separating regions B and C from A and D; their nominal values are 4.5 and 8.5~GeV$^{1/2}$,
110 > respectively.}
111 > \begin{tabular}{cccc|c}
112 > \hline
113 > $x_1$   &   $x_2$ & $y_1$   &   $y_2$ & Observed/Predicted \\
114 > \hline
115 > nominal & nominal & nominal & nominal & 1.20     \\
116 > +5\%    & +5\%    & +2.5\%  & +2.5\%  & 1.38     \\
117 > +5\%    & +5\%    & nominal & nominal & 1.31     \\
118 > nominal & nominal & +2.5\%  & +2.5\%  & 1.25     \\
119 > nominal & +5\%    & nominal & +2.5\%  & 1.32     \\
120 > nominal & -5\%    & nominal & -2.5\%  & 1.16     \\
121 > -5\%    & -5\%    & +2.5\%  & +2.5\%  & 1.21     \\
122 > +5\%    & +5\%    & -2.5\%  & -2.5\%  & 1.26     \\
123   \hline
68 Sample   & A   & B    & C   & D   & AC/B \\ \hline
69 ttdil    & 6.9 & 28.6 & 4.2 & 1.0 & 1.0  \\
70 Zjets    & 0.0 & 1.3  & 0.1 & 0.1 & 0.0  \\
71 Other SM & 0.5 & 2.0  & 0.1 & 0.1 & 0.0  \\ \hline
72 total MC & 7.4 & 31.9 & 4.4 & 1.2 & 1.0 \\ \hline
124   \end{tabular}
125   \end{center}
126   \end{table}
# Line 90 | Line 141 | In practice one has to rescale the resul
141   to account for the fact that any dilepton selection must include a
142   moderate \met cut in order to reduce Drell Yan backgrounds.  This
143   is discussed in Section 5.3 of Reference~\cite{ref:ourvictory}; for a \met
144 < cut of 50 GeV, the rescaling factor is obtained from the data as
144 > cut of 50 GeV, the rescaling factor is obtained from the MC as
145  
146   \newcommand{\ptll} {\ensuremath{P_T(\ell\ell)}}
147   \begin{center}
# Line 115 | Line 166 | There are several effects that spoil the
166   $P_T(\ell\ell)$:
167   \begin{itemize}
168   \item $W$s in top events are polarized.  Neutrinos are emitted preferentially
169 < forward in the $W$ rest frame, thus the $P_T(\nu\nu)$ distribution is harder
169 > parallel to the $W$ velocity while charged leptons are emitted preferentially
170 > anti-parallel. Thus the $P_T(\nu\nu)$ distribution is harder
171   than the $P_T(\ell\ell)$ distribution for top dilepton events.
172   \item The lepton selections result in $P_T$ and $\eta$ cuts on the individual
173   leptons that have no simple correspondence to the neutrino requirements.
174   \item Similarly, the \met$>$50 GeV cut introduces an asymmetry between leptons and
175   neutrinos which is only partially compensated by the $K$ factor above.
176   \item The \met resolution is much worse than the dilepton $P_T$ resolution.
177 < When convoluted with a falling spectrum in the tails of \met, this result
177 > When convoluted with a falling spectrum in the tails of \met, this results
178   in a harder spectrum for \met than the original $P_T(\nu\nu)$.
179   \item The \met response in CMS is not exactly 1.  This causes a distortion
180   in the \met distribution that is not present in the $P_T(\ell\ell)$ distribution.
# Line 133 | Line 185 | of $P_T(\ell\ell)$ and $P_T(\nu\nu)$ do
185   sources.  These events can affect the background prediction.  Particularly
186   dangerous are high $P_T$ Drell Yan events that barely pass the \met$>$ 50
187   GeV selection.  They will tend to push the data-driven background prediction up.
188 + Therefore we estimate the number of DY events entering the background prediction
189 + using the $R_{out/in}$ method as described in Sec.~\ref{sec:othBG}.
190   \end{itemize}
191  
192   We have studied these effects in SM Monte Carlo, using a mixture of generator and
# Line 155 | Line 209 | under different assumptions.  See text f
209   4&Y                        &     N          &   N      &  GEN    &   Y             &   Y    & Y          & 1.55  \\
210   5&Y                        &     N          &   N      & RECOSIM &   Y             &   Y    & Y          & 1.51  \\
211   6&Y                        &     Y          &   N      & RECOSIM &   Y             &   Y    & Y          & 1.58  \\
212 < 7&Y                        &     Y          &   Y      & RECOSIM &   Y             &   Y    & Y          & 1.18  \\
212 > 7&Y                        &     Y          &   Y      & RECOSIM &   Y             &   Y    & Y          & 1.38  \\
213 > %%%NOTE: updated value 1.18 -> 1.46 since 2/3 DY events have been removed by updated analysis selections,
214 > %%%dpt/pt cut and general lepton veto
215   \hline
216   \end{tabular}
217   \end{center}
# Line 173 | Line 229 | Going from GEN to RECOSIM, the change in
229   % by $\approx 4\%$\footnote{We find that observed/predicted changes by roughly 0.1
230   %for each 1.5\% change in \met response.}.  
231   Finally, contamination from non $t\bar{t}$
232 < events can have a significant impact on the BG prediction.  The changes between
233 < lines 6 and 7 of Table~\ref{tab:victorybad} is driven by 3
234 < Drell Yan events that pass the \met selection in Monte Carlo (thus the effect
235 < is statistically not well quantified).
232 > events can have a significant impact on the BG prediction.  
233 > %The changes between
234 > %lines 6 and 7 of Table~\ref{tab:victorybad} is driven by 3
235 > %Drell Yan events that pass the \met selection in Monte Carlo (thus the effect
236 > %is statistically not well quantified).
237  
238   An additional source of concern is that the CMS Madgraph $t\bar{t}$ MC does
239   not include effects of spin correlations between the two top quarks.  
# Line 196 | Line 253 | that the bias is at the few percent leve
253  
254   Based on the results of Table~\ref{tab:victorybad}, we conclude that the
255   naive data driven background estimate based on $P_T{(\ell\ell)}$ needs to
256 < be corrected by a factor of $ K = X \pm Y$.
256 > be corrected by a factor of $ K_C = X \pm Y$.
257   The value of this correction factor as well as the systematic uncertainty
258   will be assessed using 38X ttbar madgraph MC. In the following we use
259 < $K = 1$ for simplicity. Based on previous MC studies we foresee a correction
260 < factor of $\approx 1.2 - 1.4$, and we will assess an uncertainty
259 > $K_C = 1$ for simplicity. Based on previous MC studies we foresee a correction
260 > factor of $K_C \approx 1.2$--$1.5$, and we will assess an uncertainty
261   based on the stability of the Monte Carlo tests under
262   variations of event selections, choices of \met algorithm, etc.
263   For example, we find that observed/predicted changes by roughly 0.1
# Line 230 | Line 287 | in the ABCD method but not in the $P_T(\
287  
288   The LM points are benchmarks for SUSY analyses at CMS.  The effects
289   of signal contamination for a couple of such points are summarized
290 < in Table~\ref{tab:sigcontABCD} and~\ref{tab:sigcontPT}.
234 < Signal contamination is definitely an important
290 > in Table~\ref{tab:sigcont}. Signal contamination is definitely an important
291   effect for these two LM points, but it does not totally hide the
292   presence of the signal.
293  
294  
295   \begin{table}[htb]
296   \begin{center}
297 < \caption{\label{tab:sigcontABCD} Effects of signal contamination
298 < for the background predictions of the ABCD method including LM0 or
299 < LM1.  Results
300 < are normalized to 30 pb$^{-1}$.}
301 < \begin{tabular}{|c|c||c|c||c|c|}
246 < \hline
247 < SM         & BG Prediction  & SM$+$LM0     & BG Prediction & SM$+$LM1     & BG Prediction \\
248 < Background & SM Only        & Contribution & Including LM0 & Contribution & Including LM1  \\ \hline
249 < 1.2        & 1.0            & 6.8          & 3.7           & 3.4          & 1.3 \\
297 > \caption{\label{tab:sigcont} Effects of signal contamination
298 > for the two data-driven background estimates. The three columns give
299 > the expected yield in the signal region and the background estimates
300 > using the ABCD and $P_T(\ell \ell)$ methods. Results are normalized to 35~pb$^{-1}$.}
301 > \begin{tabular}{lccc}
302   \hline
303 < \end{tabular}
304 < \end{center}
305 < \end{table}
306 <
307 < \begin{table}[htb]
256 < \begin{center}
257 < \caption{\label{tab:sigcontPT} Effects of signal contamination
258 < for the background predictions of the $P_T(\ell\ell)$ method including LM0 or
259 < LM1.  Results
260 < are normalized to 30 pb$^{-1}$.}
261 < \begin{tabular}{|c|c||c|c||c|c|}
262 < \hline
263 < SM         & BG Prediction  & SM$+$LM0     & BG Prediction & SM$+$LM1     & BG Prediction \\
264 < Background & SM Only        & Contribution & Including LM0 & Contribution & Including LM1  \\ \hline
265 < 1.2        & 1.0            & 6.8          & 2.2           & 3.4          & 1.5 \\
303 >            &      Yield      &      ABCD    & $P_T(\ell \ell)$  \\
304 > \hline
305 > SM only     &      1.43       &      1.19    &             1.03  \\
306 > SM + LM0    &      7.90       &      4.23    &             2.35  \\
307 > SM + LM1    &      4.00       &      1.53    &             1.51  \\
308   \hline
309   \end{tabular}
310   \end{center}
311   \end{table}
312  
313 +
314 +
315 + %\begin{table}[htb]
316 + %\begin{center}
317 + %\caption{\label{tab:sigcontABCD} Effects of signal contamination
318 + %for the background predictions of the ABCD method including LM0 or
319 + %LM1.  Results
320 + %are normalized to 30 pb$^{-1}$.}
321 + %\begin{tabular}{|c|c||c|c||c|c|}
322 + %\hline
323 + %SM         & BG Prediction  & SM$+$LM0     & BG Prediction & SM$+$LM1     & BG Prediction \\
324 + %Background & SM Only        & Contribution & Including LM0 & Contribution & Including LM1  \\ \hline
325 + %1.2        & 1.0            & 6.8          & 3.7           & 3.4          & 1.3 \\
326 + %\hline
327 + %\end{tabular}
328 + %\end{center}
329 + %\end{table}
330 +
331 + %\begin{table}[htb]
332 + %\begin{center}
333 + %\caption{\label{tab:sigcontPT} Effects of signal contamination
334 + %for the background predictions of the $P_T(\ell\ell)$ method including LM0 or
335 + %LM1.  Results
336 + %are normalized to 30 pb$^{-1}$.}
337 + %\begin{tabular}{|c|c||c|c||c|c|}
338 + %\hline
339 + %SM         & BG Prediction  & SM$+$LM0     & BG Prediction & SM$+$LM1     & BG Prediction \\
340 + %Background & SM Only        & Contribution & Including LM0 & Contribution & Including LM1  \\ \hline
341 + %1.2        & 1.0            & 6.8          & 2.2           & 3.4          & 1.5 \\
342 + %\hline
343 + %\end{tabular}
344 + %\end{center}
345 + %\end{table}
346 +

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines