% Presentations/Kstmumu/TMVA2/TMVA2.tex at 62126323ae48e6645a6a6d31d9a17d79c107f1e2
%
% Fork: 0
% mchrzasz / Presentations
% Find file
% Newer
% Older
% Presentations / Kstmumu / TMVA2 / TMVA2.tex
% Marcin Chrzaszcz on 22 Dec 2013 13 KB updated presentation, after blending presetantion
% Raw Blame History
% Beamer slide deck: preamble (themes, packages, colours) followed by the
% title metadata used by \titlepage.
\documentclass[]{beamer}
% Empty template: no navigation symbols are drawn.
\setbeamertemplate{navigation symbols}{}
\usepackage{beamerthemesplit}
\useoutertheme{infolines}
\usecolortheme{dolphin}
%\usetheme{Warsaw}
% The progressbar theme is selected as a whole and then once more component
% by component (colour, font, outer, inner).
% NOTE(review): it comes after split/infolines/dolphin and presumably
% overrides parts of them -- confirm which of the earlier lines still matter.
\usetheme{progressbar} 
\usecolortheme{progressbar}
\usefonttheme{progressbar}
\useoutertheme{progressbar}
\useinnertheme{progressbar}
\usepackage{graphicx}
%\usepackage{amssymb,amsmath}
% The source file is declared to be Latin-1 encoded.
\usepackage[latin1]{inputenc}
\usepackage{amsmath}
% \abs{x}: x between auto-sized vertical bars. Not used anywhere in this file.
\newcommand\abs[1]{\left|#1\right|}
\usepackage{iwona}
% High-energy-physics helper packages; presumably the source of the particle
% macros \PB, \PKstar and \PK used on the slides -- confirm.
\usepackage{hepparticles}
\usepackage{hepnicenames}
\usepackage{hepunits}
% Option of the progressbar theme; images/lhcb is presumably the logo it shows.
\progressbaroptions{imagename=images/lhcb}
%\usetheme{Boadilla}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
% Custom green (CMYK), used below as the foreground of "structure" elements.
\definecolor{mygreen}{cmyk}{0.82,0.11,1,0.25}
% Rounded blocks without shadow.
\setbeamertemplate{blocks}[rounded][shadow=false]
% Code added before and after the "block begin" template: fill opacity 0.8, then back to 1.
\addtobeamertemplate{block begin}{\pgfsetfillopacity{0.8}}{\pgfsetfillopacity{1}}
\setbeamercolor{structure}{fg=mygreen}
% Colours of example blocks (title and body).
\setbeamercolor*{block title example}{fg=mygreen!50,
bg= blue!10}
\setbeamercolor*{block body example}{fg= blue,
bg= blue!5}





%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\beamersetuncovermixins{\opaqueness<1>{25}}{\opaqueness<2->{15}}
% Title metadata; the superscript numbers on the authors refer to the
% institute list given with the title slide. The presenter is underlined.
\title{New proposal for the $\PB \to \PKstar \mu \mu$ selection}  
\author{\underline{Marcin Chrzaszcz}$^{1,2}$, Tatiana Likhomanenko$^{3,4}$,\\ Andrey Ustyuzhanin$^{3,4,5}$}
\date{\today} 

\begin{document}

% Title slide. The long institute list and the empty footline are set inside
% a brace group, presumably so that they apply to this one frame only.
{
  \institute{$^1$ University of Zurich, $^2$ Institute of Nuclear Physics, $^3$ Yandex, $^4$ Kurchatov Institute, $^5$ Imperial College}
  \setbeamertemplate{footline}{}
  \begin{frame}
    \logo{
      \vspace{2 mm}
      \includegraphics[height=1cm,keepaspectratio]{images/ifj.png}~
      \includegraphics[height=1cm,keepaspectratio]{images/uzh.jpg}}

    \titlepage
  \end{frame}
}

% Short institute label set after the title slide.
\institute{UZH,IFJ}


% Outline slide: the section has the short title "Outline" and an empty long title.
\section[Outline]{}
\begin{frame}
  \tableofcontents
\end{frame}

%normal slides

	

\section{Chopping technique}
\subsection{General idea}

% Slide: the three steps of the chopping procedure, each shown as an image
% (left column) next to a short caption (right column).
\begin{frame}\frametitle{Chopping Data Set, How to}
{~}
\begin{center}

% Step 1: reshuffle.
\begin{columns}
  \column{2in}
  \includegraphics[scale=.14]{images/data2.png}

  \column{3in}
  1. Reshuffling the events to guarantee the uniformity of the data.
\end{columns}

% Step 2: split into sub-samples.
\begin{columns}
  \column{2in}
  \includegraphics[scale=.14]{images/data3.png}

  \column{3in}
  2. Chopping in sub-samples.
\end{columns}

% Step 3: train on all but one sub-sample, apply to the one left out.
\begin{columns}
  \column{2in}
  \includegraphics[scale=.14]{images/data4.png}

  \column{3in}
  % n-1 is a mathematical expression: math mode gives a minus sign, not a hyphen.
  3. Training using $n-1$ sub-samples and applying the result to the remaining one (iteratively) \\
  Increase in the statistics used in the training (more stable MVA response), no bias in the result :-)
\end{columns}


\end{center}
\end{frame}

\subsection{Performance gain}

% Slide: what was studied to quantify the effect of the chopping technique.
\begin{frame}
  \frametitle{Chopping performance (I)}
  {~}
  \begin{center}
    \begin{itemize}
      \item Chopping technique studied with MatrixNet and the BDT (TMVA)
      \item Comparison of ROC curves for different sampling in the chopping procedure
      \item Comparison repeated for different input variable configurations

    \end{itemize}

  \end{center}
\end{frame}

% Slide: MatrixNet (left) and TMVA (right) plots for the BASE input set
% (var5 + hadron DLL).
\begin{frame}\frametitle{Chopping performance (II)}
{~}
\begin{center}
%\begin{Large}
Comparison performed using as input: var5 + hadron DLL = BASE
%\end{Large}
\begin{columns}

\column{2.5in}
% \centering is the declaration form. A bare \center only opens a center
% environment (a list) that is then never closed.
\centering MatrixNet\\
\includegraphics[scale=.28]{images/foldsPIDK-2-5-9.png}
\column{2.5in}
%foldsPIDK-2-5-9.png
\centering TMVA \\
\includegraphics[scale=.15]{images/fold_base.png}

\end{columns}
Better performance with 9 sub-samples for training.
\end{center}
\end{frame}

% Slide: MatrixNet (left) and TMVA (right) plots for the extended input set
% base + ISO + TAUERR + MUPID.
\begin{frame}\frametitle{Chopping performance (III)}
{~}
\begin{center}
%\begin{Large}
% No \\ here: the list that follows already starts a new paragraph.
Comparison performed using as input: base + ISO + TAUERR + MUPID
\begin{itemize}
\item \small Data--MC comparison and the effect of single variables on the BDT performance follow in a few slides.
\end{itemize}
%\end{Large}
\begin{columns}

\column{2.5in}
% \centering is the declaration form. A bare \center only opens a center
% environment (a list) that is then never closed.
\centering MatrixNet\\
\includegraphics[scale=.22]{images/folds2-5-9.png}
\column{2.5in}
%foldsPIDK-2-5-9.png
\centering TMVA\\
% NOTE(review): consider renaming this image file (on disk and here) to something descriptive.
\includegraphics[scale=.1115]{images/FUCK.png}

\end{columns}

\end{center}
\small  Better performance with 9 sub-samples for training: the result is more evident here than with
   fewer variables, as expected from statistics.
\end{frame}


\begin{frame}\frametitle{Implication for the analysis}
{~}
\begin{itemize}
\item The tuple to be analyzed will have only 1 branch which contains the different MVAs for the different subsamples.
\item The MC for the acceptance correction will contain one branch that is the average of the different BDTs.
\item Thanks to the reshuffling, the BDT response over many samples is the same as the average of the BDTs.
%\item Complication is only in the training phase and preparing the ntuples, after that the analysis goes exactly in the same way as having only 1 BDT.
\end{itemize}
\textbf{The only complication is in the training phase and in preparing the ntuples; after that, the analysis proceeds exactly as with a single BDT.}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}\frametitle{Chopping conclusions}
{~}
\begin{itemize}
\item  Gain in performance with the chopping technique even more evident with more variables (as expected).
\item Almost no extra complication/work needed in the data analysis.
\item The backup slides show the chopping results for different configurations of variables.

\end{itemize}
\end{frame}


\section{Proposal of new variables}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Slide: definitions of the reference selections ("Last BDT", "Baseline")
% and the list of variables proposed as new MVA inputs.
\begin{frame}\frametitle{Proposal of new variables for the MVA}
{~}
% No \\ here: the list that follows already starts a new paragraph.
Definitions:
\begin{itemize}
% The ordinal of 21 is "st"; \textsuperscript keeps it in the text font instead of math italics.
\item Last BDT:\\ BDT presented on the 21\textsuperscript{st} of August 2013. Includes: Var5+probNN($\pi$, $\PK$, $\mu$) and isolation.
\item Baseline:\\
% NOTE(review): "KPDK" is possibly a typo for "KPIDK" (cf. "PiPIDK") -- confirm.
Var5+PiPIDK+KPDK
\item New variables proposed for MVA:
\begin{itemize}
\item TAUERR
\item FD
\item MUPID
\item ISO
\end{itemize}
\end{itemize}




\end{frame}

\subsection{MC/DATA comparison}
% Slide: data/MC comparison plots, two on the top row and three on the bottom row.
% Each column uses \centering (the declaration form); a bare \center would
% only open a center environment that is never closed.
\begin{frame}\frametitle{MC/DATA Comparison}
\begin{columns}
\column{2.5in}
\centering
\includegraphics[scale=.15]{images/B0_FD.png}

\column{2.5in}
\centering
\includegraphics[scale=.15]{images/B0_TAU.png}

\end{columns}

\begin{columns}
\column{1.6in}
\centering
\includegraphics[scale=.15]{images/B0_TAUERR.png}

\column{1.6in}
\centering
\includegraphics[scale=.15]{images/iso.png}

\column{1.6in}
\centering
\includegraphics[scale=.15]{images/iso_NEW.png}

\end{columns}



\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
\subsection{Performance gain (ROC, 1:1 comparison with last BDT)}
% Slide: ROC curves for several variable configurations, with DLL-based PID
% (left) and ProbNN-based PID (right).
\begin{frame}\frametitle{New variables performance (I)}

\begin{columns}
\column{2.5in}
% \centering is the declaration form. A bare \center only opens a center
% environment (a list) that is then never closed.
\centering PID=DLL\\
\includegraphics[scale=.15]{images/ROC_ALL.png}

\column{2.5in}
\centering PID=ProbNN\\
\includegraphics[scale=.15]{images/ROC_ALL_PROBNN.png}

\end{columns}
\begin{itemize}
\item ROC curve comparison of several variable configurations.
\item Best performance adding FD, MUPID, TAUERR and ISO (pink line)
\item Best performance obtained with ProbNN
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
% Slide: ROC curves with the isolation variable included, DLL-based PID
% (left) and ProbNN-based PID (right).
\begin{frame}\frametitle{New variables performance (II), ISO GAIN}

\begin{columns}
\column{2.5in}
% \centering is the declaration form. A bare \center only opens a center
% environment (a list) that is then never closed.
\centering PID=DLL\\
\includegraphics[scale=.15]{images/ROC_ALL_ISO.png}

\column{2.5in}
\centering PID=ProbNN\\
\includegraphics[scale=.15]{images/ROC_ALL_PROBNN_ISO.png}

\end{columns}
\begin{itemize}
\item Include ISO

\end{itemize}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
% Slide: ROC curves with ISO and MUPID included, DLL-based PID (left) and
% ProbNN-based PID (right).
\begin{frame}\frametitle{New variables performance (III),\\ ISO+MUPID+TAUERR+FD}

\begin{columns}
\column{2.5in}
% \centering is the declaration form. A bare \center only opens a center
% environment (a list) that is then never closed.
\centering PID=DLL\\
\includegraphics[scale=.15]{images/ROC_ALL_ISO_MUPID.png}
\begin{itemize}
\item Include ISO and MUPID

\end{itemize}
\column{2.5in}
\centering PID=ProbNN\\
\includegraphics[scale=.15]{images/ROC_ALL_PROBNN_ISO_MUPID.png}
\begin{itemize}
\item Include ISO and MUPID

\end{itemize}
\end{columns}


\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
% Slide: summary of the new-variable studies.
\begin{frame}\frametitle{New variables performance (IV)}
\begin{itemize}
\item Good data/MC agreement (reasonable for the ISO, and not worse than the old one)
\item Best performance obtained adding FD, MUPID, TAUERR and ISO
\item Best performance obtained with ProbNN
\item MUPID will also be useful against peaking misidentified background.
\end{itemize}


\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\section{Conclusions}

\begin{frame}\frametitle{Selections comparison}
\begin{itemize}

\item MatrixNet outperformed TMVA.
\item All fits were made in the same way as Sam did (fixing the same parameters to the same values).
\item 1:1 comparison with the last BDT and the baseline.
\item Same data for both.
\end{itemize}


\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Slide: signal and background yields per q^2 bin, last BDT vs. MatrixNet.
\begin{frame}\frametitle{1:1 comparison with Last BDT (I)}
\begin{center}

% Every row fills five cells, so the preamble declares five columns (extra
% empty columns make the horizontal rules stick out on the right).
% The \multicolumn specs carry only their right-hand rule: the rule on the
% left already belongs to the preceding column.
\begin{tabular}{ |c||c|c||c|c| }
  \hline
  $q^2$ & \multicolumn{2}{c||}{Last BDT} & \multicolumn{2}{c|}{MatrixNet} \\ \hline \hline
  % Unit set upright; in plain math mode "GeV" would be the product G*e*V in italics.
  $[\mathrm{GeV}^2]$ & Signal & Bck & Signal & Bck \\ \hline
  $0.1,2$ & $407 \pm 25$ & $58 \pm 7$ & $412 \pm 22$ & $39 \pm 5$ \\ \hline
  $2, 4.3$ & $202 \pm 19$ & $95 \pm 7$ & $220 \pm 17$ & $54 \pm 5$ \\ \hline
  $4.3, 8.68$ & $573 \pm 32$ & $170 \pm 10$ & $591 \pm 28$ & $131 \pm 8$ \\ \hline
  $10.09, 12.86$ & $508 \pm 26$ & $93 \pm 7$ & $508 \pm 25$ & $88 \pm 7$ \\ \hline
  $14.18, 16$ & $310 \pm 20$ & $49 \pm 5$ & $324 \pm 20$ & $43 \pm 6$ \\ \hline
  $16, 19$ & $359 \pm 29$ & $34 \pm 8$ & $373 \pm 21$ & $35 \pm 5$ \\ \hline
  % Total quoted as an integer (was 2355.2), like every other yield in the table.
  $0.1, 19$ & $2355 \pm 63$ & $510 \pm 19$ & $2365 \pm 55$ & $403 \pm 15$ \\ \hline
\end{tabular}
\end{center}

%This is more than $5\sigma$ significant improvement in back of the envelope calculation.

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Slide: signal and background yields per q^2 bin, MatrixNet baseline vs.
% MatrixNet with the full variable set.
\begin{frame}\frametitle{Comparison between different configurations (II)}
\begin{center}

% Every row fills five cells, so the preamble declares five columns (extra
% empty columns make the horizontal rules stick out on the right).
% The \multicolumn specs carry only their right-hand rule: the rule on the
% left already belongs to the preceding column.
\begin{tabular}{ |c||c|c||c|c| }
  \hline
  $q^2$ & \multicolumn{2}{c||}{MN Baseline\footnote{Var+KPIDLL}} & \multicolumn{2}{c|}{MN FULL\footnote{Var5+ProbNN+Iso+FD+TAUERR}} \\ \hline \hline
  % Unit set upright; in plain math mode "GeV" would be the product G*e*V in italics.
  $[\mathrm{GeV}^2]$ & Signal & Bck & Signal & Bck \\ \hline
  $0.1,2$ & $384 \pm 22$ & $66 \pm 8$ & $419 \pm 21$ & $37 \pm 5$ \\ \hline
  $2, 4.3$ & $249 \pm 21$ & $120 \pm 9$ & $225 \pm 18$ & $50 \pm 8$ \\ \hline
  $4.3, 8.68$ & $641 \pm 32$ & $255 \pm 12$ & $591 \pm 28$ & $130 \pm 8$ \\ \hline
  $10.09, 12.86$ & $534 \pm 27$ & $140 \pm 9$ & $510 \pm 25$ & $86 \pm 7$ \\ \hline
  $14.18, 16$ & $328 \pm 21$ & $73 \pm 6$ & $328 \pm 20$ & $46 \pm 5$ \\ \hline
  $16, 19$ & $386 \pm 21$ & $65 \pm 8$ & $361 \pm 20$ & $36 \pm 5$ \\ \hline
  $0.1, 19$ & $2501 \pm 60$ & $741 \pm 22$ & $2369 \pm 55$ & $396 \pm 15$ \\ \hline
\end{tabular}
\end{center}

%This is more than $5\sigma$ significant improvement in back of the envelope calculation.

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Slide: signal and background yields per q^2 bin, MatrixNet full set with
% DLL PID vs. MatrixNet full set with ProbNN PID.
\begin{frame}\frametitle{Comparison between different configurations (III)}
\begin{center}

% Every row fills five cells, so the preamble declares five columns (extra
% empty columns make the horizontal rules stick out on the right).
% The \multicolumn specs carry only their right-hand rule: the rule on the
% left already belongs to the preceding column.
\begin{tabular}{ |c||c|c||c|c| }
  \hline
  $q^2$ & \multicolumn{2}{c||}{MN FULL DLL\footnote{Var5+DLL+Iso+FD+TAUERR}} & \multicolumn{2}{c|}{MN FULL\footnote{Var5+ProbNN+Iso+FD+TAUERR}} \\ \hline \hline
  % Unit set upright; in plain math mode "GeV" would be the product G*e*V in italics.
  $[\mathrm{GeV}^2]$ & Signal & Bck & Signal & Bck \\ \hline
  $0.1,2$ & $365 \pm 23$ & $43 \pm 6$ & $419 \pm 21$ & $37 \pm 5$ \\ \hline
  $2, 4.3$ & $227 \pm 21$ & $65 \pm 6$ & $225 \pm 18$ & $50 \pm 8$ \\ \hline
  $4.3, 8.68$ & $599 \pm 29$ & $154 \pm 9$ & $591 \pm 28$ & $130 \pm 8$ \\ \hline
  $10.09, 12.86$ & $511 \pm 24$ & $164 \pm 7$ & $510 \pm 25$ & $86 \pm 7$ \\ \hline
  $14.18, 16$ & $321 \pm 20$ & $47 \pm 6$ & $328 \pm 20$ & $46 \pm 5$ \\ \hline
  $16, 19$ & $364 \pm 21$ & $39 \pm 6$ & $361 \pm 20$ & $36 \pm 5$ \\ \hline
  $0.1, 19$ & $2373 \pm 56$ & $468 \pm 16$ & $2369 \pm 55$ & $396 \pm 15$ \\ \hline
\end{tabular}
\end{center}

%This is more than $5\sigma$ significant improvement in back of the envelope calculation.

\end{frame}









% Slide: MatrixNet efficiency as a function of phi, q^2, theta_K and theta_l
% (one plot per column), evaluated on the Sim08 PHSP sample.
\begin{frame}\frametitle{MatrixNet efficiency}
\begin{itemize}
%\item MatrixNet Efficiency
\item Sim08 PHSP
% Words inside math go through \text (amsmath); otherwise they are set as
% products of italic letters and the spaces between them vanish.
\item Efficiency defined as $\epsilon = n_{\text{evts}}(\text{after MN})/n_{\text{evts}}(\text{after presel.})$

\end{itemize}



% No \\ after the images: each one is the last thing in its paragraph, so a
% forced line break there only adds an empty, underfull line.
\begin{columns}

\column{1.2in}
\begin{center}
\includegraphics[scale=.12]{images/phi.png}
\end{center}

\column{1.2in}
\begin{center}
\includegraphics[scale=.12]{images/q2.png}
\end{center}

\column{1.2in}
\begin{center}
\includegraphics[scale=.12]{images/thetak.png}
%\includegraphics[scale=.125]{images/thetal.png}\\
\end{center}

\column{1.2in}
\begin{center}
\includegraphics[scale=.12]{images/thetal.png}
\end{center}
\end{columns}
\begin{itemize}
\item Flat response in the angles and in $q^2$
\end{itemize}

\end{frame}
\begin{frame}\frametitle{Conclusions}

\begin{itemize}
\item Gain using the chopping technique, without extra complications in the analysis procedure
\item Gain in performance using new variables which showed good agreement with MC
\item Gain in performance using MatrixNet (with respect to previous BDTs)
\item Reduced background while keeping the same signal efficiency.
\end{itemize}



\end{frame}



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


% Divider slide in front of the backup material: empty title, the single
% word "BACKUP" in a large font.
\begin{frame}
  \frametitle{~}
  {~}

  {\LARGE
  BACKUP
  }

\end{frame}

% Backup slides: one plot per frame, with a short label or a frame title
% naming the configuration.
\begin{frame}\frametitle{~}
{~}
Base+MCISO\\
\includegraphics[scale=.18]{images/folds2.png}



\end{frame}

\begin{frame}\frametitle{~}
{~}
Base+ISO\\
% NOTE(review): consider renaming this image file (on disk and here) to something descriptive.
\includegraphics[scale=.18]{images/FUCK2.png}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%



\begin{frame}\frametitle{~}
{~}

\includegraphics[scale=.28]{images/Compare.png}

\end{frame}




\begin{frame}\frametitle{MN:Iso+var+probNNx4}
{~}
% NOTE(review): the label "Base+ISO" differs from the frame title -- confirm which one is meant.
Base+ISO\\
\includegraphics[scale=.28]{images/MN_0p45.png}

\end{frame}
\begin{frame}\frametitle{BASE}
{~}
\includegraphics[scale=.28]{images/MN_0p2.png}

\end{frame}

\begin{frame}\frametitle{FULL}
{~}
\includegraphics[scale=.28]{images/MN_0p48.png}

\end{frame}

% Title spelled "MatrixNet", as on every other slide.
\begin{frame}\frametitle{MatrixNet ROC}
{~}
\includegraphics[scale=.4]{images/10_15vars(7-14).png}

\end{frame}
\end{document}