% Newer
% Older
% Lecture_repo / Lectures_my / EMPP / 2017 / Lecture1 / mchrzasz.tex
% @mchrzasz mchrzasz on 4 Oct 2017 34 KB added lecture 2
\documentclass[11 pt,xcolor={dvipsnames,svgnames,x11names,table}]{beamer}

\usepackage[english]{babel} 
\usepackage{polski}         


\usetheme[
	bullet=circle,		% Other option: square
	bigpagenumber,		% circled page number on lower right
	topline=true,			% colored bar at the top of the frame 
	shadow=false,			% Shading for beamer blocks
	watermark=BG_lower,	% png file for the watermark
	]{Flip}

%\logo{\kern+1.em\includegraphics[height=1cm]{SHiP-3_LightCharcoal}}
                            

\usepackage[lf]{berenis}
\usepackage[LY1]{fontenc}
\usepackage[utf8]{inputenc}

\usepackage{emerald}
\usefonttheme{professionalfonts}
\usepackage[no-math]{fontspec}		
\defaultfontfeatures{Mapping=tex-text}	% This seems to be important for mapping glyphs properly

\setmainfont{Gillius ADF}			% Beamer ignores "main font" in favor of sans font
\setsansfont{Gillius ADF}			% This is the font that beamer will use by default
% \setmainfont{Gill Sans Light}		% Prettier, but harder to read

\setbeamerfont{title}{family=\fontspec{Gillius ADF}}

\input t1augie.fd

%\newcommand{\handwriting}{\fontspec{augie}} % From Emerald City, free font
%\newcommand{\handwriting}{\usefont{T1}{fau}{m}{n}} % From Emerald City, free font
% \newcommand{\handwriting}{}	% If you prefer no special handwriting font or don't have augie

%% Gill Sans doesn't look very nice when boldfaced
%% This is a hack to use Helvetica instead
%% Usage: \textbf{\forbold some stuff}
%\newcommand{\forbold}{\fontspec{Arial}}

\usepackage{graphicx}
\usepackage[export]{adjustbox}

\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{bm}
\usepackage{colortbl}
\usepackage{mathrsfs} 			% For Weinberg-esque letters
\usepackage{cancel}				% For "SUSY-breaking" symbol
\usepackage{slashed}            % for slashed characters in math mode
\usepackage{bbm}                % for \mathbbm{1} (unit matrix)
\usepackage{amsthm}				% For theorem environment
\usepackage{multirow}			% For multi row cells in table
\usepackage{arydshln} 			% For dashed lines in arrays and tables
\usepackage{siunitx}
\usepackage{xhfill}
\usepackage{grffile}
\usepackage{textpos}
\usepackage{subfigure}
\usepackage{tikz}
\usepackage{hyperref}
%\usepackage{hepparticles}    
\usepackage[italic]{hepparticles}     

\usepackage{hepnicenames} 

% Drawing a line
% `lw' is a named TikZ style selecting a 20pt line width.
% Defined with \tikzset (the \tikzstyle syntax is deprecated in PGF/TikZ).
\tikzset{lw/.style={line width=20pt}}
% \topline: draws a horizontal line across the full paper width, 23.5pt below
% the top edge of the current page, as an overlay (takes up no space).
% NOTE(review): the colour `crimsonred' is not defined in this file --
% presumably provided by the Flip theme; confirm.
\newcommand{\topline}{%
  \tikz[remember picture,overlay] {%
    \draw[crimsonred] ([yshift=-23.5pt]current page.north west)
             -- ([yshift=-23.5pt,xshift=\paperwidth]current page.north west);}}



% % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % %
\usepackage{tikzfeynman}		% For Feynman diagrams
\usetikzlibrary{arrows,shapes}
\usetikzlibrary{trees}
\usetikzlibrary{matrix,arrows} 				% For commutative diagram
% http://www.felixl.de/commu.pdf
\usetikzlibrary{positioning}				% For "above of=" commands
\usetikzlibrary{calc,through}				% For coordinates
\usetikzlibrary{decorations.pathreplacing}  % For curly braces
% http://www.math.ucla.edu/~getreuer/tikz.html
\usepackage{pgffor}							% For repeating patterns

\usetikzlibrary{decorations.pathmorphing}	% For Feynman Diagrams
\usetikzlibrary{decorations.markings}
% Line styles for hand-drawn Feynman diagrams.  Each key is used as a path
% option, e.g. \draw[fermion] (a) -- (b);
\tikzset{
	% >=stealth', %%  Uncomment for more conventional arrows
	% Wavy (snake) lines; provector/antivector fix the amplitude to +2.5pt / -2.5pt,
	% i.e. the same wave with opposite sign.
	vector/.style={decorate, decoration={snake}, draw},
	provector/.style={decorate, decoration={snake,amplitude=2.5pt}, draw},
	antivector/.style={decorate, decoration={snake,amplitude=-2.5pt}, draw},
	% Solid gray lines; the arrow is added as a postaction marking at 55% of the path.
	% fermion uses a ">" arrow tip, fermionbar a "<" tip, fermionnoarrow none.
	fermion/.style={draw=gray, postaction={decorate},
		decoration={markings,mark=at position .55 with {\arrow[draw=gray]{>}}}},
	fermionbar/.style={draw=gray, postaction={decorate},
		decoration={markings,mark=at position .55 with {\arrow[draw=gray]{<}}}},
	fermionnoarrow/.style={draw=gray},
	% Black coil decoration (amplitude 4pt, segment length 5pt).
	gluon/.style={decorate, draw=black,
		decoration={coil,amplitude=4pt, segment length=5pt}},
	% Dashed black lines with the same arrow convention as the fermion styles.
	scalar/.style={dashed,draw=black, postaction={decorate},
		decoration={markings,mark=at position .55 with {\arrow[draw=black]{>}}}},
	scalarbar/.style={dashed,draw=black, postaction={decorate},
		decoration={markings,mark=at position .55 with {\arrow[draw=black]{<}}}},
	scalarnoarrow/.style={dashed,draw=black},
	% Solid black line with a ">" arrow at 55% of the path.
	electron/.style={draw=black, postaction={decorate},
		decoration={markings,mark=at position .55 with {\arrow[draw=black]{>}}}},
	% Snake line with a larger (4pt) amplitude than provector.
	bigvector/.style={decorate, decoration={snake,amplitude=4pt}, draw},
}

% TIKZ - for block diagrams, 
% from http://www.texample.net/tikz/examples/control-system-principles/
% \usetikzlibrary{shapes,arrows}
% `block': a drawn rectangular node at least 3em high and 6em wide.
% Defined with \tikzset (the \tikzstyle syntax is deprecated in PGF/TikZ).
\tikzset{block/.style={draw, rectangle,
	minimum height=3em, minimum width=6em}}




\usetikzlibrary{backgrounds}
\usetikzlibrary{mindmap,trees}	% For mind map
% Degree sign; \ensuremath makes it usable in both text and math mode.
\newcommand*{\degree}{\ensuremath{^\circ}}
% Upright symbols for expectation value, variance and covariance (math mode).
\newcommand*{\E}{\mathrm{E}}
\newcommand*{\Var}{\mathrm{Var}}
\newcommand*{\Cov}{\mathrm{Cov}}
% Invisible zero-width struts for padding table rows.
\newcommand*{\Ts}{\rule{0pt}{2.6ex}}       % Top strut: 2.6ex above the baseline
\newcommand*{\Bs}{\rule[-1.2ex]{0pt}{0pt}} % Bottom strut: 1.2ex below the baseline

\graphicspath{{images/}}	% Put all images in this directory. Avoids clutter.

% SOME COMMANDS THAT I FIND HANDY
% \renewcommand{\tilde}{\widetilde} % dinky tildes look silly, doesn't work with fontspec
% \comment / \Comment / \COMMENT: typeset the argument in \footnotesize using the
% colour of the same name (comment, Comment, COMMENT).
% NOTE(review): these colours, and Alert / ALERT below, are not defined in this
% file -- presumably supplied by the theme; confirm.
\newcommand{\comment}[1]{\textcolor{comment}{\footnotesize{#1}\normalsize}} % comment mild
\newcommand{\Comment}[1]{\textcolor{Comment}{\footnotesize{#1}\normalsize}} % comment bold
\newcommand{\COMMENT}[1]{\textcolor{COMMENT}{\footnotesize{#1}\normalsize}} % comment crazy bold
% \Alert / \ALERT: colour the argument with the Alert / ALERT colour, size unchanged.
\newcommand{\Alert}[1]{\textcolor{Alert}{#1}} % louder alert
\newcommand{\ALERT}[1]{\textcolor{ALERT}{#1}} % loudest alert
%% "\alert" is already a beamer pre-defined
% \Scale[factor]{math}: typesets #2 in inline math mode and scales the result
% by the optional factor #1 (default 4).
\newcommand*{\Scale}[2][4]{\scalebox{#1}{$#2$}}%

% \Put(x,y){content}: delimited-argument macro that places #3 with \put(x,y)
% inside a zero-size \makebox(0,0), so the content takes up no space in the text.
\def\Put(#1,#2)#3{\leavevmode\makebox(0,0){\put(#1,#2){#3}}}

\usepackage{gmp}
\usepackage[final]{feynmp-auto}

\usepackage[backend=bibtex,style=numeric-comp,firstinits=true]{biblatex}
\bibliography{bib}
\setbeamertemplate{bibliography item}[text]

\makeatletter\let\frametextheight\beamer@frametextheight\makeatother

% suppress frame numbering for backup slides
% you always need the appendix for this!
% Usage: put \backupbegin before the first backup frame and \backupend after
% the last one.
%
% The helper counter is allocated once here instead of inside \backupbegin:
% \newcounter raises "Command \c@framenumberappendix already defined" when
% executed a second time, so the old definition broke if \backupbegin was
% ever called twice.
\newcounter{framenumberappendix}
% \backupbegin: store the current frame number (last regular slide).
% The trailing % signs prevent the line ends from becoming spurious spaces.
\newcommand{\backupbegin}{%
	\setcounter{framenumberappendix}{\value{framenumber}}%
}
% \backupend: framenumberappendix becomes (stored - current), which is then
% added to framenumber, i.e. framenumber is reset to the stored value so the
% backup frames do not count towards the total.
\newcommand{\backupend}{%
	\addtocounter{framenumberappendix}{-\value{framenumber}}%
	\addtocounter{framenumber}{\value{framenumberappendix}}%
}


\definecolor{links}{HTML}{2A1B81}
%\hypersetup{colorlinks,linkcolor=,urlcolor=links}

% For shapo's formulas:
% \lsi / \gsi: a < (resp. >) sign with a \sim placed underneath
% ("less/greater than or approximately"); \lsim / \gsim wrap them for math mode.
\def\lsi{\raise0.3ex\hbox{$<$\kern-0.75em\raise-1.1ex\hbox{$\sim$}}}
\def\gsi{\raise0.3ex\hbox{$>$\kern-0.75em\raise-1.1ex\hbox{$\sim$}}}
\newcommand{\lsim}{\mathop{\lsi}}
\newcommand{\gsim}{\mathop{\gsi}}
\newcommand{\wt}{\widetilde}
%\newcommand{\ol}{\overline}
% Upright labels.  \rm is a font *declaration*, not a command with an argument:
% the former "\rm{Tr}" switched to roman and never switched back, so everything
% following the macro in the same group was typeset upright as well.  The
% switch is now confined to its own group, like \e, \Br and \pdf below.
\newcommand{\Tr}{{\rm Tr}}
\newcommand{\tr}{{\rm tr}}
% \eqn{rel}: relation column of an eqnarray (&rel&) with tightened spacing.
\newcommand{\eqn}[1]{&\hspace{-0.7em}#1\hspace{-0.7em}&}
% \vev{X}, \abs{X}: <X> and |X|; these open their own $...$, so they are
% meant to be used from text mode.
\newcommand{\vev}[1]{{\rm $\langle #1 \rangle$}}
\newcommand{\abs}[1]{{\rm $\left| #1 \right|$}}
\newcommand{\eV}{{\rm eV}}
\newcommand{\keV}{{\rm keV}}
\newcommand{\GeV}{{\rm GeV}}
\newcommand{\im}{{\rm Im}}
\newcommand{\disp}{\displaystyle}
% Shorthands for opening/closing equation and eqnarray environments.
\def\be{\begin{equation}}
\def\ee{\end{equation}}
\def\ba{\begin{eqnarray}}
\def\ea{\end{eqnarray}}
% NOTE(review): \def silently overwrites LaTeX's standard text commands
% \d (dot-under accent), \l (the letter l-with-stroke) and \r (ring accent).
% Kept for backward compatibility; avoid relying on the original meanings.
\def\d{\partial}
\def\l{\left(}
\def\r{\right)}
\def\la{\langle}
\def\ra{\rangle}
\def\e{{\rm e}}
\def\Br{{\rm Br}}
% Coloured text markers: red "FIXME!" placeholder and magenta "MC".
\def\fixme{{\color{red} FIXME!}}
\def\mc{{\color{Magenta}{MC}}}
\def\pdf{{\rm p.d.f.}}

\author{ {\fontspec{Trebuchet MS}Marcin Chrz\k{a}szcz} (CERN)}
\institute{CERN}
\title[Introduction to \\Monte Carlo methods]{Introduction to \\Monte Carlo methods}
\date{\fixme}


\begin{document}
\tikzstyle{every picture}+=[remember picture]

{
\setbeamertemplate{sidebar right}{\llap{\includegraphics[width=\paperwidth,height=\paperheight]{bubble2}}}
\begin{frame}[c]%{\phantom{title page}} 
\begin{center}
\begin{center}
	\begin{columns}
		\begin{column}{0.9\textwidth}
			\flushright\fontspec{Trebuchet MS}\bfseries \Huge {Introduction to \\Monte Carlo methods}
		\end{column}
		\begin{column}{0.2\textwidth}
		  %\includegraphics[width=\textwidth]{SHiP-2}
		\end{column}
	\end{columns}
\end{center}
	\quad
	\vspace{3em}
\begin{columns}
\begin{column}{0.44\textwidth}
\flushright \vspace{-1.8em} {\fontspec{Trebuchet MS} \Large Marcin Chrząszcz\\\vspace{-0.1em}\small \href{mailto:mchrzasz@cern.ch}{mchrzasz@cern.ch}}

\end{column}
\begin{column}{0.53\textwidth}
\hspace{0.3\textwidth}\includegraphics[height=1.3cm]{cern}
\end{column}
\end{columns}

\vspace{1em}
%		\footnotesize\textcolor{gray}{With N. Serra, B. Storaci\\Thanks to the theory support from M. Shaposhnikov, D. Gorbunov}\normalsize\\
\vspace{0.5em}
	\textcolor{normal text.fg!50!Comment}{Experimental Methods in Particle Physics, \\ 4 October, 2017}
\end{center}
\end{frame}
}


\begin{frame}\frametitle{Literature}

\begin{enumerate}
\item J. M. Hammersley, D. C. Handscomb, ``Monte Carlo Methods'', London: Methuen \& Co. Ltd., New York: J. Wiley \& Sons Inc., 1964
\item I. M. Sobol, ``The Monte Carlo Method'', Mir Publishers, Moscow, 1975.
\item M. H. Kalos, P. A. Whitlock, ``Monte Carlo Methods'', J. Wiley \& Sons Inc., New York, 1986
\item G. S. Fishman, ``Monte Carlo: Concepts, Algorithms and Applications'', Springer, 1996.
\item R. Y. Rubinstein, D. P. Kroese, ``Simulation and the Monte Carlo Method'', Second Edition, J. Wiley \& Sons Inc., 2008.
\item R. Korn, E. Korn, G. Kroisandt, ``Monte Carlo methods and models in finance and insurance'', CRC Press,
Taylor \& Francis Group, 2010.
\item S. Jadach, ``Practical Guide to Monte Carlo'', \href{http://arxiv.org/abs/physics/9906056}{arXiv:physics/9906056}, \href{http://cern.ch/jadach/MCguide/}{http://cern.ch/jadach/MCguide/}.
\end{enumerate}


\end{frame}




\begin{frame}\frametitle{Course Plan}

We will have 6 hours of Monte Carlo (MC) lectures. The lectures will be devoted:\\

\hspace{1.5cm}
\begin{itemize}
\item 1 h: Mathematical introduction to MC methods.
\item 1 h: MC integration methods.
\item 2 h: Random numbers generators.
\item 2 h: Markov Chain MC.
\item 2 h: Tutorial and examples.
\end{itemize}
\hspace{1cm} \\
The hands-on tutorial will consist of program templates in which we will implement a couple of algorithms that were explained in the lecture. \\
$\Rrightarrow$ All examples shown in this course are available in the github repository:\\
\url{https://github.com/mchrzasz/EMPP_MC}\\
\color{RubineRed}{There will be an indication (in this color) on the relevant slide for each of the macros.}


\end{frame} 


\begin{frame}\frametitle{Definitions}
\begin{footnotesize}
$\Rrightarrow$ Basic definition:\\
        \begin{exampleblock}{}                                                                                                                                                                                                                                                                                                
                Monte Carlo method is any technique that uses {\it{random numbers}} to solve a given mathematical problem.
        \end{exampleblock}  
%        \vspace{0.5cm} 

$\rightarrowtail$ Random number: For the purpose of this course we need to assume that we know what it is, although the formal definition is highly non-trivial.\\
\vspace{0.05cm}
$\Rrightarrow$ My favourite definition (Halton 1970): \begin{scriptsize}more complicated, but more accurate.\end{scriptsize}

\begin{exampleblock}{}                                                                                                                                                                                                                                                                                                
``Representing the solution of a problem as a parameter of a hypothetical population, and using a random sequence of numbers to construct a sample of the population, from which statistical estimates of the parameter can be obtained.''
 \end{exampleblock} 
To put this definition in mathematical language:\\
Let $F$ be a solution of a given mathematical problem. The estimate of the result $\hat{F}$:\\
\begin{equation*}
\hat{F}=f( \lbrace r_1, r_2, r_3,...,r_n \rbrace; ...),
\end{equation*}
where $\lbrace r_1, r_2, r_3,...,r_n \rbrace$ are random numbers.
\begin{center}
\color{red}{The problem we are solving doesn't need to be stochastic!}
\end{center}
\begin{scriptsize}
$\twoheadrightarrow$ One could wonder why we are trying to add all the stochastic properties to a deterministic problem. Those are the properties that allow us to use all the well-known statistical theorems. 
 \end{scriptsize}
\end{footnotesize}
\end{frame}

\begin{frame}\frametitle{History of MC methods}
\begin{footnotesize}
\begin{itemize}
\item {\color{PineGreen}{G. Comte de Buffon (1777)}} - First documented usage of random numbers for integral computation (Buffon threw a needle on a table with parallel lines; we will do a modern version of this exercise).
\item {\color{PineGreen}{Marquis de Laplace (1886)}} - Used the Buffon needle to determine the value of $\pi$.
\item {\color{PineGreen}{Lord Kelvin (1901)}} - Thanks to drawing randomly numbered cards he managed to calculate some integrals in the kinetic theory of gases.
\item {\color{PineGreen}{W. S. Gosset (better known as Student) (1908)}} - Used a similar way as Lord Kelvin to get random numbers to verify Student's \textit{t}-distribution.
\item {\color{PineGreen}{Enrico Fermi (1930) }} - First mechanical device (\texttt{FERMIAC}) for random number generation. Solved neutron transport equations in the nuclear plants.
\item {\color{PineGreen}{S. Ulam, R. Feynman, J. von Neumann et al.}} - First massive usage of random numbers. Most applications were in the Manhattan project to calculate neutron scattering and absorption. \\
In {\color{NavyBlue}{Los Alamos}} the name {\color{Mahogany}{Monte Carlo}} was created as the code name for this kind of calculations. 
\end{itemize}


\end{footnotesize}
\end{frame}



\begin{frame}\frametitle{Euler number determination, $\rm \color{RubineRed}{Lecture1/Euler\_number}$  }
\begin{footnotesize}
$\Rrightarrow$ As mentioned before \mc~methods can be used to solve problems that \textbf{do not} have stochastic nature! All the integrals calculated in Los Alamos during the Manhattan project are nowadays solvable without any \mc~methods.\\
$\rightarrowtail$ Let's give a trivial example of solving a non stochastic problem: calculating Euler number $e$. We know that $e=2.7182818...$.
$\Rrightarrow$ To calculate the $\hat{e}$ we will use the following algorithm: 
\begin{itemize}
\item We generate a random number in range $(0,1)$ (in stat. $\mathcal{U}(0,1)$) until the number we generate is smaller than the previous one, i.e. we get the following sequence:
\begin{align*}
x_1<x_2<...<x_{n-1}>x_{n}
\end{align*}
\item We store the number $n$. We repeat this experiment $N$ times and calculate the arithmetic average of $n$. The obtained value is a statistical estimator of $e$:
\begin{align*}
\hat{e}= \dfrac{1}{N}\sum_{i=1}^N n_i \xrightarrow{N\to \infty} e .
\end{align*}
\end{itemize}
$\Rrightarrow$ Numerical example:
\begin{tabular}{r c c c }
$N$ & $\hat{e}$ & $\hat{e} - e$ & \multirow{5}{*}{Is this $\sim 1/\sqrt{N}$?} \\
100 & $2.760000$ & $0.041718$ \\
10000 & $2.725000$ & $0.006718$ \\
1000000  & $2.718891$ & $0.000609$ \\
100000000 & $2.718328$ & $0.000046$\\
\end{tabular}


\end{footnotesize}
\end{frame}

\begin{frame}\frametitle{Let's test the $1/\sqrt{N}$, $\rm \color{RubineRed}{Lecture1/Euler\_number}$  }

\only<1>
{
$\Rrightarrow$ In the last example we measured the Euler number using different number of pseudo-experiments.\\
$\rightarrowtail$ We compared the obtained value to the true one and observed roughly a $1/\sqrt{N}$ dependence of the difference between the true value and the obtained one.\\
}
$\rightarrowtail$ Could we test this? YES! Let's put our experimentalist hat on!\\
$\rightarrowtail$ From the beginning of our studies they taught us that to get the error you need to repeat the measurements.
\only<1>
{
\begin{exampleblock}{The algorithm:}                                                                                                                                                                                                                                                                                                
Previous time we measured the Euler number using $N$ events, where $N \in (100, 1000, 10000, 100000)$. Now let's repeat this measurement $n_N$ times (of course each time we use newly generated numbers). From the distribution of $\hat{e} -e$ we could say something about the uncertainty of our estimator for a given $N$.
 \end{exampleblock} 
}

\begin{center}
\only<2>
{
\includegraphics[angle=-90,width=0.8\textwidth]{images/result_error.pdf}
}
\only<3>
{
\includegraphics[angle=-90,width=0.8\textwidth]{images/result_error_dep.pdf}
}
\end{center}












\end{frame}

\begin{frame}\frametitle{Monte Carlo and integration}
\begin{footnotesize}
$\hookrightarrow$ {\color{BrickRed}{\textbf{All MC calculations are equivalent to performing an integration.}}}\\
$\rightrightarrows$ Assumptions: $r_i$ random numbers from $\mathcal{U}(0,1)$. The MC result:
\begin{align*}
F=F(r_1,r_2,...r_n)
\end{align*}
is an unbiased estimator of the integral:
\begin{align*}
I=\int_0^1\dots\int_0^1 F(x_1,x_2,\dots,x_n)\,dx_1\,dx_2\dots dx_n
\end{align*}
i.e. the expected value of $F$ is:
\begin{align*}
E(F)=I.
\end{align*}
    \begin{exampleblock}{}                                                                                                                                                                                                                                                                                                
$\Rrightarrow$ This mathematical identity is the most useful property of the MC methods. It is a link between mathematical analysis and the statistical world. Now we can use the best of both worlds!
        \end{exampleblock} 
If we want to calculate the integral in a different range than $(0,1)$ we just scale the previous result:
\begin{align*}
\dfrac{1}{N}\sum_{i=1}^N f(x_i) \xrightarrow{N\to \infty} E(f)=\dfrac{1}{b-a}\int_a^b f(x)dx
\end{align*}


\end{footnotesize}
\end{frame}


\begin{frame}\frametitle{Uncertainty from Monte Carlo methods}
\begin{footnotesize}
$\Rrightarrow$ In practice we do not have $N\to \infty$ so we will never know the exact result of an integral :(\\
$\longmapsto$ Let's use the {\color{BrickRed}{statistical}} world and estimate the uncertainty of an integral in this case :)\\
$\rightarrowtail$ A variance of a MC integral:
\begin{align*}
V(\hat{I}) = \dfrac{1}{n} \Big\lbrace E(f^2) - E^2(f) \Big\rbrace = \dfrac{1}{n} \Big\lbrace \dfrac{1}{b-a} \int_a^b f^2(x)dx - I^2 \Big\rbrace
\end{align*}
    \begin{alertblock}{}                                                                                                                                                                                                                                                                                                
$\looparrowright$ To calculate $V(\hat{I})$ one needs to know the value of $I$!
\end{alertblock}
$\Rrightarrow$ In practice $V(\hat{I})$ is calculated via estimator:
\begin{columns}
\column{2in}
\begin{align*}
\hat{V}(\hat{I})=\dfrac{1}{n}\hat{V}(f),
\end{align*}
\column{3in}
\begin{align*}
\hat{V}(f) = \dfrac{1}{n-1}\sum_{i=1}^n  \Big[ f(x_i)-\dfrac{1}{n} \sum_{j=1}^nf(x_j)\Big]^2.
\end{align*}
\end{columns}


$\Rrightarrow$ MC estimator of standard deviation: $\hat{\sigma}=\sqrt{\hat{V}(\hat{I})}$


\end{footnotesize}
\end{frame}


\begin{frame}\frametitle{Buffon needle - $\pi$ number calculus}
\begin{footnotesize}

$\Rrightarrow$ Buffon needle (Buffon 1777, Laplace 1886):
We are throwing a needle (of length $l$) on to a surface covered with parallel lines with distance $L$ between them. If a thrown needle touches a line we count a hit, else a miss. Knowing the number of hits and misses one can calculate the $\pi$ number.
\vspace{0.3cm}
\begin{columns}
\column{0.1in}
{~}
\column{2in}
{\color{ForestGreen}{Experiment:}}
\column{2.8in}
{\color{Cerulean}{Theory:}}

\end{columns}


\begin{columns}
\column{0.1in}
{~}
\column{2in}

\includegraphics[width=0.9\textwidth]{images/buffon.png}\\
$n$ -  number of hits\\
$N$ number of hits and misses,\\
aka number of tries.

\column{2.8in}
$\Rightarrow$ $x$ - angle between needle and horizontal line, $x \sim \mathcal{U}(0,\pi)$.
$\Rightarrow$ the probability density function (\pdf) for $x$:
\begin{align*}
\rho(x)=\dfrac{1}{\pi}
\end{align*}
$\Rightarrow$  $p(x)$ probability to hit a line for a given $x$ value:
\begin{align*}
p(x)=\dfrac{l}{L}\vert \cos x \vert
\end{align*}
$\Rightarrow$ Total hit probability:
\begin{align*}
P = E[p(x)]=\int_0^{\pi}p(x)\rho(x)dx=\dfrac{2l}{\pi L}
\end{align*}

\end{columns}
Now one can calculate $\hat{P}$ from MC : $\hat{P}=\dfrac{n}{N} \xrightarrow{N\to \infty} P= \dfrac{2l}{\pi L} \Rightarrow \hat{\pi}=\dfrac{2Nl}{nL}$


\end{footnotesize}
\end{frame}











\begin{frame}\frametitle{Buffon needle - Simplest Monte Carlo method}
\begin{footnotesize}
{\color{MidnightBlue}{Monte Carlo type ``heads or tails''}}\\
Let's use the symmetry of the $p(x)$ function and take $0<x<\frac{\pi}{2}$.\\
$\Rightarrow$ Algorithm:\\
\begin{columns}
\column{0.1in}
{~}
\column{3.2in}



Generate 2 dim. distribution:
\begin{align*}
(x,y): \mathcal{U}(0,\dfrac{\pi}{2})\times \mathcal{U}(0,1) {\rm{~and~}}
\end{align*}
\begin{align*}
y 
\begin{cases}
  \leq p(x): & \text{hit},\\
> p(x):             & \text{miss}.
\end{cases}
\end{align*}

\column{2.5in}
\includegraphics[width=0.75\textwidth]{images/result.png}



\end{columns}
Let's define weight function: $w(x,y)=\Theta(p(x)-y)$, \\
where $\Theta(x)$ is the step function.\\
$\rightarrowtail$ \pdf : $\varrho(x,y)=\rho(x)g(y)=\frac{2}{\pi} \cdot 1$\\
$\Rightarrow$ Integrated probability:
\begin{align*}
P=E(w)=\int w(x,y) \varrho(x,y)dx dy = \dfrac{2l}{\pi L} \xleftarrow{N\to \infty}\hat{P}=\frac{1}{N} \sum_{i=1}^N w(x_i,y_i)= \dfrac{n}{N}
\end{align*}
Standard deviation for $\hat{P}$: $\hat{\sigma}=\dfrac{1}{\sqrt{N-1}}\sqrt{\dfrac{n}{N}\Big(1-\dfrac{n}{N}\Big)} $



\end{footnotesize}
\end{frame}


\begin{frame}\frametitle{Buffon needle, $\rm \color{RubineRed}{Lecture1/Heads\_tails}$  }
\begin{small}


$\Rrightarrow$ Let's make this toy experiment and calculate the $\pi$ number.\\
$\hookrightarrow$ We can simulate the central position $(y)$ of a needle between $(-L, L)$ from $\mathcal{U}(-L, L)$. 

  \begin{exampleblock}{Symmetry:}                                                                                                                                                                                                                                                                                                
Please note the symmetry of the problem, if the position of the needle would be $>L$ then we can shift the needle by any number of $L$'s.
        \end{exampleblock} 

$\hookrightarrow$ Now we simulate the angle $(\phi)$ with a flat distribution from $(0,\pi)$. 
$\hookrightarrow$ The maximum and minimum $y$ position of the needle are:
\begin{align*}
y_{\max}=y+\vert \cos \phi \vert \frac{l}{2}\\
y_{\min}=y-\vert \cos \phi \vert \frac{l}{2}
\end{align*}
$\hookrightarrow$ Now we check if the needle touches any of the lines: $y=L$, $y=0$ or $y=-L$. If yes we count the events.
\end{small}
\begin{center}


\begin{footnotesize}


\begin{tabular}{r c c c }
$N$ & $\hat{\pi}$ & $\hat{\pi} - \pi$ & $\sigma(\hat{\pi})$ \\
10000 & $3.12317$ & $-0.01842$ & $0.03047$\\
100000 & $3.14707$ & $0.00547$ & $0.00979$\\
1000000 & $3.13682$ & $-0.00477$ & $0.00307$\\
10000000 & $3.14096$ & $-0.00063$ & $0.00097$\\


\end{tabular}

\end{footnotesize}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}\frametitle{Central Limit Theorem, $\rm \color{RubineRed}{Lecture1/CLT}$   }
\begin{footnotesize}
    \begin{exampleblock}{}                                                                                                                                                                                                                                                                                                
The sum (or mean) of a large number of independent random numbers always has an approximately Gaussian distribution, no matter what distribution they were generated from, as long as they have finite variances and expected values and the sample is sufficiently large.
        \end{exampleblock} 
\includegraphics[width=0.9\textwidth]{images/dupa.png}


\end{footnotesize}
\end{frame}


\begin{frame}\frametitle{Crude Monte Carlo method of integration}
\begin{footnotesize}
$\Rrightarrow$ {\color{MidnightBlue}{Crude Monte Carlo method of integration is based on Central Limit Theorem (CLT): }}\\
\begin{align*}
\dfrac{1}{N} \sum_{i=1}^N f(x_i) \xrightarrow{N\to \infty} \dfrac{1}{b-a}\int_a^b f(x)dx =E(f)
\end{align*}
$\Rrightarrow$ The standard deviation can be calculated:
\begin{align*}
\sigma = \dfrac{1}{\sqrt{N}} \sqrt{\Big[ E(f^2) -E^2(f)\Big] }
\end{align*}

$\Rrightarrow$ From the Law of Large Numbers (LLN) we have:
\begin{align*}
P= \int w(x) \rho(x) dx = \int_0^{\pi/2} (\frac{l}{L} \cos x ) \frac{2}{\pi} dx= \dfrac{2l}{\pi L}  \xleftarrow{N\to \infty} \frac{1}{N}\sum_{i=1}^N w(x_i)
\end{align*}
$\Rrightarrow$ Important comparison between ``Hit and mishit'' and Crude \mc~methods. One can analytically calculate:

\begin{align*}
\hat{\sigma}^{{\rm{Crude}}} < \hat{\sigma}^{{\rm{Hit~and~mishit}}}
\end{align*}


$\Rrightarrow$ Crude \mc~is \textbf{always} better than the ``Hit and mishit'' method. We will prove this on an example (can be proven analytically as well).


\end{footnotesize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}\frametitle{Crude MC vs ``Hit and mishit'',  $\rm \color{RubineRed}{Lecture1/Crude\_vs\_HT}$}

$\Rrightarrow$ We can repeat the toy MC study as we did in the Euler number case.\\
$\hookrightarrow$ In this example we want to calculate $\int_0^{\pi/2} \cos x dx$

\only<1>{
\begin{center}
\includegraphics[angle=-90,width=0.8\textwidth]{images/results_0.pdf}
\end{center}
}
\only<2>{
\begin{center}
\includegraphics[angle=-90,width=0.8\textwidth]{images/results_1.pdf}
\end{center}
}
\only<3>{
\begin{center}

\includegraphics[angle=-90,width=0.4\textwidth]{images/results_fit_0.pdf}
\includegraphics[angle=-90,width=0.4\textwidth]{images/results_fit_1.pdf}\\
\end{center}
$\Rrightarrow$ One clearly sees that both methods follow the $1/\sqrt{N}$ dependence and that the Crude MC is always better than the ``Hit and mishit''.\\
$\Rrightarrow$ Please note that for the ``Hit and mishit'' we are using 2 times more random numbers than for the Crude method so in terms of timing the Crude MC is also much faster.

}



\end{frame}

\begin{frame}\frametitle{Classical methods of variance reduction}
\begin{footnotesize}

$\Rrightarrow$ In Monte Carlo methods the statistical uncertainty is defined as:
\begin{align*}
\sigma = \dfrac{1}{\sqrt{N}}\sqrt{V(f)}
\end{align*}
$\Rrightarrow$ Obvious conclusion:
\begin{itemize}
\item To reduce the uncertainty one needs to increase $N$.\\
$\rightrightarrows$ Slow convergence. In order to reduce the error by factor of 10 one needs to simulate factor of 100 more points!
\end{itemize}
$\Rrightarrow$ However the other handle ($V(f)$) can be changed! $\longrightarrow$ Lots of theoretical effort goes into reducing this factor.\\
$\Rrightarrow$ We will discuss {\color{Mahogany}{four}} classical methods of variance reduction:
\begin{enumerate}
\item Stratified sampling.
\item Importance sampling.
\item Control variates.
\item Antithetic variates.
\end{enumerate}





\end{footnotesize}
\end{frame}




\begin{frame}\frametitle{Stratified sampling}
\begin{footnotesize}
$\Rrightarrow$ The most intuitive method of variance reduction. The idea behind it is to divide the function in different ranges and to use the Riemann integral property:
\begin{align*}
I = \int_0^1 f(u) du = \int_0^a f(u)du + \int_a^1 f(u) du,~ 0<a<1.
\end{align*}

$\Rrightarrow$ The reason for this method is that in smaller ranges the integrated function is flatter. And it's trivial to see that the flatter the function, the smaller the uncertainty. 
$\rightrightarrows$ A constant function would have zero uncertainty!

    \begin{exampleblock}{General schematic:}                                                                                                                                                                                                                                                                                                
Let's take our integration domain and divide it into smaller domains. In the $j^{th}$ domain with the volume $w_j$ we simulate $n_j$ points from a uniform distribution. We sum the function values in each of the simulated points for each of the domains. Finally we sum them with weights proportional to $w_j$ and inversely proportional to $n_j$. 
        \end{exampleblock} 

\end{footnotesize}
\end{frame}

\begin{frame}\frametitle{Stratified sampling - mathematical details}
\begin{footnotesize}
Let's define our integrals and domains:
\begin{align*}
I=\int_{\Omega} f(x) dx,{~}{~} \Omega=\bigcup_{i=1}^k w_i
\end{align*}
The integral over $j^{th}$ domain:
\begin{align*}
I_j=\int_{w_j} f(x) dx,{~}{~} \Rightarrow I = \sum_{j=1}^k I_j
\end{align*}
$\rightrightarrows$ $p_j$ uniform distribution in the $w_j$ domain: $dp_j=\frac{dx}{w_j}$.\\
$\rightrightarrows$ The integral is calculated based on crude \mc~method. The estimator is equal:
\begin{align*}
\hat{I}_j = \frac{w_j}{n_j} \sum_{i=1}^{n_j}f(x_j^{(i)})
\end{align*}
Now the total integral is just a sum:
\begin{align*}
\hat{I} = \sum_{j=1}^k \hat{I}_j = \sum_{j=1}^k \frac{w_j}{n_j} \sum_{i=1}^{n_j} f(x_j^{(i)}), 
\end{align*}
\begin{columns}
\column{0.2in}
\column{2in}
Variance:
$V(\hat{I})=\sum_{j=1}^k \dfrac{w_j^2}{n_j}V_j(f)$,
\column{3in}
and its estimator:
$\hat{V} (\hat{I}) = \sum_{j=1}^k \dfrac{w_j^2}{n_j} \hat{V}_j(f)$
\end{columns}

\end{footnotesize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}\frametitle{Importance sampling}
\begin{footnotesize}
$\Rrightarrow$ If the function is changing rapidly in its domain one needs to use a more elegant method: make the function more stable.\\
$\rightrightarrows$ The solution is from first course of mathematical analysis: change the integration variable :)
\begin{align*}
f(x)dx \longrightarrow \frac{f(x)}{g(x)}dG(x),~{\rm{where}}~g(x)=\frac{dG(x)}{dx}
\end{align*}
    \begin{exampleblock}{Schematic:}                                                                                                                                                                                                                                                                                                
\begin{itemize}
\item Generate the distribution from $G(x)$ instead of $\mathcal{U}$.
\item For each generated point calculate the weight: $w(x)=\frac{f(x)}{g(x)}$.
\item We calculate the expected value $\hat{E}(w)$ and its variance $\hat{V}_G(w)$ for the whole sample.
\end{itemize}
        \end{exampleblock} 



\begin{itemize}

\item If $g(x)$ is chosen correctly the resulting variance can be much smaller.
\item There are some mathematical requirements:
\begin{itemize}
\item $g(x)$ needs to be non-negative and analytically integrable on its domain.
\item $G(x)$ invertible or there should be a direct generator of $g$ distribution.
\end{itemize}
\end{itemize}


\end{footnotesize}
\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}\frametitle{Importance sampling - Example}
\begin{footnotesize}

$\Rrightarrow$ Let's take our good old $\pi$ determination example.
\vspace{0.3cm}
\begin{columns}
\column{0.1in}
\column{3in}
$\Rrightarrow$ Let's take here for simplicity: $L=l$. 
\begin{itemize}
\item Let's take a trivial linear weight function: $g(x)=\frac{4}{\pi}(1-\frac{2}{\pi}x)$
\item It's invertible analytically: $G(x)=\frac{4}{\pi}x(1-\frac{x}{\pi})$
\item The weight function:
\begin{align*}
w(x)=\frac{p(x)}{g(x)}=\frac{\pi}{4}\frac{\cos x}{1-2x/ \pi}
\end{align*}
\end{itemize}

\column{2in}
\includegraphics[width=0.95\textwidth]{images/result_weight.png}
\end{columns}
\begin{itemize}
\item Now the new standard deviation is smaller:
\end{itemize}
\begin{align*}
\sigma_{\pi}^{{\rm{IS}}} \simeq \frac{0.41}{\sqrt{N}} < \sigma_{\pi}\simeq \frac{1.52}{\sqrt{N}}
\end{align*}
\begin{itemize}
\item Importance sampling has advantages:
\begin{itemize}
\item Big improvements of variance reduction.
\item The only method that can cope with singularities.
\end{itemize}
\end{itemize}


\end{footnotesize}
\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}\frametitle{Wrap up}
\begin{footnotesize}
$\Rrightarrow$ To sum up:
\begin{itemize}
\item We discussed basic mathematical properties of \mc~methods.
\item We have shown that despite the stochastic nature of \mc~methods they can be used to determine totally non stochastic quantities.
\item We demonstrated there is a perfect isomorphism between \mc~method and integration.
\item We learned how to calculate integrals and estimate the uncertainties.
\item Finally we discussed several classical methods of variance reduction.
\end{itemize}


\end{footnotesize}
\end{frame}








\backupbegin   

\begin{frame}\frametitle{Backup}


\end{frame}

\begin{frame}\frametitle{Control variates}
\begin{footnotesize}
$\Rrightarrow$ Control variates use another nice property of the Riemann integral:
\begin{align*}
\int f(x) dx = \int [f(x)-g(x)]dx+ \int g(x)dx
\end{align*}
\begin{itemize}
\item $g(x)$ needs to be analytically integrable.
\item The uncertainty comes only from the integral: $\int [f(x)-g(x)]dx$. 
\item Obviously: $V(f-g) \xrightarrow{f\to g} 0$
\end{itemize}
$\Rrightarrow$ Advantages:\\
\begin{itemize}
\item Quite stable, immune to the singularities.
\item $g(x)$ doesn't need to be invertible analytically.
\end{itemize}
$\Rrightarrow$ Disadvantage:\\
\begin{itemize}
\item Useful only if you know $\int g(x)dx$
\end{itemize}

\end{footnotesize}
\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}\frametitle{Antithetic variates}
\begin{footnotesize}
$\Rrightarrow$ In \mc~methods usually one uses the independent random variables. The Antithetic variates method on purpose uses a set of correlated variables (negative correlation is the important property):
\begin{itemize}
\item Let $f$ and $f'$ be functions of $x$ on the same domain.
\item The variance: $V(f+f')=V(f)+V(f')+2 \Cov(f,f')$.
\item If $\Cov(f,f')<0$ then you can reduce the variance.
\end{itemize}
$\Rrightarrow$ Advantages:
\begin{itemize}
\item If you can pick $f$ and $f'$ so that they have negative correlation one can significantly reduce the variance!
\end{itemize}

$\Rrightarrow$ Disadvantages:
\begin{itemize}
\item There are no general methods to produce such negative correlations.
\item Hard to generalize this for multidimensional case.
\item You can't generate events from $f(x)$ with this method.
\end{itemize}



\end{footnotesize}
\end{frame}

\backupend			

\end{document}