Lecture_repo / Lectures_my / MC_2016 / Lecture5 / mchrzasz.tex
\documentclass[11 pt,xcolor={dvipsnames,svgnames,x11names,table}]{beamer}

\tcbset{highlight math style={enhanced,

	bullet=circle,		% Other option: square
	bigpagenumber,		% circled page number on lower right
	topline=true,			% colored bar at the top of the frame 
	shadow=false,			% Shading for beamer blocks
	watermark=BG_lower,	% png file for the watermark



\defaultfontfeatures{Mapping=tex-text}	% This seems to be important for mapping glyphs properly

\setmainfont{Gillius ADF}			% Beamer ignores "main font" in favor of sans font
\setsansfont{Gillius ADF}			% This is the font that beamer will use by default
% \setmainfont{Gill Sans Light}		% Prettier, but harder to read

\setbeamerfont{title}{family=\fontspec{Gillius ADF}}

\input t1augie.fd

%\newcommand{\handwriting}{\fontspec{augie}} % From Emerald City, free font
%\newcommand{\handwriting}{\usefont{T1}{fau}{m}{n}} % From Emerald City, free font
% \newcommand{\handwriting}{}	% If you prefer no special handwriting font or don't have augie

%% Gill Sans doesn't look very nice when boldfaced
%% This is a hack to use Helvetica instead
%% Usage: \textbf{\forbold some stuff}

\usepackage{amsmath, amssymb} 

\usepackage{mathrsfs} 			% For Weinberg-esque letters
\usepackage{cancel}				% For "SUSY-breaking" symbol
\usepackage{slashed}            % for slashed characters in math mode
\usepackage{bbm}                % for \mathbbm{1} (unit matrix)
\usepackage{amsthm}				% For theorem environment
\usepackage{multirow}			% For multi row cells in table
\usepackage{arydshln} 			% For dashed lines in arrays and tables


% Drawing a line
\tikzstyle{lw} = [line width=20pt]
  \tikz[remember picture,overlay] {%
    \draw[crimsonred] ([yshift=-23.5pt]current page.north west)
             -- ([yshift=-23.5pt,xshift=\paperwidth]current page.north west);}}

% % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % % %
\usepackage{tikzfeynman}		% For Feynman diagrams
\usetikzlibrary{matrix,arrows} 				% For commutative diagram
\usetikzlibrary{positioning}				% For "above of=" commands
\usetikzlibrary{calc,through}				% For coordinates
\usetikzlibrary{decorations.pathreplacing}  % For curly braces
\usepackage{pgffor}							% For repeating patterns

\usetikzlibrary{decorations.pathmorphing}	% For Feynman Diagrams
	% >=stealth', %%  Uncomment for more conventional arrows
	vector/.style={decorate, decoration={snake}, draw},
	provector/.style={decorate, decoration={snake,amplitude=2.5pt}, draw},
	antivector/.style={decorate, decoration={snake,amplitude=-2.5pt}, draw},
	fermion/.style={draw=gray, postaction={decorate},
		decoration={markings,mark=at position .55 with {\arrow[draw=gray]{>}}}},
	fermionbar/.style={draw=gray, postaction={decorate},
		decoration={markings,mark=at position .55 with {\arrow[draw=gray]{<}}}},
	gluon/.style={decorate, draw=black,
		decoration={coil,amplitude=4pt, segment length=5pt}},
	scalar/.style={dashed,draw=black, postaction={decorate},
		decoration={markings,mark=at position .55 with {\arrow[draw=black]{>}}}},
	scalarbar/.style={dashed,draw=black, postaction={decorate},
		decoration={markings,mark=at position .55 with {\arrow[draw=black]{<}}}},
	electron/.style={draw=black, postaction={decorate},
		decoration={markings,mark=at position .55 with {\arrow[draw=black]{>}}}},
	bigvector/.style={decorate, decoration={snake,amplitude=4pt}, draw},

% TIKZ - for block diagrams, 
% from
% \usetikzlibrary{shapes,arrows}
\tikzstyle{block} = [draw, rectangle, 
minimum height=3em, minimum width=6em]


\usetikzlibrary{mindmap,trees}	% For mind map
\newcommand\Ts{\rule{0pt}{2.6ex}}       % Top strut
\newcommand\Bs{\rule[-1.2ex]{0pt}{0pt}} % Bottom strut

\graphicspath{{images/}}	% Put all images in this directory. Avoids clutter.

% \renewcommand{\tilde}{\widetilde} % dinky tildes look silly, dosn't work with fontspec
%\newcommand{\comment}[1]{\textcolor{comment}{\footnotesize{#1}\normalsize}} % comment mild
%\newcommand{\Comment}[1]{\textcolor{Comment}{\footnotesize{#1}\normalsize}} % comment bold
%\newcommand{\COMMENT}[1]{\textcolor{COMMENT}{\footnotesize{#1}\normalsize}} % comment crazy bold
\newcommand{\Alert}[1]{\textcolor{Alert}{#1}} % louder alert
\newcommand{\ALERT}[1]{\textcolor{ALERT}{#1}} % loudest alert
%% "\alert" is already a beamer pre-defined



\setbeamertemplate{bibliography item}[text]


% suppress frame numbering for backup slides
% you always need the appendix for this!


% For shapo's formulas:
\newcommand{\vev}[1]{\rm{$\langle #1 \rangle$}}
\newcommand{\abs}[1]{\rm{$\left| #1 \right|$}}
\def\e{{\rm e}}
\def\Br{{\rm Br}}
\def\fixme{{\color{red} FIXME!}}
\def\pdf{{\rm p.d.f.}}

\author{ {\fontspec{Trebuchet MS}Marcin Chrz\k{a}szcz} (Universit\"{a}t Z\"{u}rich)}
\title[Testing random number generators]{Testing random number generators}

\tikzstyle{every picture}+=[remember picture]

\setbeamertemplate{sidebar right}{\llap{\includegraphics[width=\paperwidth,height=\paperheight]{bubble2}}}
\begin{frame}[c]%{\phantom{title page}} 
			\flushright\fontspec{Trebuchet MS}\bfseries \Huge {Testing random number generators}
\flushright \vspace{-1.8em} {\fontspec{Trebuchet MS} \Large Marcin Chrz\k{a}szcz\\\vspace{-0.1em}\small \href{}{}}


%		\footnotesize\textcolor{gray}{With N. Serra, B. Storaci\\Thanks to the theory support from M. Shaposhnikov, D. Gorbunov}\normalsize\\
	\textcolor{normal text.fg!50!Comment}{Monte Carlo methods, \\ 24 March, 2016}

\begin{frame}\frametitle{General methodology}
 \begin{exampleblock}{How to check if we have a good generator?}
 The generator is good if the number sequences that it produces have properties of truly random numbers.\\
 But how to check this!?!
 \ARROW Traditional methodology:\\
 Define some properties of random numbers from $\mathcal{U}(0,1)$ and check if in the tests the properties are conserved.\\
 \item The problem with this approach is that there is an infinite number of tests like this that one would have to do :(
 \item In practice one can only prove that the generator is bad, but not that it's good.
 \item There is no way to guarantee that if the $n$ tests are fulfilled the $n+1$ will not fail!
\ARROW The testing can be only in terms of so-called negative selection.\\
\ARROW With each passed test our trust in the generator increases, but it's not GM cars! There is no guarantee.


\begin{frame}\frametitle{General methodology, example}
 \ARROW Let's assume we have a generator that has $\mathcal{U}(0,1)$:
 \item We generate $n$ numbers ($n$ is fixed).
 \item From them we calculate the value of a test function $T$.
 \item We calculate $F(T)$, where $F$ is the CDF of the $T$ statistic.
 \item Repeat the procedure $N$ times: $T_1, ..., T_N$ and $F(T_1),...,F(T_N)$.
 \ARROW If the generator is good (the hypothesis of $\mathcal{U}(0,1)$ is true), the $F(T_1), ..., F(T_N)$ will have the distribution of $\mathcal{U}(0,1)$. One usually quotes the credibility level of a test!\\
 \ARROW There are a number of tests in the literature that the generator can be subjected to:
\item ``The Art of Computer Programming'' by Donald Knuth
\item \texttt{DIEHARD} by G.Marsaglia, \href{}{\url{}}
\ARROW {\color{RoyalPurple} Modern approach:}\\
Use the same formalism as in the studies of classical chaotic dynamical systems (the same formalism is used in the modern generators).\\
\ARROW The \texttt{RANLUX} generator fulfils the chaotic test and all known classical tests (not surprisingly ;) )



\begin{frame}\frametitle{The $\chi^2$ test with $\mathcal{U}(0,1)$}
 \ARROW The algorithm:
 \item Divide the $[0,1)$ into $k$ subdivisions:
 \begin{align*}
 0=a_0<a_1<\dots<a_k=1.
 \end{align*}
 \item Let $\lbrace X_1,X_2,\dots,X_n\rbrace$ be a series of $n$ generated numbers, $n_i$ of which fall in the interval $[a_{i-1}, a_i)$. The $p_i=P(a_{i-1}\leq X<a_i) =a_i-a_{i-1}$.
 \item A random variable:
 \begin{align*}
 \chi^2_k=\sum_{i=1}^k \dfrac{(n_i-np_i)^2}{np_i},~~~~n=\sum_{i=1}^kn_i,
 \end{align*}
 has a $\chi^2$ distribution with $k-1$ degrees of freedom.
 \ARROW The above hypothesis verifies if the random numbers are indeed $\mathcal{U}(0,1)$.\\
 \ARROW The $\chi^2$ distribution: $X \in  \mathbb{R}, X>0, N \in \mathbb{N}$:
 \begin{align*}
 \rho(X)=\dfrac{1}{2} \left(\dfrac{X}{2}\right)^{\frac{N}{2}-1} e^{-\frac{X}{2} }\left[\Gamma \left(\frac{N}{2}\right)\right]^{-1},~~~~E(X)=N,~~~V(X)=2N
 \end{align*}


\begin{frame}\frametitle{The multi dimension test}
 \ARROW From the obtained numbers we construct $m$-dimensional points:\begin{align*}
 (X_1,X_2,...,X_m),(X_{m+1},...,X_{2m}),...,(X_{(n-1)m+1},...,X_{nm})
 \end{align*}
 \item In principle they should have a uniform distribution in the $(0,1)^m$ hypercube.
 \item We divide each edge of the hypercube into $k$ equal subdivisions: $[(j-1)/k, j/k),~j=1...k$.
 \item Now: $n_i$ is the number of $m$-dimensional points which are in the $i$-th sub-hypercube.
 \item The $\chi^2$ test statistic:
 \begin{align*}
 \chi^2_{k^m-1} = \dfrac{k^m}{n} \sum_{i=1}^{k^m}n_i^2-n,~~~~n=\sum_{i=1}^{k^m} n_i
 \end{align*}
\ARROW Now we construct other points:
\item For $N$ random numbers we have $N-m+1$ such points.
\item We define the statistics:
\begin{align*}
\psi_0^2=0,~~~~~\psi^2_m=\sum_{i=1}^{k^m}\dfrac{\left[n_i - \left(N-m+1 \right)/k^m\right]^2}{\left(N-m+1\right)/k^m},~~~m=1,2,...
\end{align*}
\item For large $N$ the $(\psi^2_m-\psi^2_{m-1})$ has a $\chi^2$ distribution with $k^m-k^{m-1}$ degrees of freedom.


\ARROW The OPSO test (G.~Marsaglia, 1984) is an analysis of pairs obtained from a random number generator.\\
$X_1,X_2,...,X_n$ - $n$ random numbers obtained from generator. From each number we take $b$ bits from which we construct a second series: $I_1, I_2,...,I_n$, where $I_j \in \left[0,1,...,2^b-1 \right]$.\end{exampleblock}
\ARROW Next we create the series of pairs:
\begin{align*}
(I_1,I_2),(I_2,I_3),...,(I_{n-1}, I_n)
\end{align*}
\ARROW $Y$ --- the number of pairs from $\lbrace (i,j):i,j=0,1,...,2^b-1\rbrace$ which DIDN'T occur in the above series.
\ARROW This kind of test can be extended to triples and quadruples.\\
\ARROW See DIEHARD, G.~Marsaglia 1993 \href{}{} 


\begin{frame}\frametitle{Kolmogorov - Smirnov}
\ARROW The K-S test is used to check if a random variable has the CDF $F$. The test is based on the difference between the empirical and the hypothesised distributions:
\begin{align*}
D_n=\sup_{-\infty < x<\infty}  \vert F_n(x) -F(x) \vert,~~~~~F_n(x) =\dfrac{1}{n}\sum_{j=1}^n \Theta(x-X_j).
\end{align*}
\ARROW If the random generator is from the $F$ distribution then the $D_n \to 0$ with the probability 1.\\ \ARROW Large values of $D_n$ exclude the generator.
\ARROW The critical values of the test $D_n(\alpha)$ can be found in the mathematical tables for every $\alpha$:
\ARROW They do not depend on the $F$ function.\\
\ARROW For the $\mathcal{U}(0,1)$:


\begin{frame}\frametitle{Kolmogorov - Smirnov in practice}

\begin{exampleblock}{ Take note:}
Empirical CDF of $F_n$ is a step function and $\sup_{-\infty < x<\infty} \vert F_n(x) -F(x) \vert$ is achieved only in one point!
\ARROW In practice one should sort the numbers: $X_1,..., X_n$ and calculate the following:
\begin{align*}
D_n^{+}=\max_{1 \leq i\leq n} \left(\dfrac{i}{n} - F(X_{i:n}) \right),~~~~D_n^{-}=\max_{1\leq i\leq n} \left(F(X_{i:n})- \dfrac{i-1}{n}\right)\\
D_n=\max\lbrace D_n^+, D_n^- \rbrace
\end{align*}
where $X_{i:n}$ is the so-called order statistic: $X_{1:n} \leq X_{2:n} \leq \dots \leq X_{n:n}$.\\
\ARROW The statistic $D_n$ asymptotically (in practice for $n \geqslant 80$) approaches the Kolmogorov $\lambda$ distribution:
\begin{align*}
\lim_{n \to \infty} \mathcal{P} \lbrace\sqrt{n}D_n  \leqslant t \rbrace =K(t)= \sum_{j=-\infty}^{\infty} (-1)^j e^{-2j^2 t^2},~t>0
\end{align*}
for which the critical values $\lambda_{\alpha}$, defined by $\mathcal{P}\lbrace\sqrt{n}D_n >\lambda_{\alpha}\rbrace = \alpha$, can be found in the mathematical tables.\\
\ARROW Commonly the $\lambda_{0.1}=1.224$, $\lambda_{0.05}=1.358$, $\lambda_{0.01}=1.628$ are used.


\begin{frame}\frametitle{Statistic distributions test- sum test}
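\ARROW The $h$ function has the form:
\begin{align*}
y=x_1+x_2+\dots+x_m
\end{align*}
\ARROW The random variables follow the new pdf:
\begin{align*}
g_m(y)=\begin{cases}
\dfrac{1}{(m-1)!} \left[ y^{m-1} - {m \choose 1}(y-1)^{m-1} +{m \choose 2}(y-2)^{m-1}-\dots   \right]~~&{\rm for~}  0\leq y\leq m,\\
0~~&{\rm else}
\end{cases}
\end{align*}
where you stop summing at the first term for which $y-j$ is negative.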
\ARROW For $m=2$ we have the triangle pdf:
\begin{align*}
g_2(y) =\begin{cases}
y,~{\rm for}~0 \leq y \leq 1\\
2-y,~{\rm for}~1 \leq y \leq 2
\end{cases}
\end{align*}
\ARROW For $m=3$ we have the piecewise parabolic pdf:
\begin{align*}
g_3(y) =\begin{cases}
\dfrac{1}{2} y^2,~{\rm for}~0 \leq y \leq 1\\
\dfrac{1}{2} \left[y^2-3(y-1)^2\right],~{\rm for}~1 \leq y \leq 2\\
\dfrac{1}{2} \left[y^2-3(y-1)^2 +3(y-2)^2  \right],~{\rm for}~2 \leq y \leq 3
\end{cases}
\end{align*}
\ARROW For large $m$ the $g_m$ approaches the normal distribution.


\begin{frame}\frametitle{Statistic distributions test- $d^2$}
\ARROW For $m=4$ we define the $h$:
\begin{align*}
 y=(x_1-x_3)^2 +(x_2-x_4)^2
\end{align*}
aka the square distance between $(x_1,x_2)$ and $(x_3,x_4)$.\\
\ARROW If the $X_1$, $X_2$, $X_3$, $X_4$ are from $\mathcal{U}(0,1)$ then:
d^2 = (X_1-X_3)^2+(X_2-X_4)^2
has a cdf given by the following formula:
\begin{align*}
\mathcal{P}(d^2\leq y) =\begin{cases}
\pi y - \dfrac{8}{3}y^{\frac{3}{2}} + \dfrac{1}{2} y^2 ~~ &{\rm for }~0 \leq y \leq1 \\
\dfrac{1}{3}+(\pi-2)y+4(y-1)^{\frac{1}{2}}+\dfrac{8}{3}(y-1)^{\frac{3}{2}}-\dfrac{1}{2} y^2-4y\,{\rm arcsec}  (y^{\frac{1}{2}})~~ &{\rm for}~ 1 \leq y \leq2
\end{cases}
\end{align*}
\ARROW Test is to check if the generated numbers have the aforementioned distribution.


\begin{frame}\frametitle{Statistic distributions test- pair distance}
\ARROW Generate $n$ points from $(0,1)^m$. We take ${ n \choose 2}$ pairs of points and we calculate the distance between them.\\
\ARROW If $D$ is the smallest distance between the pairs $\longmapsto$ for the $\mathcal{U}(0,1)^m$ the $T=n^2D^m/2$ has the exponential distribution with the mean $1/V_m$, where $V_m$ is the hypervolume of the unit ball.\\
\ARROW In practice:\\
\item We generate $Nn$ points in the hypercube $(0,1)^m$, getting $N$ values of the $T$ statistic.
\item We compare the empirical distribution of $T$ with the exponential distribution.
\item WARNING: the $N,n,m$ need to be chosen smartly for the test to make sense.
\ARROW Linear generators usually fail this test!

\begin{frame}\frametitle{Statistic distributions test- series test}
\ARROW Let's assume our numbers are generated with a CDF $F$. The range of values of $F$ we divide into two disjoint subsets: $A$ and $B$. \\
\ARROW Furthermore we define the new variable $Y$ such that:
\begin{align*}
Y=\begin{cases}
a~~&{\rm if}~X \in A\\
b~~&{\rm if}~X \in B
\end{cases}
\end{align*}
\ARROW We transform the random number sequence $X_1,X_2,X_3,...,X_n$ into $Y_1,Y_2,Y_3,...,Y_n$. \\
\ARROW Next we make series: For example the $a,a,b,a,a, b,b,b, a$ will be grouped into $aa$, $b$, $aa$, $bbb$, $a$.\\
\ARROW Let $n_a$ be number of $a$ symbols in $Y_1,Y_2,Y_3,...,Y_n$. $n_b=N-n_a$. \\
\ARROW Distribution of number of series ($R$) is given by the equation:
\mathcal{P}(R=r, n_a,n_b)=\begin{cases}
2 {n_a- 1 \choose k-1}{n_b-1 \choose k-1}/{N \choose n_a}~{\rm if}~r=2k\\
[{n_a- 1 \choose k}{n_b-1 \choose k-1} +{n_a- 1 \choose k-1}{n_b-1 \choose k} ]    /{N \choose n_a}~{\rm if}~r=2k+1


\begin{frame}\frametitle{Statistic distributions test- poker test}
\ARROW The values of $X$ random variable we divide into $k$ identical sub samples:
\ARROW For $X_1,X_2,...,X_n$ from $\mathcal{U}(0,1)$:
\ARROW We create the new variables $Y_j$ accordingly:
\begin{align*}
Y_j=i~{\rm if}~X_j\in[a_{i},a_{i+1}),~i=0,1,...,k-1
\end{align*}
\ARROW Now we create ``the fives'':
\ARROW There are a couple of types of fives:
\item[aabcd] pair
\item[aaabc] three
\item[aaaab] four
\item[aaaaa] five


\begin{frame}\frametitle{Statistic distributions test- poker test}
\ARROW If the variables are independent then we can calculate the probability:
\ARROW In practice people choose: $k=2,8,10$\\
\ARROW The agreement of the distribution of the different types of fives is checked using the $\chi^2$ test.


\ARROW There is an infinite number of tests one can invent for testing generators.\\
\ARROW All of the tests are in the same spirit: invent a problem where you know the analytic solution, solve the problem with the generator and compare the results.\\
\ARROW Homework: use one of the previously implemented random number generators and:
\item E5.1 Test it with the $\chi^2$ test with $k=10$.
\item E5.2 Test it with the Kolmogorov--Smirnov test.
\item E5.3 Test it with the multidimensional test.




