diff --git a/Report/00_main.aux b/Report/00_main.aux index 717b169..8042a59 100644 --- a/Report/00_main.aux +++ b/Report/00_main.aux @@ -30,3 +30,9 @@ \bibcite{augustin2017mupix}{5} \bibcite{philipp2015hv}{6} \bibcite{augustin2015mupix}{7} +\bibcite{connor1994recurrent}{8} +\bibcite{grossberg2013recurrent}{9} +\bibcite{ML:XGBoost}{10} +\bibcite{chollet2015keras}{11} +\bibcite{abadi2016tensorflow}{12} +\bibcite{klambauer2017self}{13} diff --git a/Report/00_main.bbl b/Report/00_main.bbl index 212bee1..cd4b21d 100644 --- a/Report/00_main.bbl +++ b/Report/00_main.bbl @@ -1,4 +1,4 @@ -\begin{thebibliography}{1} +\begin{thebibliography}{10} \bibitem{thomson2013modern} Mark Thomson. @@ -48,4 +48,36 @@ experiment. \newblock {\em Journal of Instrumentation}, 10(03):C03044, 2015. +\bibitem{connor1994recurrent} +Jerome~T Connor, R~Douglas Martin, and Les~E Atlas. +\newblock Recurrent neural networks and robust time series prediction. +\newblock {\em IEEE transactions on neural networks}, 5(2):240--254, 1994. + +\bibitem{grossberg2013recurrent} +Stephen Grossberg. +\newblock Recurrent neural networks. +\newblock {\em Scholarpedia}, 8(2):1888, 2013. + +\bibitem{ML:XGBoost} +Tianqi Chen and Carlos Guestrin. +\newblock Xgboost: {A} scalable tree boosting system. +\newblock {\em CoRR}, abs/1603.02754, 2016. + +\bibitem{chollet2015keras} +Fran{\c{c}}ois Chollet et~al. +\newblock Keras: Deep learning library for theano and tensorflow. +\newblock {\em URL: https://keras. io/k}, 7(8), 2015. + +\bibitem{abadi2016tensorflow} +Mart{\'\i}n Abadi, Paul Barham, Jianmin Chen, Zhifeng Chen, Andy Davis, Jeffrey + Dean, Matthieu Devin, Sanjay Ghemawat, Geoffrey Irving, Michael Isard, et~al. +\newblock Tensorflow: a system for large-scale machine learning. +\newblock In {\em OSDI}, volume~16, pages 265--283, 2016. + +\bibitem{klambauer2017self} +G{\"u}nter Klambauer, Thomas Unterthiner, Andreas Mayr, and Sepp Hochreiter. +\newblock Self-normalizing neural networks. +\newblock In {\em Advances in Neural Information Processing Systems}, pages + 971--980, 2017. + \end{thebibliography} diff --git a/Report/00_main.log b/Report/00_main.log index ef2fa36..158f946 100644 --- a/Report/00_main.log +++ b/Report/00_main.log @@ -1,4 +1,4 @@ -This is pdfTeX, Version 3.14159265-2.6-1.40.19 (MiKTeX 2.9.6730 64-bit) (preloaded format=pdflatex 2018.7.26) 29 JUL 2018 11:38 +This is pdfTeX, Version 3.14159265-2.6-1.40.19 (MiKTeX 2.9.6730 64-bit) (preloaded format=pdflatex 2018.7.26) 29 JUL 2018 21:59 entering extended mode **./00_main.tex (00_main.tex @@ -1687,85 +1687,85 @@ [] - + File: img/beta_decay_feynman.png Graphic file (type png) Package pdftex.def Info: img/beta_decay_feynman.png used on input line 89. (pdftex.def) Requested size: 140.39958pt x 140.40762pt. - + File: img/muon-decay-feynman.png Graphic file (type png) Package pdftex.def Info: img/muon-decay-feynman.png used on input line 94. (pdftex.def) Requested size: 140.39958pt x 119.05476pt. 
-Underfull \hbox (badness 10000) in paragraph at lines 101--104 +Underfull \hbox (badness 10000) in paragraph at lines 102--105 [] -Underfull \hbox (badness 10000) in paragraph at lines 101--104 +Underfull \hbox (badness 10000) in paragraph at lines 102--105 [] [6 <./img/beta_decay_feynman.png> <./img/muon-decay-feynman.png>] -Underfull \hbox (badness 10000) in paragraph at lines 124--125 +Underfull \hbox (badness 10000) in paragraph at lines 125--126 [] -Underfull \hbox (badness 10000) in paragraph at lines 124--125 +Underfull \hbox (badness 10000) in paragraph at lines 125--126 [] -Underfull \hbox (badness 10000) in paragraph at lines 126--127 +Underfull \hbox (badness 10000) in paragraph at lines 127--128 [] -Underfull \hbox (badness 10000) in paragraph at lines 126--127 +Underfull \hbox (badness 10000) in paragraph at lines 127--128 [] [7] - + File: img/neutrino_oscillation.png Graphic file (type png) -Package pdftex.def Info: img/neutrino_oscillation.png used on input line 147. +Package pdftex.def Info: img/neutrino_oscillation.png used on input line 148. (pdftex.def) Requested size: 312.00119pt x 131.50337pt. -Underfull \hbox (badness 10000) in paragraph at lines 159--161 +Underfull \hbox (badness 10000) in paragraph at lines 160--162 [] -Underfull \hbox (badness 10000) in paragraph at lines 159--161 +Underfull \hbox (badness 10000) in paragraph at lines 160--162 [] -Underfull \hbox (badness 10000) in paragraph at lines 162--166 +Underfull \hbox (badness 10000) in paragraph at lines 163--167 [] [8 <./img/neutrino_oscillation.png>] - + File: img/LFV-neutrino_osc.png Graphic file (type png) -Package pdftex.def Info: img/LFV-neutrino_osc.png used on input line 170. +Package pdftex.def Info: img/LFV-neutrino_osc.png used on input line 171. (pdftex.def) Requested size: 140.39958pt x 70.74054pt. - + File: img/LFV-SUSY.png Graphic file (type png) -Package pdftex.def Info: img/LFV-SUSY.png used on input line 175. +Package pdftex.def Info: img/LFV-SUSY.png used on input line 176. (pdftex.def) Requested size: 140.39958pt x 71.59482pt. - + File: img/LFV-tree_lvl.png Graphic file (type png) -Package pdftex.def Info: img/LFV-tree_lvl.png used on input line 180. +Package pdftex.def Info: img/LFV-tree_lvl.png used on input line 181. (pdftex.def) Requested size: 140.39958pt x 94.67162pt. -) [9 <./img/LFV-neutrino_osc.png> <./img/LFV-SUSY.png> <./img/LFV-tree_lvl.png> -] [10] + [9 <./img/LFV-neutrino_osc.png> <./img/LFV-SUSY.png> <./img/LFV-tree_lvl.png>] +) [10] \openout2 = `02_mu_to_3e_decay.aux'. (02_mu_to_3e_decay.tex @@ -1822,27 +1822,27 @@ [] - + File: img/setup-Ia.png Graphic file (type png) Package pdftex.def Info: img/setup-Ia.png used on input line 23. (pdftex.def) Requested size: 312.00119pt x 145.84636pt. - + File: img/tracks-phase_I.png Graphic file (type png) Package pdftex.def Info: img/tracks-phase_I.png used on input line 28. (pdftex.def) Requested size: 140.39958pt x 143.79482pt. - + File: img/tracks-phase_II.png Graphic file (type png) Package pdftex.def Info: img/tracks-phase_II.png used on input line 33. (pdftex.def) Requested size: 140.39958pt x 145.23878pt. - + File: img/setup-Ib.png Graphic file (type png) Package pdftex.def Info: img/setup-Ib.png used on input line 38. (pdftex.def) Requested size: 390.0pt x 123.95313pt. - + File: img/setup-II.png Graphic file (type png) Package pdftex.def Info: img/setup-II.png used on input line 43. 
@@ -1851,67 +1851,131 @@ ] -Overfull \vbox (5.60493pt too high) has occurred while \output is active [] +Overfull \vbox (30.10492pt too high) has occurred while \output is active [] [15 <./img/setup-Ia.png> <./img/tracks-phase_I.png> <./img/tracks-phase_II.png> <./img/setup-Ib.png> <./img/setup-II.png>] -Underfull \hbox (badness 10000) in paragraph at lines 50--51 +Underfull \hbox (badness 10000) in paragraph at lines 51--52 [] -Underfull \hbox (badness 10000) in paragraph at lines 60--61 +Underfull \hbox (badness 10000) in paragraph at lines 61--62 [] -Underfull \hbox (badness 10000) in paragraph at lines 60--61 +Underfull \hbox (badness 10000) in paragraph at lines 61--62 [] [16] -Underfull \hbox (badness 10000) in paragraph at lines 62--66 +Underfull \hbox (badness 10000) in paragraph at lines 63--67 [] - + File: img/tracks_in_det_xy.png Graphic file (type png) -Package pdftex.def Info: img/tracks_in_det_xy.png used on input line 69. -(pdftex.def) Requested size: 312.00119pt x 363.32416pt. +Package pdftex.def Info: img/tracks_in_det_xy.png used on input line 70. +(pdftex.def) Requested size: 390.0pt x 454.15863pt. ) [17] [18 <./img/tracks_in_det_xy.png>] \openout2 = `04_machine_learning.aux'. - (04_machine_learning.tex) [19 + (04_machine_learning.tex +Underfull \hbox (badness 10000) in paragraph at lines 5--6 + + [] + + +Underfull \hbox (badness 10000) in paragraph at lines 11--12 + + [] + +[19 ] -(00_main.bbl) + +File: img/neural_network.png Graphic file (type png) + +Package pdftex.def Info: img/neural_network.png used on input line 26. +(pdftex.def) Requested size: 312.00119pt x 169.33112pt. + +File: img/neuron.png Graphic file (type png) + +Package pdftex.def Info: img/neuron.png used on input line 31. +(pdftex.def) Requested size: 156.0006pt x 99.30911pt. + +Underfull \hbox (badness 10000) in paragraph at lines 39--40 + + [] + +[20 <./img/neural_network.png> <./img/neuron.png>] + +File: img/selu.png Graphic file (type png) + +Package pdftex.def Info: img/selu.png used on input line 54. +(pdftex.def) Requested size: 175.49881pt x 120.31479pt. + +File: img/relu.png Graphic file (type png) + +Package pdftex.def Info: img/relu.png used on input line 59. +(pdftex.def) Requested size: 175.49881pt x 120.00516pt. + +File: img/tanh.png Graphic file (type png) + +Package pdftex.def Info: img/tanh.png used on input line 64. +(pdftex.def) Requested size: 175.49881pt x 115.55222pt. + +Underfull \hbox (badness 10000) in paragraph at lines 72--73 + + [] + +[21 <./img/selu.png> <./img/relu.png> <./img/tanh.png>] +LaTeX Font Info: Try loading font information for OMS+cmr on input line 89. + +("C:\Program Files\MiKTeX 2.9\tex\latex\base\omscmr.fd" +File: omscmr.fd 2014/09/29 v2.5h Standard LaTeX font definitions +) +LaTeX Font Info: Font shape `OMS/cmr/m/n' in size <12> not available +(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 89. + [22]) [23] +(00_main.bbl [24 + +]) Package atveryend Info: Empty hook `BeforeClearDocument' on input line 70. - [20 - -] + [25] Package atveryend Info: Empty hook `AfterLastShipout' on input line 70. - (00_main.aux (01_Standard_Model.aux) (02_mu_to_3e_decay.aux) (03_experimental_ -setup.aux) (04_machine_learning.aux)) + (00_main.aux (01_Standard_Model.aux) +(02_mu_to_3e_decay.aux) (03_experimental_setup.aux) (04_machine_learning.aux)) Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 70. Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 70. Package rerunfilecheck Info: File `00_main.out' has not changed. 
-(rerunfilecheck) Checksum: 7DCA6681AB20B1F3A3E3A90D403940D0;1465. +(rerunfilecheck) Checksum: 507A136055700225F066D39494392802;1605. Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 70. ) Here is how much of TeX's memory you used: - 19720 strings out of 492973 - 332943 string characters out of 3135932 - 429390 words of memory out of 3000000 - 23282 multiletter control sequences out of 15000+200000 + 19806 strings out of 492973 + 334275 string characters out of 3135932 + 430290 words of memory out of 3000000 + 23338 multiletter control sequences out of 15000+200000 548944 words of font info for 87 fonts, out of 3000000 for 9000 1141 hyphenation exceptions out of 8191 - 47i,16n,65p,1103b,571s stack positions out of 5000i,500n,10000p,200000b,50000s -pdfTeX warning (dest): name{Hfootnote.14} has been referenced but does not ex + 47i,19n,65p,1103b,571s stack positions out of 5000i,500n,10000p,200000b,50000s +pdfTeX warning (dest): name{Hfootnote.17} has been referenced but does not ex ist, replaced by a fixed one +pdfTeX warning (dest): name{Hfootnote.16} has been referenced but does not exis +t, replaced by a fixed one + +pdfTeX warning (dest): name{Hfootnote.15} has been referenced but does not exis +t, replaced by a fixed one + +pdfTeX warning (dest): name{Hfootnote.14} has been referenced but does not exis +t, replaced by a fixed one + pdfTeX warning (dest): name{Hfootnote.13} has been referenced but does not exis t, replaced by a fixed one @@ -1968,9 +2032,9 @@ onts/cm/cmsy7.pfb> -Output written on 00_main.pdf (20 pages, 656561 bytes). +Output written on 00_main.pdf (25 pages, 756234 bytes). PDF statistics: - 380 PDF objects out of 1000 (max. 8388607) - 78 named destinations out of 1000 (max. 500000) - 237 words of extra memory for PDF output out of 10000 (max. 10000000) + 453 PDF objects out of 1000 (max. 8388607) + 101 named destinations out of 1000 (max. 500000) + 278 words of extra memory for PDF output out of 10000 (max. 
10000000) diff --git a/Report/00_main.out b/Report/00_main.out index 9ec84c2..3ce0a73 100644 --- a/Report/00_main.out +++ b/Report/00_main.out @@ -20,3 +20,5 @@ \BOOKMARK [2][-]{subsection.4.4}{Experimental setup}{section.4}% 20 \BOOKMARK [2][-]{subsection.4.5}{The problem of low longitudinal momentum recurlers}{section.4}% 21 \BOOKMARK [1][-]{section.5}{Machine learning}{}% 22 +\BOOKMARK [2][-]{subsection.5.1}{Introduction}{section.5}% 23 +\BOOKMARK [2][-]{subsection.5.2}{Artificial neural networks}{section.5}% 24 diff --git a/Report/00_main.pdf b/Report/00_main.pdf index 1651709..07a74e2 100644 --- a/Report/00_main.pdf +++ b/Report/00_main.pdf Binary files differ diff --git a/Report/00_main.synctex.gz b/Report/00_main.synctex.gz index 0539c1d..783fa73 100644 --- a/Report/00_main.synctex.gz +++ b/Report/00_main.synctex.gz Binary files differ diff --git a/Report/00_main.toc b/Report/00_main.toc index 97b224a..2e08b37 100644 --- a/Report/00_main.toc +++ b/Report/00_main.toc @@ -21,3 +21,5 @@ \contentsline {subsection}{\numberline {4.4}Experimental setup}{14}{subsection.4.4} \contentsline {subsection}{\numberline {4.5}The problem of low longitudinal momentum recurlers}{17}{subsection.4.5} \contentsline {section}{\numberline {5}Machine learning}{19}{section.5} +\contentsline {subsection}{\numberline {5.1}Introduction}{19}{subsection.5.1} +\contentsline {subsection}{\numberline {5.2}Artificial neural networks}{19}{subsection.5.2} diff --git a/Report/01_Standard_Model.aux b/Report/01_Standard_Model.aux index 34bf2b2..057e74c 100644 --- a/Report/01_Standard_Model.aux +++ b/Report/01_Standard_Model.aux @@ -19,6 +19,7 @@ \newlabel{sub@beta-decay_feynman}{{a}{6}{Feynman diagram of the $\beta $-decay\relax }{figure.caption.5}{}} \newlabel{muon-decay_feynman}{{1b}{6}{Feynman diagram of a $\mu $-decay\relax }{figure.caption.5}{}} \newlabel{sub@muon-decay_feynman}{{b}{6}{Feynman diagram of a $\mu $-decay\relax }{figure.caption.5}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Certain diagrams of decays\relax }}{6}{figure.caption.5}} \citation{abe2008precision} \citation{adamson2011measurement} \@writefile{toc}{\contentsline {section}{\numberline {2}Physics beyond the SM}{7}{section.2}} @@ -34,6 +35,7 @@ \newlabel{sub@LFV-SUSY}{{b}{9}{LFV by using supersymmetric particles\relax }{figure.caption.7}{}} \newlabel{LFV-tree_lvl}{{3c}{9}{LFV at tree level\relax }{figure.caption.7}{}} \newlabel{sub@LFV-tree_lvl}{{c}{9}{LFV at tree level\relax }{figure.caption.7}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Charged LFV\relax }}{9}{figure.caption.7}} \@setckpt{01_Standard_Model}{ \setcounter{page}{11} \setcounter{equation}{2} diff --git a/Report/01_Standard_Model.tex b/Report/01_Standard_Model.tex index 14da527..4ffa87f 100644 --- a/Report/01_Standard_Model.tex +++ b/Report/01_Standard_Model.tex @@ -95,6 +95,7 @@ \caption{Feynman diagram of a $\mu$-decay} \label{muon-decay_feynman} \end{subfigure} +\caption{Certain diagrams of decays} \end{center} \end{figure} @@ -181,6 +182,7 @@ \caption{LFV at tree level} \label{LFV-tree_lvl} \end{subfigure} +\caption{Charged LFV} \end{center} \end{figure} diff --git a/Report/03_experimental_setup.aux b/Report/03_experimental_setup.aux index c3cf073..001ab2b 100644 --- a/Report/03_experimental_setup.aux +++ b/Report/03_experimental_setup.aux @@ -16,6 +16,7 @@ \newlabel{sub@setup_Ib}{{d}{15}{Setup of the detector in the second part of phase I\relax }{figure.caption.8}{}} \newlabel{setup_II}{{4e}{15}{Setup of the detector 
in phase II\relax }{figure.caption.8}{}} \newlabel{sub@setup_II}{{e}{15}{Setup of the detector in phase II\relax }{figure.caption.8}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Setup of the detector during different phases of the experiment\relax }}{15}{figure.caption.8}} \citation{augustin2017mupix} \citation{philipp2015hv} \citation{augustin2015mupix} diff --git a/Report/03_experimental_setup.tex b/Report/03_experimental_setup.tex index 60f81b3..a5a5f9a 100644 --- a/Report/03_experimental_setup.tex +++ b/Report/03_experimental_setup.tex @@ -44,6 +44,7 @@ \caption{Setup of the detector in phase II} \label{setup_II} \end{subfigure} +\caption{Setup of the detector during different phases of the experiment} \end{center} \end{figure}\newpage @@ -66,7 +67,7 @@ \begin{figure}[H] \begin{center} -\includegraphics[width=0.8\textwidth]{img/tracks_in_det_xy.png} +\includegraphics[width=1\textwidth]{img/tracks_in_det_xy.png} \caption{Particle recurling back into the center station} \label{recurler} \end{center} diff --git a/Report/04_machine_learning.aux b/Report/04_machine_learning.aux index ad2513a..d742127 100644 --- a/Report/04_machine_learning.aux +++ b/Report/04_machine_learning.aux @@ -1,37 +1,59 @@ \relax \providecommand\hyper@newdestlabel[2]{} +\citation{connor1994recurrent} +\citation{grossberg2013recurrent} +\citation{ML:XGBoost} +\citation{chollet2015keras} +\citation{abadi2016tensorflow} \@writefile{toc}{\contentsline {section}{\numberline {5}Machine learning}{19}{section.5}} +\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Introduction}{19}{subsection.5.1}} +\@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Artificial neural networks}{19}{subsection.5.2}} +\newlabel{neural_network_arch}{{6a}{20}{Architecture of a neural network\relax }{figure.caption.10}{}} +\newlabel{sub@neural_network_arch}{{a}{20}{Architecture of a neural network\relax }{figure.caption.10}{}} +\newlabel{neuron}{{6b}{20}{Neuron\relax }{figure.caption.10}{}} +\newlabel{sub@neuron}{{b}{20}{Neuron\relax }{figure.caption.10}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Neural network architecture\relax }}{20}{figure.caption.10}} +\citation{klambauer2017self} +\newlabel{selu}{{7a}{21}{Selu activation function\relax }{figure.caption.11}{}} +\newlabel{sub@selu}{{a}{21}{Selu activation function\relax }{figure.caption.11}{}} +\newlabel{relu}{{7b}{21}{Relu activation function\relax }{figure.caption.11}{}} +\newlabel{sub@relu}{{b}{21}{Relu activation function\relax }{figure.caption.11}{}} +\newlabel{tanh}{{7c}{21}{Tanh activation function\relax }{figure.caption.11}{}} +\newlabel{sub@tanh}{{c}{21}{Tanh activation function\relax }{figure.caption.11}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Activation functions\relax }}{21}{figure.caption.11}} +\newlabel{MSE}{{8}{22}{Artificial neural networks}{equation.5.8}{}} +\newlabel{BC}{{9}{22}{Artificial neural networks}{equation.5.9}{}} \@setckpt{04_machine_learning}{ -\setcounter{page}{20} -\setcounter{equation}{4} +\setcounter{page}{24} +\setcounter{equation}{9} \setcounter{enumi}{0} \setcounter{enumii}{0} \setcounter{enumiii}{0} \setcounter{enumiv}{0} -\setcounter{footnote}{14} +\setcounter{footnote}{17} \setcounter{mpfootnote}{0} \setcounter{part}{0} \setcounter{section}{5} -\setcounter{subsection}{0} +\setcounter{subsection}{2} \setcounter{subsubsection}{0} \setcounter{paragraph}{0} \setcounter{subparagraph}{0} -\setcounter{figure}{5} +\setcounter{figure}{7} 
\setcounter{table}{3} \setcounter{parentequation}{0} \setcounter{AM@survey}{0} \setcounter{ContinuedFloat}{0} -\setcounter{subfigure}{0} +\setcounter{subfigure}{3} \setcounter{subtable}{0} \setcounter{float@type}{4} \setcounter{Item}{0} -\setcounter{Hfootnote}{14} -\setcounter{bookmark@seq@number}{22} +\setcounter{Hfootnote}{17} +\setcounter{bookmark@seq@number}{24} \setcounter{@stackindex}{1} \setcounter{ROWcellindex@}{0} \setcounter{TABrowindex@}{2} \setcounter{TABcolindex@}{1} \setcounter{TABalignmentindex@}{0} \setcounter{pp@next@reset}{0} -\setcounter{section@level}{1} +\setcounter{section@level}{2} } diff --git a/Report/04_machine_learning.tex b/Report/04_machine_learning.tex index 629fbc7..c2fe3bf 100644 --- a/Report/04_machine_learning.tex +++ b/Report/04_machine_learning.tex @@ -1,3 +1,110 @@ \section{Machine learning} -Machine learning has already proven itself to be very successful in resolving many problems in numerous other areas of science and also in the private sector. Based on these promising results, scientists are eager to study the potential of machine learning in physics. \ No newline at end of file +\subsection{Introduction} + +Machine learning has already proven itself to be very successful in resolving many problems in numerous other areas of science and also in the private sector. Based on these promising results, scientists are eager to study the potential of machine learning in physics.\\ + +There are several branches of machine learning. In this thesis, we will focus mainly on neural networks (NN), with special attention to recurrent neural networks (RNN) \cite{connor1994recurrent, grossberg2013recurrent}, and on XGBoost (XGB) \cite{ML:XGBoost} models based on boosted decision trees. + +\subsection{Artificial neural networks} + +The fundamental concept behind artificial neural networks is to imitate the architecture of the human brain. They can be used for classification problems as well as regression problems. In its simplest form, a neural network can be thought of as a mapping from some input to some target. For this thesis, two neural networks of a special subtype, called recurrent neural networks, were used. All of the networks used in this thesis were written in the Python library Keras \cite{chollet2015keras} with a TensorFlow \cite{abadi2016tensorflow} backend. In this section the basic principles of neural networks will be explained.\\ + +A neural network consists of many neurons organized in layers, as seen in figure \ref{neural_network_arch}. Each neuron is connected to every neuron in the neighbouring layers, while each of these connections has a specific weight assigned to it.\\ +In its most basic form, each neuron calculates a weighted sum over all of its inputs and then adds a bias to it. In addition, each neuron has an activation function, which is applied at the end of the calculation (see also figure \ref{neuron}): + +\begin{equation} +y = f_{activation} \left(\sum^n_{i=1}(x_i\cdot w_i) + b\right) +\end{equation} + +This is done to create non-linearity in the system. Later, some more complex architectures of neurons will be presented.\\ +The first layer, also called the input layer, is always defined by the number of inputs, with one dimension for each input. The dimensions of the following layers (excluding the last one), which are also called hidden layers, can be chosen to be an arbitrary number. The number of dimensions of the last layer, also called the output layer, is determined by the dimensionality of the prediction. The number of hidden layers and their corresponding dimensions change the performance of the system.
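+
+As a minimal sketch of how such a layered network is declared in practice (the layer sizes, activations and input dimension below are hypothetical examples, not the configuration of the networks used in this thesis), a simple feed-forward model could be written in Keras as:
+
+\begin{verbatim}
+from keras.models import Sequential
+from keras.layers import Dense
+
+model = Sequential()
+# first hidden layer; input_shape defines the input layer (here 10 features)
+model.add(Dense(32, activation='selu', input_shape=(10,)))
+# further hidden layers can have an arbitrary number of neurons
+model.add(Dense(32, activation='selu'))
+# output layer: its dimension is fixed by the prediction (here one value)
+model.add(Dense(1, activation='sigmoid'))
+model.compile(optimizer='adam', loss='binary_crossentropy')
+\end{verbatim}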
+ +\begin{figure}[H] +\begin{center} +\begin{subfigure}{0.8\textwidth} +\includegraphics[width=1\textwidth]{img/neural_network.png} +\caption{Architecture of a neural network} +\label{neural_network_arch} +\end{subfigure} +\begin{subfigure}{0.5\textwidth} +\includegraphics[width=0.8\textwidth]{img/neuron.png} +\caption{Neuron} +\label{neuron} +\end{subfigure} +\caption{Neural network architecture} +\end{center} +\end{figure} + +There is no way of knowing in advance how many dimensions and layers will give the best performance, as one can only state the general effects of modifying them. Generally, increasing the number of layers enables the system to solve more complex problems, while more dimensions make the system more flexible. However, even these general guidelines are to be applied with caution. For example, adding too many layers can cause the system to train exceedingly slowly, whilst adding too many neurons with too small a training set can result in overfitting\footnote{When a system performs well on the training set but poorly on the test set}. Depending on the problem and the data given, each system has its own optimal configuration. By gaining more experience with NNs, one can make better initial guesses of where to start. However, in the end finding the optimal configuration always comes down to some sort of systematic trial and error.\\ + +The two RNNs used in this thesis use the $selu$ \cite{klambauer2017self}, $tanh$ and $relu$ activation functions. + +\begin{align} +selu(x) &= \lambda \begin{cases} + x, & \text{if $x>0$}\\ + \alpha e^x - \alpha, & \text{otherwise} + \end{cases}\\ +relu(x) &= \max(0, x) +\end{align} + +\begin{figure}[H] +\begin{center} +\begin{subfigure}{0.45\textwidth} +\includegraphics[width=1\textwidth]{img/selu.png} +\caption{Selu activation function} +\label{selu} +\end{subfigure} +\begin{subfigure}{0.45\textwidth} +\includegraphics[width=1\textwidth]{img/relu.png} +\caption{Relu activation function} +\label{relu} +\end{subfigure} +\begin{subfigure}{0.45\textwidth} +\includegraphics[width=1\textwidth]{img/tanh.png} +\caption{Tanh activation function} +\label{tanh} +\end{subfigure} +\caption{Activation functions} +\end{center} +\end{figure} + +Here $\lambda$ and $\alpha$ are fixed parameters\footnote{For standard scaled inputs (mean $= 0$, stddev. $=1.0$): $\alpha \approx 1.6732$, $\lambda \approx 1.0507$}. The $selu$ has the advantage of normalizing its output. As a rule of thumb, normalized inputs usually tend to give better results (the outputs of the neurons in one layer are the inputs of the neurons in the next). Using $tanh$ was the standard approach for a long time, although it has a disadvantage compared with the other activation functions: its slope becomes very small for large inputs, which slows down training noticeably.\\
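+
+For completeness, the third activation function used, $tanh$, is the hyperbolic tangent,
+
+\begin{equation}
+tanh(x) = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}},
+\end{equation}
+
+which maps its input to the range $(-1, 1)$.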
+ +The neural network is trained with a sample of events. This sample consists of a set of input parameters and a training target, which is the value the neural network will be trained to predict. Three important terms for the training of a neural network are epochs, batch size and loss function.\\ +An epoch refers to one training iteration, in which all of the training samples are used once and the weights and biases are modified to fit the desired targets better. Usually a system is trained over many epochs, until the weights and biases stay approximately constant at their optimal values.\\ +Batch size refers to the number of examples that are given to the system at once during the training. The batch size should neither be chosen too small, as small batch sizes train more slowly, nor too big, as some randomness is wanted. Experience shows that a reasonable batch size usually lies between 10 and 100 examples per batch. It is important to note that by decreasing the batch size we make the minimum we want to find wider. This makes finding the general area of the minimum easier. However, if the minimum gets too wide, the slope becomes too small to reach the minimum in a reasonable time. On the other hand, by increasing the batch size too much, the minimum gets exceedingly narrow and it is possible to keep ``jumping'' over the minimum with every training step performed.\\ +To train the system, we need some way to quantify the quality of our predictions. For this we use a loss function. A loss function takes the predicted values of the system and the target values and returns a single number quantifying our performance. There are various loss functions. In the two RNNs, ``mean squared error'' (MSE, formula \ref{MSE}) and ``binary cross-entropy'' (BC, formula \ref{BC}) were used. The goal of every NN is to minimize the loss function. + +\begin{align} +L(w,b) &= \frac{1}{n} \sum^n_{i=1} (\hat{Y}_i(w_i,b_i) - Y_i)^2 +\label{MSE}\\ +L(w,b) &= - \frac{1}{n} \sum^n_{i=1}\left(Y_i \log(\hat{Y}_i(w_i,b_i))+(1-Y_i) \log(1-\hat{Y}_i(w_i,b_i))\right) +\label{BC} +\end{align} + +With: + +\begin{itemize} +\item $w_i$ are the weights of the system +\item $b_i$ are the biases of the system +\item $\hat{Y}_i(w_i,b_i)$ are the predicted values of the system +\item $Y_i$ are the targets for the predictions +\item $L(w,b)$ is the loss over $n$ events +\end{itemize} + +There exist several methods to minimize the loss, the simplest one being stochastic gradient descent (SGD). When performing SGD, we calculate the gradient of the loss and apply it directly to the weights and biases. By doing this repeatedly, we eventually end up in a minimum\footnote{It is quite possible to get stuck in a local minimum instead of the global one}.\\ +Training algorithms working with momentum are essentially an improved version of SGD. To circumvent the problem of getting stuck in a local minimum, the gradient can build up momentum from the past gradients. This is done by adding a momentum term to the changes applied to the weights and biases. The momentum is an exponentially decaying average over past gradients. This generally trains faster than SGD and has less potential to get stuck in local minima.\\ +Another commonly used modification of stochastic gradient descent is an adaptive learning rate, as implemented in the optimizer called RMSProp. This algorithm scales the learning rate of each individual parameter by an exponentially decaying average of the past squared gradients. The adaptation of the learning rate is done to set a large learning rate if the past gradients were small, in order to increase the step size, and vice versa. The average of the past squared gradients is exponentially decaying since otherwise the learning rate would get very small after a few iterations.\\ +The optimizer used in this thesis is called Adam, which stands for Adaptive Moment Estimation [25]. Adam can be described as a combination of the Momentum and RMSProp methods, since estimates of the first and second moments of the past gradients are used to scale the learning rate of each individual parameter. The first moment is an exponentially decaying average of past gradients, as in Momentum, and the second moment is an exponentially decaying average of past squared gradients, as in RMSProp.
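+
+As a schematic illustration (these are the standard textbook update rules, written for a generic weight $w$; they are not meant to reproduce the exact Keras implementation), the optimizers discussed above can be summarized with learning rate $\eta$ and gradient $g_t = \nabla_w L(w_t)$ as follows:
+
+\begin{align}
+\text{SGD:} \quad & w_{t+1} = w_t - \eta \, g_t\\
+\text{Momentum:} \quad & v_{t+1} = \gamma \, v_t + \eta \, g_t, \qquad w_{t+1} = w_t - v_{t+1}\\
+\text{RMSProp:} \quad & E[g^2]_t = \rho \, E[g^2]_{t-1} + (1-\rho) \, g_t^2, \qquad w_{t+1} = w_t - \frac{\eta}{\sqrt{E[g^2]_t} + \epsilon} \, g_t\\
+\text{Adam:} \quad & m_t = \beta_1 m_{t-1} + (1-\beta_1) \, g_t, \qquad v_t = \beta_2 v_{t-1} + (1-\beta_2) \, g_t^2,\\
+& w_{t+1} = w_t - \frac{\eta}{\sqrt{\hat{v}_t} + \epsilon} \, \hat{m}_t
+\end{align}
+
+Here $\gamma$, $\rho$, $\beta_1$, $\beta_2$ and $\epsilon$ are fixed hyperparameters, and $\hat{m}_t = m_t/(1-\beta_1^t)$ and $\hat{v}_t = v_t/(1-\beta_2^t)$ are the bias-corrected moment estimates.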
\ No newline at end of file diff --git a/Report/ML.bib b/Report/ML.bib index 8262e02..a78211b 100644 --- a/Report/ML.bib +++ b/Report/ML.bib @@ -1,17 +1,3 @@ -%%% Machine Learning Papers - -@article{ML:XGBoost, - author = {Tianqi Chen and - Carlos Guestrin}, - title = {XGBoost: {A} Scalable Tree Boosting System}, - journal = {CoRR}, - volume = {abs/1603.02754}, - year = {2016}, - url = {http://arxiv.org/abs/1603.02754}, - timestamp = {Sat, 02 Apr 2016 11:49:48 +0200}, - biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/ChenG16}, - bibsource = {dblp computer science bibliography, http://dblp.org} -} @article{ML:ROC_AUC:Bradley:1997:UAU:1746432.1746434, author = {Bradley, Andrew P.}, @@ -31,9 +17,9 @@ publisher = {Elsevier Science Inc.}, address = {New York, NY, USA}, keywords = {Accuracy measures, Cross-validation, Standard error, The ROC curve, The area under the ROC curve (AUC), Wilcoxon statistic}, -} +} -%%% Gradient boosted reweighting +% Gradient boosted reweighting @article{Rogozhnikov:boostedreweighting, author = "Alves~Jr., A. A. and others", title = "{The \lhcb detector at the LHC}", diff --git a/Report/bib/General.bib b/Report/bib/General.bib index 9f3ab03..29c847b 100644 --- a/Report/bib/General.bib +++ b/Report/bib/General.bib @@ -127,4 +127,102 @@ pages={C03044}, year={2015}, publisher={IOP Publishing} +} + +%Keras +@article{chollet2015keras, + title={Keras: Deep learning library for theano and tensorflow}, + author={Chollet, Fran{\c{c}}ois and others}, + journal={URL: https://keras.
io/k}, + volume={7}, + number={8}, + year={2015} +} + +%Tensorflow +@inproceedings{abadi2016tensorflow, + title={Tensorflow: a system for large-scale machine learning.}, + author={Abadi, Mart{\'\i}n and Barham, Paul and Chen, Jianmin and Chen, Zhifeng and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and Ghemawat, Sanjay and Irving, Geoffrey and Isard, Michael and others}, + booktitle={OSDI}, + volume={16}, + pages={265--283}, + year={2016} +} + +%Batchnorm in between layers +@article{cooijmans2016recurrent, + title={Recurrent batch normalization}, + author={Cooijmans, Tim and Ballas, Nicolas and Laurent, C{\'e}sar and G{\"u}l{\c{c}}ehre, {\c{C}}a{\u{g}}lar and Courville, Aaron}, + journal={arXiv preprint arXiv:1603.09025}, + year={2016} +} + +@article{ioffe2015batch, + title={Batch normalization: Accelerating deep network training by reducing internal covariate shift}, + author={Ioffe, Sergey and Szegedy, Christian}, + journal={arXiv preprint arXiv:1502.03167}, + year={2015} +} + +%XGBoost +@inproceedings{chen2016xgboost, + title={Xgboost: A scalable tree boosting system}, + author={Chen, Tianqi and Guestrin, Carlos}, + booktitle={Proceedings of the 22nd acm sigkdd international conference on knowledge discovery and data mining}, + pages={785--794}, + year={2016}, + organization={ACM} +} + +%ROC curve +@article{ML:ROC_AUC:Bradley:1997:UAU:1746432.1746434, + author = {Bradley, Andrew P.}, + title = {The Use of the Area Under the ROC Curve in the Evaluation of Machine Learning Algorithms}, + journal = {Pattern Recogn.}, + issue_date = {July, 1997}, + volume = {30}, + number = {7}, + month = jul, + year = {1997}, + issn = {0031-3203}, + pages = {1145--1159}, + numpages = {15}, + url = {http://dx.doi.org/10.1016/S0031-3203(96)00142-2}, + doi = {10.1016/S0031-3203(96)00142-2}, + acmid = {1746434}, + publisher = {Elsevier Science Inc.}, + address = {New York, NY, USA}, + keywords = {Accuracy measures, Cross-validation, Standard error, The ROC curve, The area under the ROC curve (AUC), Wilcoxon statistic}, +} + +%RNN +@article{grossberg2013recurrent, + title={Recurrent neural networks}, + author={Grossberg, Stephen}, + journal={Scholarpedia}, + volume={8}, + number={2}, + pages={1888}, + year={2013} +} + +@article{connor1994recurrent, + title={Recurrent neural networks and robust time series prediction}, + author={Connor, Jerome T and Martin, R Douglas and Atlas, Les E}, + journal={IEEE transactions on neural networks}, + volume={5}, + number={2}, + pages={240--254}, + year={1994}, + publisher={IEEE} +} + +%Adam +@inproceedings{chilimbi2014project, + title={Project Adam: Building an Efficient and Scalable Deep Learning Training System.}, + author={Chilimbi, Trishul M and Suzue, Yutaka and Apacible, Johnson and Kalyanaraman, Karthik}, + booktitle={OSDI}, + volume={14}, + pages={571--582}, + year={2014} } \ No newline at end of file diff --git a/Report/img/Basic-neural-network-unit-neuron-node-McCulloch-and-Pitts-1943.png b/Report/img/Basic-neural-network-unit-neuron-node-McCulloch-and-Pitts-1943.png new file mode 100644 index 0000000..fa345ed --- /dev/null +++ b/Report/img/Basic-neural-network-unit-neuron-node-McCulloch-and-Pitts-1943.png Binary files differ diff --git a/Report/img/neural_network.png b/Report/img/neural_network.png new file mode 100644 index 0000000..aa84059 --- /dev/null +++ b/Report/img/neural_network.png Binary files differ diff --git a/Report/img/neuron.png b/Report/img/neuron.png new file mode 100644 index 0000000..4ff843d --- /dev/null +++ b/Report/img/neuron.png Binary 
files differ diff --git a/Report/img/relu.png b/Report/img/relu.png new file mode 100644 index 0000000..8d586cb --- /dev/null +++ b/Report/img/relu.png Binary files differ diff --git a/Report/img/selu.png b/Report/img/selu.png new file mode 100644 index 0000000..8b4d1c9 --- /dev/null +++ b/Report/img/selu.png Binary files differ diff --git a/Report/img/tanh.png b/Report/img/tanh.png new file mode 100644 index 0000000..99fccd7 --- /dev/null +++ b/Report/img/tanh.png Binary files differ