diff --git a/Report/00_main.aux b/Report/00_main.aux index 8042a59..1b41696 100644 --- a/Report/00_main.aux +++ b/Report/00_main.aux @@ -21,6 +21,10 @@ \@input{02_mu_to_3e_decay.aux} \@input{03_experimental_setup.aux} \@input{04_machine_learning.aux} +\@input{05_Data.aux} +\@input{06_RNN_used.aux} +\@input{07_Analysis.aux} +\@input{08_Appendix.aux} \bibstyle{unsrt} \bibdata{bib/General} \bibcite{thomson2013modern}{1} @@ -36,3 +40,14 @@ \bibcite{chollet2015keras}{11} \bibcite{abadi2016tensorflow}{12} \bibcite{klambauer2017self}{13} +\bibcite{chilimbi2014project}{14} +\bibcite{ioffe2015batch}{15} +\bibcite{cooijmans2016recurrent}{16} +\bibcite{schuster1997bidirectional}{17} +\bibcite{gers1999learning}{18} +\bibcite{chung2014empirical}{19} +\bibcite{agostinelli2003s}{20} +\bibcite{pedregosa2011scikit}{21} +\bibcite{ML:ROC_AUC:Bradley:1997:UAU:1746432.1746434}{22} +\bibcite{gent1992special}{23} +\bibcite{graves2013speech}{24} diff --git a/Report/00_main.bbl b/Report/00_main.bbl index cd4b21d..aefb3af 100644 --- a/Report/00_main.bbl +++ b/Report/00_main.bbl @@ -80,4 +80,70 @@ \newblock In {\em Advances in Neural Information Processing Systems}, pages 971--980, 2017. +\bibitem{chilimbi2014project} +Trishul~M Chilimbi, Yutaka Suzue, Johnson Apacible, and Karthik Kalyanaraman. +\newblock Project adam: Building an efficient and scalable deep learning + training system. +\newblock In {\em OSDI}, volume~14, pages 571--582, 2014. + +\bibitem{ioffe2015batch} +Sergey Ioffe and Christian Szegedy. +\newblock Batch normalization: Accelerating deep network training by reducing + internal covariate shift. +\newblock {\em arXiv preprint arXiv:1502.03167}, 2015. + +\bibitem{cooijmans2016recurrent} +Tim Cooijmans, Nicolas Ballas, C{\'e}sar Laurent, {\c{C}}a{\u{g}}lar + G{\"u}l{\c{c}}ehre, and Aaron Courville. +\newblock Recurrent batch normalization. +\newblock {\em arXiv preprint arXiv:1603.09025}, 2016. + +\bibitem{schuster1997bidirectional} +Mike Schuster and Kuldip~K Paliwal. +\newblock Bidirectional recurrent neural networks. +\newblock {\em IEEE Transactions on Signal Processing}, 45(11):2673--2681, + 1997. + +\bibitem{gers1999learning} +Felix~A Gers, J{\"u}rgen Schmidhuber, and Fred Cummins. +\newblock Learning to forget: Continual prediction with lstm. +\newblock 1999. + +\bibitem{chung2014empirical} +Junyoung Chung, Caglar Gulcehre, KyungHyun Cho, and Yoshua Bengio. +\newblock Empirical evaluation of gated recurrent neural networks on sequence + modeling. +\newblock {\em arXiv preprint arXiv:1412.3555}, 2014. + +\bibitem{agostinelli2003s} +S~Agostinelli. +\newblock S. agostinelli et al.(geant4 collaboration), nucl. instrum. methods + phys. res., sect. a 506, 250 (2003). +\newblock {\em Nucl. Instrum. Methods Phys. Res., Sect. A}, 506:250, 2003. + +\bibitem{pedregosa2011scikit} +Fabian Pedregosa, Ga{\"e}l Varoquaux, Alexandre Gramfort, Vincent Michel, + Bertrand Thirion, Olivier Grisel, Mathieu Blondel, Peter Prettenhofer, Ron + Weiss, Vincent Dubourg, et~al. +\newblock Scikit-learn: Machine learning in python. +\newblock {\em Journal of machine learning research}, 12(Oct):2825--2830, 2011. + +\bibitem{ML:ROC_AUC:Bradley:1997:UAU:1746432.1746434} +Andrew~P. Bradley. +\newblock The use of the area under the roc curve in the evaluation of machine + learning algorithms. +\newblock {\em Pattern Recogn.}, 30(7):1145--1159, July 1997. + +\bibitem{gent1992special} +CR~Gent and CP~Sheppard. +\newblock Special feature. predicting time series by a fully connected neural + network trained by back propagation. 
+\newblock {\em Computing \& Control Engineering Journal}, 3(3):109--112, 1992. + +\bibitem{graves2013speech} +Alex Graves, Abdel-rahman Mohamed, and Geoffrey Hinton. +\newblock Speech recognition with deep recurrent neural networks. +\newblock In {\em Acoustics, speech and signal processing (icassp), 2013 ieee + international conference on}, pages 6645--6649. IEEE, 2013. + \end{thebibliography} diff --git a/Report/00_main.blg b/Report/00_main.blg index 18a3722..986f252 100644 --- a/Report/00_main.blg +++ b/Report/00_main.blg @@ -3,5 +3,15 @@ A level-1 auxiliary file: 02_mu_to_3e_decay.aux A level-1 auxiliary file: 03_experimental_setup.aux A level-1 auxiliary file: 04_machine_learning.aux +A level-1 auxiliary file: 05_Data.aux +A level-1 auxiliary file: 06_RNN_used.aux +A level-1 auxiliary file: 07_Analysis.aux +A level-1 auxiliary file: 08_Appendix.aux The style file: unsrt.bst Database file #1: bib/General.bib +Repeated entry---line 221 of file bib/General.bib + : @article{ML:ROC_AUC:Bradley:1997:UAU:1746432.1746434 + : , +I'm skipping whatever remains of this entry +Warning--empty journal in gers1999learning +(There was 1 error message) diff --git a/Report/00_main.dvi b/Report/00_main.dvi new file mode 100644 index 0000000..3b34c80 --- /dev/null +++ b/Report/00_main.dvi Binary files differ diff --git a/Report/00_main.log b/Report/00_main.log index 158f946..0c34dd5 100644 --- a/Report/00_main.log +++ b/Report/00_main.log @@ -1,4 +1,4 @@ -This is pdfTeX, Version 3.14159265-2.6-1.40.19 (MiKTeX 2.9.6730 64-bit) (preloaded format=pdflatex 2018.7.26) 29 JUL 2018 21:59 +This is pdfTeX, Version 3.14159265-2.6-1.40.19 (MiKTeX 2.9.6730 64-bit) (preloaded format=pdflatex 2018.7.26) 1 AUG 2018 16:06 entering extended mode **./00_main.tex (00_main.tex @@ -1548,8 +1548,12 @@ Package footmisc Info: Declaring symbol style lamport*-robust on input line 924 . ) (00_main.aux -(01_Standard_Model.aux) (02_mu_to_3e_decay.aux) (03_experimental_setup.aux) -(04_machine_learning.aux)) +(01_Standard_Model.aux) (02_mu_to_3e_decay.aux) (03_experimental_setup.aux + +LaTeX Warning: Label `recurler' multiply defined. + +) (04_machine_learning.aux) (05_Data.aux) (06_RNN_used.aux) (07_Analysis.aux) +(08_Appendix.aux)) \openout1 = `00_main.aux'. LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 36. @@ -1566,8 +1570,7 @@ LaTeX Font Info: ... okay on input line 36. LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 36. LaTeX Font Info: ... okay on input line 36. - -("C:\Program Files\MiKTeX 2.9\tex\context\base\supp-pdf.mkii" + ("C:\Program Files\MiKTeX 2.9\tex\context\base\supp-pdf.mkii" [Loading MPS to PDF converter (version 2006.09.02).] \scratchcounter=\count284 \scratchdimen=\dimen163 @@ -1637,135 +1640,120 @@ ) [1 {C:/Users/sa_li/AppData/Local/MiKTeX/2.9/pdftex/config/pdftex.map}] (00_main.to -c) +c [2]) \tf@toc=\write4 \openout4 = `00_main.toc'. - [2] + [3] \openout2 = `01_Standard_Model.aux'. 
-(01_Standard_Model.tex -Underfull \hbox (badness 10000) in paragraph at lines 5--8 - - [] - -[3 +(01_Standard_Model.tex [4 ] -Underfull \hbox (badness 10000) in paragraph at lines 49--51 +Underfull \hbox (badness 10000) in paragraph at lines 46--47 [] -Underfull \hbox (badness 10000) in paragraph at lines 49--51 +Underfull \hbox (badness 10000) in paragraph at lines 64--74 [] -Underfull \hbox (badness 10000) in paragraph at lines 68--78 +Underfull \hbox (badness 10000) in paragraph at lines 64--74 + + [] + +[5] [6] +Underfull \hbox (badness 10000) in paragraph at lines 78--81 [] -Underfull \hbox (badness 10000) in paragraph at lines 68--78 - - [] - -[4] [5] -Underfull \hbox (badness 10000) in paragraph at lines 82--85 +Underfull \hbox (badness 10000) in paragraph at lines 78--81 [] -Underfull \hbox (badness 10000) in paragraph at lines 82--85 +Underfull \hbox (badness 10000) in paragraph at lines 78--81 [] - -Underfull \hbox (badness 10000) in paragraph at lines 82--85 - - [] - - + File: img/beta_decay_feynman.png Graphic file (type png) -Package pdftex.def Info: img/beta_decay_feynman.png used on input line 89. +Package pdftex.def Info: img/beta_decay_feynman.png used on input line 85. (pdftex.def) Requested size: 140.39958pt x 140.40762pt. - + File: img/muon-decay-feynman.png Graphic file (type png) -Package pdftex.def Info: img/muon-decay-feynman.png used on input line 94. +Package pdftex.def Info: img/muon-decay-feynman.png used on input line 90. (pdftex.def) Requested size: 140.39958pt x 119.05476pt. -Underfull \hbox (badness 10000) in paragraph at lines 102--105 +Underfull \hbox (badness 10000) in paragraph at lines 98--101 [] -Underfull \hbox (badness 10000) in paragraph at lines 102--105 +Underfull \hbox (badness 10000) in paragraph at lines 98--101 [] -[6 <./img/beta_decay_feynman.png> <./img/muon-decay-feynman.png>] -Underfull \hbox (badness 10000) in paragraph at lines 125--126 +[7 <./img/beta_decay_feynman.png> <./img/muon-decay-feynman.png>] +Underfull \hbox (badness 10000) in paragraph at lines 121--122 [] -Underfull \hbox (badness 10000) in paragraph at lines 125--126 +Underfull \hbox (badness 10000) in paragraph at lines 121--122 [] -Underfull \hbox (badness 10000) in paragraph at lines 127--128 +Underfull \hbox (badness 10000) in paragraph at lines 123--124 [] -Underfull \hbox (badness 10000) in paragraph at lines 127--128 +Underfull \hbox (badness 10000) in paragraph at lines 123--124 [] -[7] - +[8] + File: img/neutrino_oscillation.png Graphic file (type png) -Package pdftex.def Info: img/neutrino_oscillation.png used on input line 148. +Package pdftex.def Info: img/neutrino_oscillation.png used on input line 144. (pdftex.def) Requested size: 312.00119pt x 131.50337pt. -Underfull \hbox (badness 10000) in paragraph at lines 160--162 +Underfull \hbox (badness 10000) in paragraph at lines 156--158 [] -Underfull \hbox (badness 10000) in paragraph at lines 160--162 +Underfull \hbox (badness 10000) in paragraph at lines 159--162 [] - -Underfull \hbox (badness 10000) in paragraph at lines 163--167 - - [] - -[8 <./img/neutrino_oscillation.png>] - +[9 <./img/neutrino_oscillation.png>] + File: img/LFV-neutrino_osc.png Graphic file (type png) -Package pdftex.def Info: img/LFV-neutrino_osc.png used on input line 171. +Package pdftex.def Info: img/LFV-neutrino_osc.png used on input line 168. (pdftex.def) Requested size: 140.39958pt x 70.74054pt. - + File: img/LFV-SUSY.png Graphic file (type png) -Package pdftex.def Info: img/LFV-SUSY.png used on input line 176. 
+Package pdftex.def Info: img/LFV-SUSY.png used on input line 173. (pdftex.def) Requested size: 140.39958pt x 71.59482pt. - + File: img/LFV-tree_lvl.png Graphic file (type png) -Package pdftex.def Info: img/LFV-tree_lvl.png used on input line 181. +Package pdftex.def Info: img/LFV-tree_lvl.png used on input line 178. (pdftex.def) Requested size: 140.39958pt x 94.67162pt. - [9 <./img/LFV-neutrino_osc.png> <./img/LFV-SUSY.png> <./img/LFV-tree_lvl.png>] -) [10] +) [10 <./img/LFV-neutrino_osc.png> <./img/LFV-SUSY.png> <./img/LFV-tree_lvl.png +>] [11] \openout2 = `02_mu_to_3e_decay.aux'. (02_mu_to_3e_decay.tex @@ -1806,55 +1794,46 @@ [] - -Underfull \hbox (badness 10000) in paragraph at lines 24--25 - - [] - -[11 +[12 -] [12]) [13] +] [13]) [14] \openout2 = `03_experimental_setup.aux'. (03_experimental_setup.tex -Underfull \hbox (badness 10000) in paragraph at lines 18--19 - - [] - - + File: img/setup-Ia.png Graphic file (type png) Package pdftex.def Info: img/setup-Ia.png used on input line 23. (pdftex.def) Requested size: 312.00119pt x 145.84636pt. - + File: img/tracks-phase_I.png Graphic file (type png) Package pdftex.def Info: img/tracks-phase_I.png used on input line 28. (pdftex.def) Requested size: 140.39958pt x 143.79482pt. - + File: img/tracks-phase_II.png Graphic file (type png) Package pdftex.def Info: img/tracks-phase_II.png used on input line 33. (pdftex.def) Requested size: 140.39958pt x 145.23878pt. - + File: img/setup-Ib.png Graphic file (type png) Package pdftex.def Info: img/setup-Ib.png used on input line 38. (pdftex.def) Requested size: 390.0pt x 123.95313pt. - + File: img/setup-II.png Graphic file (type png) Package pdftex.def Info: img/setup-II.png used on input line 43. (pdftex.def) Requested size: 390.0pt x 74.5667pt. -[14 + [15 ] Overfull \vbox (30.10492pt too high) has occurred while \output is active [] -[15 <./img/setup-Ia.png> <./img/tracks-phase_I.png> <./img/tracks-phase_II.png> +[16 <./img/setup-Ia.png> <./img/tracks-phase_I.png> <./img/tracks-phase_II.png> <./img/setup-Ib.png> <./img/setup-II.png>] Underfull \hbox (badness 10000) in paragraph at lines 51--52 @@ -1865,108 +1844,280 @@ [] - -Underfull \hbox (badness 10000) in paragraph at lines 61--62 - - [] - -[16] -Underfull \hbox (badness 10000) in paragraph at lines 63--67 - - [] - - +[17] + File: img/tracks_in_det_xy.png Graphic file (type png) Package pdftex.def Info: img/tracks_in_det_xy.png used on input line 70. -(pdftex.def) Requested size: 390.0pt x 454.15863pt. -) [17] [18 <./img/tracks_in_det_xy.png>] +(pdftex.def) Requested size: 312.00119pt x 361.16603pt. + [18] + +File: img/tracks_in_det_z.png Graphic file (type png) + +Package pdftex.def Info: img/tracks_in_det_z.png used on input line 78. +(pdftex.def) Requested size: 312.00119pt x 363.36235pt. + [19 <./img/tracks_in_det_xy.png>]) [20 <./img/tracks_in_det_z.png>] \openout2 = `04_machine_learning.aux'. - (04_machine_learning.tex -Underfull \hbox (badness 10000) in paragraph at lines 5--6 + +(04_machine_learning.tex +Underfull \hbox (badness 10000) in paragraph at lines 6--7 [] -Underfull \hbox (badness 10000) in paragraph at lines 11--12 +Underfull \hbox (badness 10000) in paragraph at lines 14--15 [] -[19 +[21 ] - + File: img/neural_network.png Graphic file (type png) -Package pdftex.def Info: img/neural_network.png used on input line 26. +Package pdftex.def Info: img/neural_network.png used on input line 29. (pdftex.def) Requested size: 312.00119pt x 169.33112pt. 
- + File: img/neuron.png Graphic file (type png) -Package pdftex.def Info: img/neuron.png used on input line 31. +Package pdftex.def Info: img/neuron.png used on input line 34. (pdftex.def) Requested size: 156.0006pt x 99.30911pt. -Underfull \hbox (badness 10000) in paragraph at lines 39--40 +Underfull \hbox (badness 10000) in paragraph at lines 42--43 [] -[20 <./img/neural_network.png> <./img/neuron.png>] - +[22 <./img/neural_network.png> <./img/neuron.png>] + File: img/selu.png Graphic file (type png) -Package pdftex.def Info: img/selu.png used on input line 54. +Package pdftex.def Info: img/selu.png used on input line 59. (pdftex.def) Requested size: 175.49881pt x 120.31479pt. - + File: img/relu.png Graphic file (type png) -Package pdftex.def Info: img/relu.png used on input line 59. +Package pdftex.def Info: img/relu.png used on input line 64. (pdftex.def) Requested size: 175.49881pt x 120.00516pt. - + File: img/tanh.png Graphic file (type png) -Package pdftex.def Info: img/tanh.png used on input line 64. +Package pdftex.def Info: img/tanh.png used on input line 69. (pdftex.def) Requested size: 175.49881pt x 115.55222pt. - -Underfull \hbox (badness 10000) in paragraph at lines 72--73 - - [] - -[21 <./img/selu.png> <./img/relu.png> <./img/tanh.png>] -LaTeX Font Info: Try loading font information for OMS+cmr on input line 89. + [23 <./img/selu.png> <./img/relu.png> <./img/tanh.png>] [24] +LaTeX Font Info: Try loading font information for OMS+cmr on input line 99. ("C:\Program Files\MiKTeX 2.9\tex\latex\base\omscmr.fd" File: omscmr.fd 2014/09/29 v2.5h Standard LaTeX font definitions ) LaTeX Font Info: Font shape `OMS/cmr/m/n' in size <12> not available -(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 89. - [22]) [23] -(00_main.bbl [24 +(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 99. + [25] [26] + +File: img/batch_norm.jpeg Graphic file (type jpg) + +Package pdftex.def Info: img/batch_norm.jpeg used on input line 164. +(pdftex.def) Requested size: 390.0pt x 134.28722pt. + [27 <./img/batch_norm.jpeg>] + +File: img/RNN_general_architecture.png Graphic file (type png) + +Package pdftex.def Info: img/RNN_general_architecture.png used on input line 1 +80. +(pdftex.def) Requested size: 390.0pt x 146.13263pt. -]) -Package atveryend Info: Empty hook `BeforeClearDocument' on input line 70. - [25] -Package atveryend Info: Empty hook `AfterLastShipout' on input line 70. - (00_main.aux (01_Standard_Model.aux) -(02_mu_to_3e_decay.aux) (03_experimental_setup.aux) (04_machine_learning.aux)) -Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 70. -Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 70. +Underfull \hbox (badness 10000) in paragraph at lines 199--200 + + [] + + +Underfull \hbox (badness 10000) in paragraph at lines 201--204 + + [] + +[28 <./img/RNN_general_architecture.png>] +Underfull \hbox (badness 10000) in paragraph at lines 209--210 + + [] + + +Underfull \hbox (badness 10000) in paragraph at lines 211--212 + + [] + + +File: img/LSTM_cell.png Graphic file (type png) + +Package pdftex.def Info: img/LSTM_cell.png used on input line 217. +(pdftex.def) Requested size: 312.00119pt x 186.04034pt. +[29] +Underfull \hbox (badness 10000) in paragraph at lines 237--238 + + [] + +) [30 <./img/LSTM_cell.png>] +\openout2 = `05_Data.aux'. 
+ + (05_Data.tex +Underfull \hbox (badness 10000) in paragraph at lines 4--6 + + [] + + +Underfull \hbox (badness 10000) in paragraph at lines 7--8 + + [] + +[31 + + +]) [32] +\openout2 = `06_RNN_used.aux'. + + (06_RNN_used.tex + +File: img/RNN-Pred-Arch.png Graphic file (type png) + +Package pdftex.def Info: img/RNN-Pred-Arch.png used on input line 9. +(pdftex.def) Requested size: 390.0pt x 314.7748pt. + [33 + + + <./img/RNN-Pred-Arch.png>] +Overfull \hbox (12.94238pt too wide) in paragraph at lines 25--28 +\OT1/cmr/m/n/12 The out-put was a 12 di-men-sional vec-tor of the shape: $(\OML +/cmm/m/it/12 x[]; y[]; z[]; x[]; y[]; z[]; :::; z[]\OT1/cmr/m/n/12 )$. + [] + + +Underfull \hbox (badness 10000) in paragraph at lines 31--32 + + [] + + +File: img/RNN-Classifier-Arch.png Graphic file (type png) + +Package pdftex.def Info: img/RNN-Classifier-Arch.png used on input line 45. +(pdftex.def) Requested size: 292.5pt x 543.60568pt. +[34] [35 <./img/RNN-Classifier-Arch.png>]) [36] +\openout2 = `07_Analysis.aux'. + + (07_Analysis.tex + +Package hyperref Warning: Token not allowed in a PDF string (PDFDocEncoding): +(hyperref) removing `math shift' on input line 3. + + +Package hyperref Warning: Token not allowed in a PDF string (PDFDocEncoding): +(hyperref) removing `\chi' on input line 3. + + +Package hyperref Warning: Token not allowed in a PDF string (PDFDocEncoding): +(hyperref) removing `superscript' on input line 3. + + +Package hyperref Warning: Token not allowed in a PDF string (PDFDocEncoding): +(hyperref) removing `math shift' on input line 3. + + +Underfull \hbox (badness 10000) in paragraph at lines 5--6 + + [] + + +File: img/XGB_tf-ft_hist.png Graphic file (type png) + +Package pdftex.def Info: img/XGB_tf-ft_hist.png used on input line 20. +(pdftex.def) Requested size: 312.00119pt x 226.13411pt. + +File: img/XGB_ROC-curve.png Graphic file (type png) + +Package pdftex.def Info: img/XGB_ROC-curve.png used on input line 25. +(pdftex.def) Requested size: 312.00119pt x 213.3579pt. +[37 + + +] +Underfull \hbox (badness 10000) in paragraph at lines 33--34 + + [] + +[38 <./img/XGB_tf-ft_hist.png> <./img/XGB_ROC-curve.png>] +Underfull \hbox (badness 10000) in paragraph at lines 35--36 + + [] + +) [39] +\openout2 = `08_Appendix.aux'. + + (08_Appendix.tex) [40 + + +] (00_main.bbl [41 + +] [42]) +Package atveryend Info: Empty hook `BeforeClearDocument' on input line 72. + [43] +Package atveryend Info: Empty hook `AfterLastShipout' on input line 72. + (00_main.aux +(01_Standard_Model.aux) (02_mu_to_3e_decay.aux) (03_experimental_setup.aux) +(04_machine_learning.aux) (05_Data.aux) (06_RNN_used.aux) (07_Analysis.aux) +(08_Appendix.aux)) +Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 72. +Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 72. Package rerunfilecheck Info: File `00_main.out' has not changed. -(rerunfilecheck) Checksum: 507A136055700225F066D39494392802;1605. -Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 70. +(rerunfilecheck) Checksum: 075DAF2E576AE5EEB0BE329E17F60ABB;3780. + + +LaTeX Warning: There were multiply-defined labels. + +Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 72. 
) Here is how much of TeX's memory you used: - 19806 strings out of 492973 - 334275 string characters out of 3135932 - 430290 words of memory out of 3000000 - 23338 multiletter control sequences out of 15000+200000 + 20013 strings out of 492973 + 338198 string characters out of 3135932 + 433719 words of memory out of 3000000 + 23429 multiletter control sequences out of 15000+200000 548944 words of font info for 87 fonts, out of 3000000 for 9000 1141 hyphenation exceptions out of 8191 - 47i,19n,65p,1103b,571s stack positions out of 5000i,500n,10000p,200000b,50000s -pdfTeX warning (dest): name{Hfootnote.17} has been referenced but does not ex + 47i,19n,65p,1101b,571s stack positions out of 5000i,500n,10000p,200000b,50000s +pdfTeX warning (dest): name{Hfootnote.28} has been referenced but does not ex ist, replaced by a fixed one +pdfTeX warning (dest): name{Hfootnote.27} has been referenced but does not exis +t, replaced by a fixed one + +pdfTeX warning (dest): name{Hfootnote.26} has been referenced but does not exis +t, replaced by a fixed one + +pdfTeX warning (dest): name{Hfootnote.25} has been referenced but does not exis +t, replaced by a fixed one + +pdfTeX warning (dest): name{Hfootnote.24} has been referenced but does not exis +t, replaced by a fixed one + +pdfTeX warning (dest): name{Hfootnote.23} has been referenced but does not exis +t, replaced by a fixed one + +pdfTeX warning (dest): name{Hfootnote.22} has been referenced but does not exis +t, replaced by a fixed one + +pdfTeX warning (dest): name{Hfootnote.21} has been referenced but does not exis +t, replaced by a fixed one + +pdfTeX warning (dest): name{Hfootnote.20} has been referenced but does not exis +t, replaced by a fixed one + +pdfTeX warning (dest): name{Hfootnote.19} has been referenced but does not exis +t, replaced by a fixed one + +pdfTeX warning (dest): name{Hfootnote.18} has been referenced but does not exis +t, replaced by a fixed one + +pdfTeX warning (dest): name{Hfootnote.17} has been referenced but does not exis +t, replaced by a fixed one + pdfTeX warning (dest): name{Hfootnote.16} has been referenced but does not exis t, replaced by a fixed one @@ -2020,21 +2171,21 @@ es/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmex10.pfb> -Output written on 00_main.pdf (25 pages, 756234 bytes). +1/public/amsfonts/cm/cmmi6.pfb> + +Output written on 00_main.pdf (43 pages, 1683081 bytes). PDF statistics: - 453 PDF objects out of 1000 (max. 8388607) - 101 named destinations out of 1000 (max. 500000) - 278 words of extra memory for PDF output out of 10000 (max. 10000000) + 776 PDF objects out of 1000 (max. 8388607) + 180 named destinations out of 1000 (max. 500000) + 558 words of extra memory for PDF output out of 10000 (max. 
10000000) diff --git a/Report/00_main.out b/Report/00_main.out index 3ce0a73..51a2181 100644 --- a/Report/00_main.out +++ b/Report/00_main.out @@ -7,7 +7,7 @@ \BOOKMARK [1][-]{section.3}{eee decay}{}% 7 \BOOKMARK [2][-]{subsection.3.1}{Kinematics}{section.3}% 8 \BOOKMARK [2][-]{subsection.3.2}{Background events}{section.3}% 9 -\BOOKMARK [3][-]{subsubsection.3.2.1}{}{subsection.3.2}% 10 +\BOOKMARK [3][-]{subsubsection.3.2.1}{Internal conversions}{subsection.3.2}% 10 \BOOKMARK [3][-]{subsubsection.3.2.2}{Michel decay}{subsection.3.2}% 11 \BOOKMARK [3][-]{subsubsection.3.2.3}{Radiative muon decay}{subsection.3.2}% 12 \BOOKMARK [3][-]{subsubsection.3.2.4}{BhaBha scattering}{subsection.3.2}% 13 @@ -22,3 +22,33 @@ \BOOKMARK [1][-]{section.5}{Machine learning}{}% 22 \BOOKMARK [2][-]{subsection.5.1}{Introduction}{section.5}% 23 \BOOKMARK [2][-]{subsection.5.2}{Artificial neural networks}{section.5}% 24 +\BOOKMARK [3][-]{subsubsection.5.2.1}{General concepts}{subsection.5.2}% 25 +\BOOKMARK [3][-]{subsubsection.5.2.2}{Activation functions}{subsection.5.2}% 26 +\BOOKMARK [3][-]{subsubsection.5.2.3}{Concepts of training}{subsection.5.2}% 27 +\BOOKMARK [3][-]{subsubsection.5.2.4}{Loss functions}{subsection.5.2}% 28 +\BOOKMARK [3][-]{subsubsection.5.2.5}{Stochastic gradient descent}{subsection.5.2}% 29 +\BOOKMARK [3][-]{subsubsection.5.2.6}{Stochastic gradient descent with Momentum}{subsection.5.2}% 30 +\BOOKMARK [3][-]{subsubsection.5.2.7}{RMSProp}{subsection.5.2}% 31 +\BOOKMARK [3][-]{subsubsection.5.2.8}{Adam}{subsection.5.2}% 32 +\BOOKMARK [3][-]{subsubsection.5.2.9}{Decaying learning rate}{subsection.5.2}% 33 +\BOOKMARK [3][-]{subsubsection.5.2.10}{Batch normalisation}{subsection.5.2}% 34 +\BOOKMARK [2][-]{subsection.5.3}{Recurrent Neural Networks}{section.5}% 35 +\BOOKMARK [3][-]{subsubsection.5.3.1}{General concepts}{subsection.5.3}% 36 +\BOOKMARK [3][-]{subsubsection.5.3.2}{Most common architectures}{subsection.5.3}% 37 +\BOOKMARK [3][-]{subsubsection.5.3.3}{Cell types}{subsection.5.3}% 38 +\BOOKMARK [2][-]{subsection.5.4}{XGBoost}{section.5}% 39 +\BOOKMARK [1][-]{section.6}{Data}{}% 40 +\BOOKMARK [2][-]{subsection.6.1}{General information}{section.6}% 41 +\BOOKMARK [2][-]{subsection.6.2}{Preprocessing}{section.6}% 42 +\BOOKMARK [3][-]{subsubsection.6.2.1}{Dataset 1}{subsection.6.2}% 43 +\BOOKMARK [3][-]{subsubsection.6.2.2}{Dataset 2}{subsection.6.2}% 44 +\BOOKMARK [1][-]{section.7}{RNN's used}{}% 45 +\BOOKMARK [2][-]{subsection.7.1}{RNN for track prediction}{section.7}% 46 +\BOOKMARK [2][-]{subsection.7.2}{RNN for classification of tracks}{section.7}% 47 +\BOOKMARK [1][-]{section.8}{Results}{}% 48 +\BOOKMARK [2][-]{subsection.8.1}{Best 2}{section.8}% 49 +\BOOKMARK [2][-]{subsection.8.2}{RNN classifier with RNN track prediction input}{section.8}% 50 +\BOOKMARK [2][-]{subsection.8.3}{XGBoost}{section.8}% 51 +\BOOKMARK [2][-]{subsection.8.4}{Comparison in performance of the RNN and XGBoost}{section.8}% 52 +\BOOKMARK [2][-]{subsection.8.5}{Outlook}{section.8}% 53 +\BOOKMARK [1][-]{section.9}{Acknowledgements}{}% 54 diff --git a/Report/00_main.pdf b/Report/00_main.pdf index 07a74e2..0e4cd53 100644 --- a/Report/00_main.pdf +++ b/Report/00_main.pdf Binary files differ diff --git a/Report/00_main.synctex.gz b/Report/00_main.synctex.gz index 783fa73..45d1d42 100644 --- a/Report/00_main.synctex.gz +++ b/Report/00_main.synctex.gz Binary files differ diff --git a/Report/00_main.tex b/Report/00_main.tex index 688d84c..917f4e3 100644 --- a/Report/00_main.tex +++ b/Report/00_main.tex @@ -59,11 +59,13 @@ 
\include{04_machine_learning} -%\include{05_Graphics} -% -%\include{06_Calculus} -% -%\include{07_Error-Calculus} +\include{05_Data} + +\include{06_RNN_used} + +\include{07_Analysis} + +\include{08_Appendix} \bibliographystyle{unsrt} \bibliography{bib/General} diff --git a/Report/00_main.toc b/Report/00_main.toc index 2e08b37..b2f7ce8 100644 --- a/Report/00_main.toc +++ b/Report/00_main.toc @@ -1,25 +1,55 @@ \babel@toc {english}{} -\contentsline {section}{\numberline {1}Standard Model}{3}{section.1} -\contentsline {subsection}{\numberline {1.1}Elementary particles and forces}{3}{subsection.1.1} -\contentsline {subsection}{\numberline {1.2}Interaction rules}{6}{subsection.1.2} -\contentsline {section}{\numberline {2}Physics beyond the SM}{7}{section.2} -\contentsline {subsection}{\numberline {2.1}Neutrino Oscillation}{7}{subsection.2.1} -\contentsline {subsection}{\numberline {2.2}New physics}{8}{subsection.2.2} -\contentsline {section}{\numberline {3}$\mu \rightarrow eee$ decay}{11}{section.3} -\contentsline {subsection}{\numberline {3.1}Kinematics}{11}{subsection.3.1} -\contentsline {subsection}{\numberline {3.2}Background events}{11}{subsection.3.2} -\contentsline {subsubsection}{\numberline {3.2.1}}{11}{subsubsection.3.2.1} -\contentsline {subsubsection}{\numberline {3.2.2}Michel decay}{12}{subsubsection.3.2.2} -\contentsline {subsubsection}{\numberline {3.2.3}Radiative muon decay}{12}{subsubsection.3.2.3} -\contentsline {subsubsection}{\numberline {3.2.4}BhaBha scattering}{12}{subsubsection.3.2.4} -\contentsline {subsubsection}{\numberline {3.2.5}Pion decays}{12}{subsubsection.3.2.5} -\contentsline {subsubsection}{\numberline {3.2.6}Analysis of the background}{13}{subsubsection.3.2.6} -\contentsline {section}{\numberline {4}Mu3e experiment}{14}{section.4} -\contentsline {subsection}{\numberline {4.1}Requirements}{14}{subsection.4.1} -\contentsline {subsection}{\numberline {4.2}Phase I}{14}{subsection.4.2} -\contentsline {subsection}{\numberline {4.3}Phase II}{14}{subsection.4.3} -\contentsline {subsection}{\numberline {4.4}Experimental setup}{14}{subsection.4.4} -\contentsline {subsection}{\numberline {4.5}The problem of low longitudinal momentum recurlers}{17}{subsection.4.5} -\contentsline {section}{\numberline {5}Machine learning}{19}{section.5} -\contentsline {subsection}{\numberline {5.1}Introduction}{19}{subsection.5.1} -\contentsline {subsection}{\numberline {5.2}Artificial neural networks}{19}{subsection.5.2} +\contentsline {section}{\numberline {1}Standard Model}{4}{section.1} +\contentsline {subsection}{\numberline {1.1}Elementary particles and forces}{4}{subsection.1.1} +\contentsline {subsection}{\numberline {1.2}Interaction rules}{7}{subsection.1.2} +\contentsline {section}{\numberline {2}Physics beyond the SM}{8}{section.2} +\contentsline {subsection}{\numberline {2.1}Neutrino Oscillation}{8}{subsection.2.1} +\contentsline {subsection}{\numberline {2.2}New physics}{9}{subsection.2.2} +\contentsline {section}{\numberline {3}$\mu \rightarrow eee$ decay}{12}{section.3} +\contentsline {subsection}{\numberline {3.1}Kinematics}{12}{subsection.3.1} +\contentsline {subsection}{\numberline {3.2}Background events}{12}{subsection.3.2} +\contentsline {subsubsection}{\numberline {3.2.1}Internal conversions}{12}{subsubsection.3.2.1} +\contentsline {subsubsection}{\numberline {3.2.2}Michel decay}{13}{subsubsection.3.2.2} +\contentsline {subsubsection}{\numberline {3.2.3}Radiative muon decay}{13}{subsubsection.3.2.3} +\contentsline {subsubsection}{\numberline {3.2.4}BhaBha 
scattering}{13}{subsubsection.3.2.4} +\contentsline {subsubsection}{\numberline {3.2.5}Pion decays}{13}{subsubsection.3.2.5} +\contentsline {subsubsection}{\numberline {3.2.6}Analysis of the background}{14}{subsubsection.3.2.6} +\contentsline {section}{\numberline {4}Mu3e experiment}{15}{section.4} +\contentsline {subsection}{\numberline {4.1}Requirements}{15}{subsection.4.1} +\contentsline {subsection}{\numberline {4.2}Phase I}{15}{subsection.4.2} +\contentsline {subsection}{\numberline {4.3}Phase II}{15}{subsection.4.3} +\contentsline {subsection}{\numberline {4.4}Experimental setup}{15}{subsection.4.4} +\contentsline {subsection}{\numberline {4.5}The problem of low longitudinal momentum recurlers}{18}{subsection.4.5} +\contentsline {section}{\numberline {5}Machine learning}{21}{section.5} +\contentsline {subsection}{\numberline {5.1}Introduction}{21}{subsection.5.1} +\contentsline {subsection}{\numberline {5.2}Artificial neural networks}{21}{subsection.5.2} +\contentsline {subsubsection}{\numberline {5.2.1}General concepts}{21}{subsubsection.5.2.1} +\contentsline {subsubsection}{\numberline {5.2.2}Activation functions}{23}{subsubsection.5.2.2} +\contentsline {subsubsection}{\numberline {5.2.3}Concepts of training}{24}{subsubsection.5.2.3} +\contentsline {subsubsection}{\numberline {5.2.4}Loss functions}{24}{subsubsection.5.2.4} +\contentsline {subsubsection}{\numberline {5.2.5}Stochastic gradient descent}{25}{subsubsection.5.2.5} +\contentsline {subsubsection}{\numberline {5.2.6}Stochastic gradient descent with Momentum}{25}{subsubsection.5.2.6} +\contentsline {subsubsection}{\numberline {5.2.7}RMSProp}{25}{subsubsection.5.2.7} +\contentsline {subsubsection}{\numberline {5.2.8}Adam}{26}{subsubsection.5.2.8} +\contentsline {subsubsection}{\numberline {5.2.9}Decaying learning rate}{27}{subsubsection.5.2.9} +\contentsline {subsubsection}{\numberline {5.2.10}Batch normalisation}{27}{subsubsection.5.2.10} +\contentsline {subsection}{\numberline {5.3}Recurrent Neural Networks}{27}{subsection.5.3} +\contentsline {subsubsection}{\numberline {5.3.1}General concepts}{27}{subsubsection.5.3.1} +\contentsline {subsubsection}{\numberline {5.3.2}Most common architectures}{28}{subsubsection.5.3.2} +\contentsline {subsubsection}{\numberline {5.3.3}Cell types}{29}{subsubsection.5.3.3} +\contentsline {subsection}{\numberline {5.4}XGBoost}{30}{subsection.5.4} +\contentsline {section}{\numberline {6}Data}{31}{section.6} +\contentsline {subsection}{\numberline {6.1}General information}{31}{subsection.6.1} +\contentsline {subsection}{\numberline {6.2}Preprocessing}{31}{subsection.6.2} +\contentsline {subsubsection}{\numberline {6.2.1}Dataset 1}{31}{subsubsection.6.2.1} +\contentsline {subsubsection}{\numberline {6.2.2}Dataset 2}{32}{subsubsection.6.2.2} +\contentsline {section}{\numberline {7}RNN's used}{33}{section.7} +\contentsline {subsection}{\numberline {7.1}RNN for track prediction}{33}{subsection.7.1} +\contentsline {subsection}{\numberline {7.2}RNN for classification of tracks}{34}{subsection.7.2} +\contentsline {section}{\numberline {8}Results}{37}{section.8} +\contentsline {subsection}{\numberline {8.1}Best $\chi ^2$}{37}{subsection.8.1} +\contentsline {subsection}{\numberline {8.2}RNN classifier with RNN track prediction input}{37}{subsection.8.2} +\contentsline {subsection}{\numberline {8.3}XGBoost}{37}{subsection.8.3} +\contentsline {subsection}{\numberline {8.4}Comparison in performance of the RNN and XGBoost}{39}{subsection.8.4} +\contentsline {subsection}{\numberline 
{8.5}Outlook}{39}{subsection.8.5} +\contentsline {section}{\numberline {9}Acknowledgements}{40}{section.9} diff --git a/Report/01_Standard_Model.aux b/Report/01_Standard_Model.aux index 057e74c..edfff30 100644 --- a/Report/01_Standard_Model.aux +++ b/Report/01_Standard_Model.aux @@ -1,43 +1,43 @@ \relax \providecommand\hyper@newdestlabel[2]{} -\@writefile{toc}{\contentsline {section}{\numberline {1}Standard Model}{3}{section.1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.1}Elementary particles and forces}{3}{subsection.1.1}} -\newlabel{intro_elem_part}{{1.1}{3}{Elementary particles and forces}{subsection.1.1}{}} -\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Quarks in the Standard Model\relax }}{3}{table.caption.2}} +\@writefile{toc}{\contentsline {section}{\numberline {1}Standard Model}{4}{section.1}} +\@writefile{toc}{\contentsline {subsection}{\numberline {1.1}Elementary particles and forces}{4}{subsection.1.1}} +\newlabel{intro_elem_part}{{1.1}{4}{Elementary particles and forces}{subsection.1.1}{}} +\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Quarks in the Standard Model\relax }}{4}{table.caption.2}} \providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}} -\newlabel{Quark_SM_table}{{1}{3}{Quarks in the Standard Model\relax }{table.caption.2}{}} +\newlabel{Quark_SM_table}{{1}{4}{Quarks in the Standard Model\relax }{table.caption.2}{}} \citation{thomson2013modern} \citation{thomson2013modern} \citation{thomson2013modern} -\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Leptons in the standard model\relax }}{4}{table.caption.3}} -\newlabel{Lepton_SM_table}{{2}{4}{Leptons in the standard model\relax }{table.caption.3}{}} -\newlabel{Lepton_table}{{2}{4}{Leptons in the standard model\relax }{table.caption.3}{}} -\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Fundamental forces\relax }}{4}{table.caption.4}} -\newlabel{fund_forces_table}{{3}{4}{Fundamental forces\relax }{table.caption.4}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.2}Interaction rules}{6}{subsection.1.2}} -\newlabel{beta-decay_feynman}{{1a}{6}{Feynman diagram of the $\beta $-decay\relax }{figure.caption.5}{}} -\newlabel{sub@beta-decay_feynman}{{a}{6}{Feynman diagram of the $\beta $-decay\relax }{figure.caption.5}{}} -\newlabel{muon-decay_feynman}{{1b}{6}{Feynman diagram of a $\mu $-decay\relax }{figure.caption.5}{}} -\newlabel{sub@muon-decay_feynman}{{b}{6}{Feynman diagram of a $\mu $-decay\relax }{figure.caption.5}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Certain diagrams of decays\relax }}{6}{figure.caption.5}} +\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Leptons in the standard model\relax }}{5}{table.caption.3}} +\newlabel{Lepton_SM_table}{{2}{5}{Leptons in the standard model\relax }{table.caption.3}{}} +\newlabel{Lepton_table}{{2}{5}{Leptons in the standard model\relax }{table.caption.3}{}} +\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Fundamental forces\relax }}{5}{table.caption.4}} +\newlabel{fund_forces_table}{{3}{5}{Fundamental forces\relax }{table.caption.4}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {1.2}Interaction rules}{7}{subsection.1.2}} +\newlabel{beta-decay_feynman}{{1a}{7}{Feynman diagram of the $\beta $-decay\relax }{figure.caption.5}{}} +\newlabel{sub@beta-decay_feynman}{{a}{7}{Feynman diagram of the $\beta $-decay\relax }{figure.caption.5}{}} 
+\newlabel{muon-decay_feynman}{{1b}{7}{Feynman diagram of a $\mu $-decay\relax }{figure.caption.5}{}} +\newlabel{sub@muon-decay_feynman}{{b}{7}{Feynman diagram of a $\mu $-decay\relax }{figure.caption.5}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Certain diagrams of decays\relax }}{7}{figure.caption.5}} \citation{abe2008precision} \citation{adamson2011measurement} -\@writefile{toc}{\contentsline {section}{\numberline {2}Physics beyond the SM}{7}{section.2}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Neutrino Oscillation}{7}{subsection.2.1}} -\newlabel{PMNS_neutrino}{{1}{7}{Neutrino Oscillation}{equation.2.1}{}} -\newlabel{neutrino_flavour_change_prob}{{2}{8}{Neutrino Oscillation}{equation.2.2}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Process that violates lepton family number conservation through neutrino oscillation\relax }}{8}{figure.caption.6}} -\newlabel{neutrino_osc_feyn}{{2}{8}{Process that violates lepton family number conservation through neutrino oscillation\relax }{figure.caption.6}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}New physics}{8}{subsection.2.2}} -\newlabel{LFV-neutrino_osc}{{3a}{9}{LFV through neutrino oscillation\relax }{figure.caption.7}{}} -\newlabel{sub@LFV-neutrino_osc}{{a}{9}{LFV through neutrino oscillation\relax }{figure.caption.7}{}} -\newlabel{LFV-SUSY}{{3b}{9}{LFV by using supersymmetric particles\relax }{figure.caption.7}{}} -\newlabel{sub@LFV-SUSY}{{b}{9}{LFV by using supersymmetric particles\relax }{figure.caption.7}{}} -\newlabel{LFV-tree_lvl}{{3c}{9}{LFV at tree level\relax }{figure.caption.7}{}} -\newlabel{sub@LFV-tree_lvl}{{c}{9}{LFV at tree level\relax }{figure.caption.7}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Charged LFV\relax }}{9}{figure.caption.7}} +\@writefile{toc}{\contentsline {section}{\numberline {2}Physics beyond the SM}{8}{section.2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Neutrino Oscillation}{8}{subsection.2.1}} +\newlabel{PMNS_neutrino}{{1}{8}{Neutrino Oscillation}{equation.2.1}{}} +\newlabel{neutrino_flavour_change_prob}{{2}{9}{Neutrino Oscillation}{equation.2.2}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Process that violates lepton family number conservation through neutrino oscillation\relax }}{9}{figure.caption.6}} +\newlabel{neutrino_osc_feyn}{{2}{9}{Process that violates lepton family number conservation through neutrino oscillation\relax }{figure.caption.6}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}New physics}{9}{subsection.2.2}} +\newlabel{LFV-neutrino_osc}{{3a}{10}{LFV through neutrino oscillation\relax }{figure.caption.7}{}} +\newlabel{sub@LFV-neutrino_osc}{{a}{10}{LFV through neutrino oscillation\relax }{figure.caption.7}{}} +\newlabel{LFV-SUSY}{{3b}{10}{LFV by using supersymmetric particles\relax }{figure.caption.7}{}} +\newlabel{sub@LFV-SUSY}{{b}{10}{LFV by using supersymmetric particles\relax }{figure.caption.7}{}} +\newlabel{LFV-tree_lvl}{{3c}{10}{LFV at tree level\relax }{figure.caption.7}{}} +\newlabel{sub@LFV-tree_lvl}{{c}{10}{LFV at tree level\relax }{figure.caption.7}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Charged LFV\relax }}{10}{figure.caption.7}} \@setckpt{01_Standard_Model}{ -\setcounter{page}{11} +\setcounter{page}{12} \setcounter{equation}{2} \setcounter{enumi}{0} \setcounter{enumii}{0} diff --git a/Report/01_Standard_Model.tex b/Report/01_Standard_Model.tex 
index 4ffa87f..f764ed1 100644 --- a/Report/01_Standard_Model.tex +++ b/Report/01_Standard_Model.tex @@ -4,8 +4,7 @@ The Standard Model (SM) describes all known elementary particles as well as three of the four known forces\footnote{Strong, weak and electromagnetic forces}.\\ The elementary particles that make up matter can be split into two categories, namely quarks and leptons. There are six types of quarks and six types of leptons. The type of a particle is conventionally called flavour. The six quark flavours and the six lepton flavours are split into three generations (each with two quarks and two leptons in it). -Experimental evidence suggests that there exist exactly three generations of particles. Each particle of the first generation has heavier counterparts in the other generations with similar properties apart from their mass (e.g. $e^- \rightarrow \mu^- \rightarrow \tau^-$). For each following generation, the particles have a higher mass than the generation before.\\ - +Experimental evidence suggests that there exist exactly three generations of particles. Each particle of the first generation has heavier counterparts in the other generations with similar properties apart from their mass (e.g. $e^- \rightarrow \mu^- \rightarrow \tau^-$). For each following generation, the particles have a higher mass than the generation before. \begin{table}[H] \begin{center} @@ -23,12 +22,10 @@ \end{center} \end{table} - One category consists of quarks ($q$) (see Table \ref{Quark_SM_table}). Here we differentiate between up-type quarks, with charge $+\frac{2}{3}e$, and down-type quarks, with charge $-\frac{1}{3}e$. Quarks interact with all fundamental forces.\\ Each quark carries a property called colour charge. The possible colour charges are red (r), green (gr) and blue (bl), with anti-quarks carrying anti-colour. Quarks can only carry one colour, whilst every free particle has to be colourless\footnote{Colour confinement}. As a consequence, we cannot observe a single quark.\\ Free particles can achieve being colourless in two ways: either by having all three colours present in equal amounts (one quark of each colour), which creates the characteristic group of baryons ($qqq$) and anti-baryons ($\bar{q}\bar{q}\bar{q}$), or by having a colour and its anti-colour present, which creates the group of mesons ($q\bar{q}$). - \begin{table}[H] \begin{center} \caption{Leptons in the standard model} \label{Lepton_SM_table} @@ -46,8 +43,7 @@ \end{center} \end{table} -The other group consists of leptons ($l$) (see Table \ref{Lepton_SM_table}). They only interact through the weak and the electromagnetic force. Each generation consists of a lepton of charge $-1$ and a corresponding electrically neutral neutrino. The electron is the lightest of all charged leptons. This makes the electron stable, while the higher-generation particles decay to lower-energy particles. \\ -\\ +The other group consists of leptons ($l$) (see Table \ref{Lepton_SM_table}). They only interact through the weak and the electromagnetic force. Each generation consists of a lepton of charge $-1$ and a corresponding electrically neutral neutrino. The electron is the lightest of all charged leptons. This makes the electron stable, while the higher-generation particles decay to lower-energy particles.\\ The leptons of one generation, namely the charged lepton and its corresponding neutrino, are called a lepton family. A lepton of a family counts as $+1$ towards its corresponding lepton family number, whilst an anti-lepton counts as $-1$.
@@ -158,11 +154,12 @@ \subsection{New physics} As a consequence of neutrino oscillation, lepton flavour is a broken symmetry. The SM has to be adapted to include lepton flavour violation (LFV) and massive neutrinos. LFV is then also expected for charged leptons.\\ -However, it has yet to be determined how LFV exactly works and at which scale it exists.\\\\ +However, it has yet to be determined how LFV exactly works and at which scale it exists.\\ This raises the question of why charged LFV has never been observed yet. This is especially surprising as the mixing angles of the neutrinos have been measured to be large.\\ There are two reasons why charged LFV is strongly suppressed: -The first is that charged leptons are much heavier than neutrinos, and the second is that the mass differences between the neutrino flavours are tiny compared to the W boson mass.\\\\ +The first is that charged leptons are much heavier than neutrinos, and the second is that the mass differences between the neutrino flavours are tiny compared to the W boson mass.\\ + In the classical SM, charged LFV is already forbidden at tree level, though it can be induced indirectly through higher-order loop diagrams (via neutrino oscillation). By adding new particles beyond the SM, we generate new ways for LFV in the charged sector to occur. As LFV is naturally generated in many models beyond the SM, finding charged LFV would be a strong hint for new physics. \begin{figure}[H] diff --git a/Report/02_mu_to_3e_decay.aux b/Report/02_mu_to_3e_decay.aux index fc32772..dbbbbb0 100644 --- a/Report/02_mu_to_3e_decay.aux +++ b/Report/02_mu_to_3e_decay.aux @@ -1,19 +1,19 @@ \relax \providecommand\hyper@newdestlabel[2]{} \citation{blondel2013research} -\@writefile{toc}{\contentsline {section}{\numberline {3}$\mu \rightarrow eee$ decay}{11}{section.3}} -\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Kinematics}{11}{subsection.3.1}} -\newlabel{Kinematics}{{3.1}{11}{Kinematics}{subsection.3.1}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Background events}{11}{subsection.3.2}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.1}}{11}{subsubsection.3.2.1}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.2}Michel decay}{12}{subsubsection.3.2.2}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.3}Radiative muon decay}{12}{subsubsection.3.2.3}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.4}BhaBha scattering}{12}{subsubsection.3.2.4}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.5}Pion decays}{12}{subsubsection.3.2.5}} +\@writefile{toc}{\contentsline {section}{\numberline {3}$\mu \rightarrow eee$ decay}{12}{section.3}} +\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Kinematics}{12}{subsection.3.1}} +\newlabel{Kinematics}{{3.1}{12}{Kinematics}{subsection.3.1}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Background events}{12}{subsection.3.2}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.1}Internal conversions}{12}{subsubsection.3.2.1}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.2}Michel decay}{13}{subsubsection.3.2.2}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.3}Radiative muon decay}{13}{subsubsection.3.2.3}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.4}BhaBha scattering}{13}{subsubsection.3.2.4}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.5}Pion decays}{13}{subsubsection.3.2.5}}
\citation{blondel2013research} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.6}Analysis of the background}{13}{subsubsection.3.2.6}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.6}Analysis of the background}{14}{subsubsection.3.2.6}} \@setckpt{02_mu_to_3e_decay}{ -\setcounter{page}{14} +\setcounter{page}{15} \setcounter{equation}{4} \setcounter{enumi}{0} \setcounter{enumii}{0} diff --git a/Report/02_mu_to_3e_decay.tex b/Report/02_mu_to_3e_decay.tex index 455c036..88adb9f 100644 --- a/Report/02_mu_to_3e_decay.tex +++ b/Report/02_mu_to_3e_decay.tex @@ -21,10 +21,11 @@ \left\vert \vec{p}_{tot} \right\vert = \left\vert\sum \vec{p}_i \right\vert = 0 \end{equation} -The particles resulting from the decay all lie in a plane. The resulting positrons and electrons are in the energy range of 0-53 MeV.\\ +The particles resulting from the decay all lie in a plane. The resulting positrons and electrons are in the energy range of 0-53 MeV. \subsection{Background events} -\subsubsection{} + +\subsubsection{Internal conversions} The event $\mu \rightarrow eee\nu\nu$ results in the same particles seen by the detector as the event we are searching for\footnote{Neutrinos are invisible to our detector}. As a result it proves to be quite challenging to separate the two.\\ By using momentum conservation, it becomes possible to differentiate the $\mu \rightarrow eee$ and the $\mu \rightarrow eee\nu\nu$ events. In the muon rest frame the total momentum is zero and the energy of the resulting particles is equal to the muon rest energy.\\ By reconstructing the energy and momenta of the three $e$ we can check if their momenta add up to zero and their energies equal the muon rest energy. If not, we can assume that there are additional neutrinos.
This differentiation between the two events is crucial for the experiment as the $\mu \rightarrow eee\nu\nu$ events pose the most serious background for $\mu \rightarrow eee$ decay measurements.\\ diff --git a/Report/03_experimental_setup.aux b/Report/03_experimental_setup.aux index 001ab2b..06175a0 100644 --- a/Report/03_experimental_setup.aux +++ b/Report/03_experimental_setup.aux @@ -1,30 +1,32 @@ \relax \providecommand\hyper@newdestlabel[2]{} -\@writefile{toc}{\contentsline {section}{\numberline {4}Mu3e experiment}{14}{section.4}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Requirements}{14}{subsection.4.1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Phase I}{14}{subsection.4.2}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Phase II}{14}{subsection.4.3}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Experimental setup}{14}{subsection.4.4}} -\newlabel{exp_setup}{{4.4}{14}{Experimental setup}{subsection.4.4}{}} -\newlabel{setup_Ia}{{4a}{15}{Setup of the detector in the first part of phase I\relax }{figure.caption.8}{}} -\newlabel{sub@setup_Ia}{{a}{15}{Setup of the detector in the first part of phase I\relax }{figure.caption.8}{}} -\newlabel{tracks_Ia}{{4b}{15}{Tracks in the detector in the first part of phase I\relax }{figure.caption.8}{}} -\newlabel{sub@tracks_Ia}{{b}{15}{Tracks in the detector in the first part of phase I\relax }{figure.caption.8}{}} -\newlabel{tracks_Ib,_II}{{4c}{15}{Tracks in the detector in the second part of phase I and Phase II\relax }{figure.caption.8}{}} -\newlabel{sub@tracks_Ib,_II}{{c}{15}{Tracks in the detector in the second part of phase I and Phase II\relax }{figure.caption.8}{}} -\newlabel{setup_Ib}{{4d}{15}{Setup of the detector in the second part of phase I\relax }{figure.caption.8}{}} -\newlabel{sub@setup_Ib}{{d}{15}{Setup of the detector in the second part of phase I\relax }{figure.caption.8}{}} -\newlabel{setup_II}{{4e}{15}{Setup of the detector in phase II\relax }{figure.caption.8}{}} -\newlabel{sub@setup_II}{{e}{15}{Setup of the detector in phase II\relax }{figure.caption.8}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Setup of the detector during different phases of the experiment\relax }}{15}{figure.caption.8}} +\@writefile{toc}{\contentsline {section}{\numberline {4}Mu3e experiment}{15}{section.4}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Requirements}{15}{subsection.4.1}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Phase I}{15}{subsection.4.2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Phase II}{15}{subsection.4.3}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Experimental setup}{15}{subsection.4.4}} +\newlabel{exp_setup}{{4.4}{15}{Experimental setup}{subsection.4.4}{}} +\newlabel{setup_Ia}{{4a}{16}{Setup of the detector in the first part of phase I\relax }{figure.caption.8}{}} +\newlabel{sub@setup_Ia}{{a}{16}{Setup of the detector in the first part of phase I\relax }{figure.caption.8}{}} +\newlabel{tracks_Ia}{{4b}{16}{Tracks in the detector in the first part of phase I\relax }{figure.caption.8}{}} +\newlabel{sub@tracks_Ia}{{b}{16}{Tracks in the detector in the first part of phase I\relax }{figure.caption.8}{}} +\newlabel{tracks_Ib,_II}{{4c}{16}{Tracks in the detector in the second part of phase I and Phase II\relax }{figure.caption.8}{}} +\newlabel{sub@tracks_Ib,_II}{{c}{16}{Tracks in the detector in the second part of phase I and Phase II\relax }{figure.caption.8}{}} 
+\newlabel{setup_Ib}{{4d}{16}{Setup of the detector in the second part of phase I\relax }{figure.caption.8}{}} +\newlabel{sub@setup_Ib}{{d}{16}{Setup of the detector in the second part of phase I\relax }{figure.caption.8}{}} +\newlabel{setup_II}{{4e}{16}{Setup of the detector in phase II\relax }{figure.caption.8}{}} +\newlabel{sub@setup_II}{{e}{16}{Setup of the detector in phase II\relax }{figure.caption.8}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Setup of the detector during different phases of the experiment\relax }}{16}{figure.caption.8}} \citation{augustin2017mupix} \citation{philipp2015hv} \citation{augustin2015mupix} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.5}The problem of low longitudinal momentum recurlers}{17}{subsection.4.5}} -\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Particle recurling back into the center station\relax }}{18}{figure.caption.9}} -\newlabel{recurler}{{5}{18}{Particle recurling back into the center station\relax }{figure.caption.9}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.5}The problem of low longitudinal momentum recurlers}{18}{subsection.4.5}} +\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Particle recurling back into the center station\relax }}{19}{figure.caption.9}} +\newlabel{recurler}{{5}{19}{Particle recurling back into the center station\relax }{figure.caption.9}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Particle recurling back into the center station\relax }}{20}{figure.caption.10}} +\newlabel{recurler}{{6}{20}{Particle recurling back into the center station\relax }{figure.caption.10}{}} \@setckpt{03_experimental_setup}{ -\setcounter{page}{19} +\setcounter{page}{21} \setcounter{equation}{4} \setcounter{enumi}{0} \setcounter{enumii}{0} @@ -38,7 +40,7 @@ \setcounter{subsubsection}{0} \setcounter{paragraph}{0} \setcounter{subparagraph}{0} -\setcounter{figure}{5} +\setcounter{figure}{6} \setcounter{table}{3} \setcounter{parentequation}{0} \setcounter{AM@survey}{0} diff --git a/Report/03_experimental_setup.tex b/Report/03_experimental_setup.tex index a5a5f9a..0f62004 100644 --- a/Report/03_experimental_setup.tex +++ b/Report/03_experimental_setup.tex @@ -15,7 +15,7 @@ \subsection{Experimental setup} \label{exp_setup} -The detector is of cylindrical shape around the beam. It has a total length of around $2m$ and is situated inside a $1T$ solenoid magnet with $1m$ of inner radius and a total length of $2.5m$. This form was chosen to cover as much phase space as possible. For an unknown decay such as $\mu \rightarrow eee$, it is crucial to have high acceptance in all regions of phase space. There are only two kinds of tracks that get lost: up- and downstream tracks, and tracks with low transverse momentum (which do not traverse enough detector planes to be reconstructed).\\ +The detector is of cylindrical shape around the beam. It has a total length of around $2m$ and is situated inside a $1T$ solenoid magnet with $1m$ of inner radius and a total length of $2.5m$. This form was chosen to cover as much phase space as possible. For an unknown decay such as $\mu \rightarrow eee$, it is crucial to have high acceptance in all regions of phase space. There are only two kinds of tracks that get lost: up- and downstream tracks, and tracks with low transverse momentum (which do not traverse enough detector planes to be reconstructed).
\begin{figure}[H] \begin{center} @@ -48,9 +48,9 @@ \end{center} \end{figure}\newpage -As seen in figure \ref{setup_II}, the final version of the detector can be divided into five separate parts in the longitudinal direction. There is the central part with the target, two inner silicon pixel layers, a fibre tracker and two outer silicon layers. The forward and backward parts, called recurl stations, consist only of a tile timing detector surrounded by two silicon recurl layers. A big advantage of this layout is that even a partially constructed detector (parts are added gradually from phase I to phase II) can give us competitive measurements.\\\ +As seen in figure \ref{setup_II}, the final version of the detector can be divided into five separate parts in the longitudinal direction. There is the central part with the target, two inner silicon pixel layers, a fibre tracker and two outer silicon layers. The forward and backward parts, called recurl stations, consist only of a tile timing detector surrounded by two silicon recurl layers. A big advantage of this layout is that even a partially constructed detector (parts are added gradually from phase I to phase II) can give us competitive measurements.\\ -The target itself is a double cone with a large surface, with a surface length of $10cm$ and a width of $2cm$. The target was chosen specifically to be of this shape to facilitate separating tracks coming from different muons, thereby also helping to reduce accidental background.\\\ +The target itself is a double cone with a large surface, with a surface length of $10cm$ and a width of $2cm$. The target was chosen specifically to be of this shape to facilitate separating tracks coming from different muons, thereby also helping to reduce accidental background.\\ The two inner detector layers, also called vertex layers, span a length of $12cm$. The innermost layer consists of 12 tiles while the outer vertex layer consists of 18 tiles. The tiles are each of $1cm$ width, with the inner and outer layers having average radii of $1.9cm$ and $2.9cm$, respectively \cite{augustin2017mupix}, \cite{philipp2015hv}, \cite{augustin2015mupix}. They are supported by two half-cylinders made of $25\mu m$ thin Kapton foil mounted on plastic. The detector layers themselves are $50\mu m$ thin and cooled by gaseous helium. The vertex detectors are read out at a rate of $20MHz$, giving us a time resolution of $20ns$.\\ After the vertex layers the particles pass through the fibre tracker (see Figure \ref{tracks_Ib,_II}, \ref{setup_II}). It is positioned around $6cm$ away from the center. Its main job is to provide accurate timing information for the outgoing electrons and positrons. It consists of three to five layers, each made of $36cm$ long and $250\mu m$ thick scintillating fibres with fast silicon photomultipliers at the ends. They provide timing information with a resolution of less than $1ns$.\\ Next the outgoing particles encounter the outer silicon pixel detectors. They are mounted just after the fibre detector with average radii of $7.6cm$ and $8.9cm$. The inner layer has 24 and the outer has 28 tiles of $1cm$ length. The active area itself has a length of $36cm$. Similarly to the vertex detectors, they are mounted on $25\mu m$ thin Kapton foil with plastic ends.\\ @@ -58,17 +58,26 @@ \subsection{The problem of low longitudinal momentum recurlers} -As explained in section \ref{exp_setup}, the outgoing particles are supposed to recurl back into the outer stations of the detector to enable a precise measurement of the momentum.
A problem arises if the particles have almost no momentum in the beam direction. Then they can recurl back into the central station and cause additional hits there. As the the central station is designed to let particles easily pass through, they can recurl inside the central station many more times without getting stopped. As we have a $20ns$ time window for the readout of the pixel detectors, we need a very reliable way to identify and reconstruct these tracks as recurling particles as otherwise they look exactly like newly produced particles coming from our target. As one can imagine this influences the precision of our measurements by a big margin. So finding a way to identify these low beam direction momentum particles consistently is of great importance as it is crucial for the experiment to reduce the background as much as possible.\\\\
+As explained in section \ref{exp_setup}, the outgoing particles are supposed to recurl back into the outer stations of the detector to enable a precise measurement of the momentum. A problem arises if the particles have almost no momentum in the beam direction. Then they can recurl back into the central station and cause additional hits there. As the central station is designed to let particles easily pass through, they can recurl inside the central station many more times without getting stopped. As we have a $20ns$ time window for the readout of the pixel detectors, we need a very reliable way to identify and reconstruct these tracks as recurling particles, as otherwise they look exactly like newly produced particles coming from our target. As one can imagine, this influences the precision of our measurements by a big margin. So finding a way to consistently identify these particles with low momentum in the beam direction is of great importance, as it is crucial for the experiment to reduce the background as much as possible.\\
There is already existing software to reconstruct particle tracks. However, it struggles to find the right tracks for a lot of the particles recurling back into the center station.\\
These recurlers will typically leave eight hits or more, four (one on each silicon pixel detector layer) when initially leaving the detector and another four when initially falling back in. It is possible for these recurlers to produce even more hits when leaving the detector again, but for this thesis we will only be focusing on these 8-hit tracks.\\
The current reconstruction algorithm works by fitting helix paths with a $\chi^2$ method onto the 8 hits.\\
-However experience has shown that often the fit with the lowest $\chi^2$ isn't necessarily the right track. If we increase the $\chi^2$ limit value to some arbitrary limit, we get a selection of several possible tracks per particle. Without any additional tools however, it is impossible to figure out if the right track is in the selection\footnote{\alignLongunderstack{\text{Based on detector efficiency it is possible for a particle to leave less}\\ \text{than 8 tracks and therefore not be reconstructed by the algorithm}}} and if yes which one of them correct track is.\\
+However experience has shown that often the fit with the lowest $\chi^2$ isn't necessarily the right track. If we increase the $\chi^2$ limit value to some arbitrary limit, we get a selection of several possible tracks per particle.
Without any additional tools however, it is impossible to figure out if the right track is in the selection\footnote{\alignLongunderstack{\text{Based on detector efficiency it is possible for a particle to leave less}\\ \text{than 8 hits and therefore not be reconstructed by the algorithm}}} and, if yes, which one of them is the correct track.
\begin{figure}[H]
\begin{center}
-\includegraphics[width=1\textwidth]{img/tracks_in_det_xy.png}
+\includegraphics[width=.8\textwidth]{img/tracks_in_det_xy.png}
\caption{Particle recurling back into the center station}
\label{recurler}
\end{center}
-\end{figure}
\ No newline at end of file
+\end{figure}
+
+\begin{figure}[H]
+\begin{center}
+\includegraphics[width=.8\textwidth]{img/tracks_in_det_z.png}
+\caption{Particle recurling back into the center station (longitudinal view)}
+\label{recurler_z}
+\end{center}
+\end{figure}
+
diff --git a/Report/04_machine_learning.aux b/Report/04_machine_learning.aux
index d742127..324b040 100644
--- a/Report/04_machine_learning.aux
+++ b/Report/04_machine_learning.aux
@@ -5,50 +5,80 @@
\citation{ML:XGBoost}
\citation{chollet2015keras}
\citation{abadi2016tensorflow}
-\@writefile{toc}{\contentsline {section}{\numberline {5}Machine learning}{19}{section.5}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Introduction}{19}{subsection.5.1}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Artificial neural networks}{19}{subsection.5.2}}
-\newlabel{neural_network_arch}{{6a}{20}{Architecture of a neural network\relax }{figure.caption.10}{}}
-\newlabel{sub@neural_network_arch}{{a}{20}{Architecture of a neural network\relax }{figure.caption.10}{}}
-\newlabel{neuron}{{6b}{20}{Neuron\relax }{figure.caption.10}{}}
-\newlabel{sub@neuron}{{b}{20}{Neuron\relax }{figure.caption.10}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Neural network architecture\relax }}{20}{figure.caption.10}}
+\@writefile{toc}{\contentsline {section}{\numberline {5}Machine learning}{21}{section.5}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Introduction}{21}{subsection.5.1}}
+\newlabel{ML_Intro}{{5.1}{21}{Introduction}{subsection.5.1}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Artificial neural networks}{21}{subsection.5.2}}
+\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.2.1}General concepts}{21}{subsubsection.5.2.1}}
+\newlabel{neural_network_arch}{{7a}{22}{Architecture of a neural network\relax }{figure.caption.11}{}}
+\newlabel{sub@neural_network_arch}{{a}{22}{Architecture of a neural network\relax }{figure.caption.11}{}}
+\newlabel{neuron}{{7b}{22}{Neuron\relax }{figure.caption.11}{}}
+\newlabel{sub@neuron}{{b}{22}{Neuron\relax }{figure.caption.11}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Neural network architecture\relax }}{22}{figure.caption.11}}
\citation{klambauer2017self}
-\newlabel{selu}{{7a}{21}{Selu activation function\relax }{figure.caption.11}{}}
-\newlabel{sub@selu}{{a}{21}{Selu activation function\relax }{figure.caption.11}{}}
-\newlabel{relu}{{7b}{21}{Relu activation function\relax }{figure.caption.11}{}}
-\newlabel{sub@relu}{{b}{21}{Relu activation function\relax }{figure.caption.11}{}}
-\newlabel{tanh}{{7c}{21}{Tanh activation function\relax }{figure.caption.11}{}}
-\newlabel{sub@tanh}{{c}{21}{Tanh activation function\relax }{figure.caption.11}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Activation functions\relax }}{21}{figure.caption.11}}
-\newlabel{MSE}{{8}{22}{Artificial neural 
networks}{equation.5.8}{}} -\newlabel{BC}{{9}{22}{Artificial neural networks}{equation.5.9}{}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.2.2}Activation functions}{23}{subsubsection.5.2.2}} +\newlabel{selu}{{8a}{23}{Selu, elu activation function\relax }{figure.caption.12}{}} +\newlabel{sub@selu}{{a}{23}{Selu, elu activation function\relax }{figure.caption.12}{}} +\newlabel{relu}{{8b}{23}{Relu activation function\relax }{figure.caption.12}{}} +\newlabel{sub@relu}{{b}{23}{Relu activation function\relax }{figure.caption.12}{}} +\newlabel{tanh}{{8c}{23}{Tanh activation function\relax }{figure.caption.12}{}} +\newlabel{sub@tanh}{{c}{23}{Tanh activation function\relax }{figure.caption.12}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Activation functions\relax }}{23}{figure.caption.12}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.2.3}Concepts of training}{24}{subsubsection.5.2.3}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.2.4}Loss functions}{24}{subsubsection.5.2.4}} +\newlabel{MSE}{{8}{25}{Loss functions}{equation.5.8}{}} +\newlabel{BC}{{9}{25}{Loss functions}{equation.5.9}{}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.2.5}Stochastic gradient descent}{25}{subsubsection.5.2.5}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.2.6}Stochastic gradient descent with Momentum}{25}{subsubsection.5.2.6}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.2.7}RMSProp}{25}{subsubsection.5.2.7}} +\citation{chilimbi2014project} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.2.8}Adam}{26}{subsubsection.5.2.8}} +\newlabel{adam_alg}{{10}{26}{Adam}{equation.5.10}{}} +\citation{ioffe2015batch} +\citation{cooijmans2016recurrent} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.2.9}Decaying learning rate}{27}{subsubsection.5.2.9}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.2.10}Batch normalisation}{27}{subsubsection.5.2.10}} +\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces The effects of Batch Normalization on data\relax }}{27}{figure.caption.13}} +\newlabel{batch_norm}{{9}{27}{The effects of Batch Normalization on data\relax }{figure.caption.13}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {5.3}Recurrent Neural Networks}{27}{subsection.5.3}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.3.1}General concepts}{27}{subsubsection.5.3.1}} +\citation{schuster1997bidirectional} +\@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces General RNN architecture\relax }}{28}{figure.caption.14}} +\newlabel{RNN_arch}{{10}{28}{General RNN architecture\relax }{figure.caption.14}{}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.3.2}Most common architectures}{28}{subsubsection.5.3.2}} +\citation{gers1999learning} +\citation{chung2014empirical} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.3.3}Cell types}{29}{subsubsection.5.3.3}} +\citation{ML:XGBoost} +\@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces Architecture of a LSTM cell\relax }}{30}{figure.caption.15}} +\newlabel{LSTM_arch}{{11}{30}{Architecture of a LSTM cell\relax }{figure.caption.15}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {5.4}XGBoost}{30}{subsection.5.4}} \@setckpt{04_machine_learning}{ -\setcounter{page}{24} -\setcounter{equation}{9} +\setcounter{page}{31} +\setcounter{equation}{11} \setcounter{enumi}{0} \setcounter{enumii}{0} 
\setcounter{enumiii}{0}
\setcounter{enumiv}{0}
-\setcounter{footnote}{17}
+\setcounter{footnote}{19}
\setcounter{mpfootnote}{0}
\setcounter{part}{0}
\setcounter{section}{5}
-\setcounter{subsection}{2}
+\setcounter{subsection}{4}
\setcounter{subsubsection}{0}
\setcounter{paragraph}{0}
\setcounter{subparagraph}{0}
-\setcounter{figure}{7}
+\setcounter{figure}{11}
\setcounter{table}{3}
\setcounter{parentequation}{0}
\setcounter{AM@survey}{0}
\setcounter{ContinuedFloat}{0}
-\setcounter{subfigure}{3}
+\setcounter{subfigure}{0}
\setcounter{subtable}{0}
\setcounter{float@type}{4}
\setcounter{Item}{0}
-\setcounter{Hfootnote}{17}
-\setcounter{bookmark@seq@number}{24}
+\setcounter{Hfootnote}{19}
+\setcounter{bookmark@seq@number}{39}
\setcounter{@stackindex}{1}
\setcounter{ROWcellindex@}{0}
\setcounter{TABrowindex@}{2}
diff --git a/Report/04_machine_learning.tex b/Report/04_machine_learning.tex
index c2fe3bf..49fe009 100644
--- a/Report/04_machine_learning.tex
+++ b/Report/04_machine_learning.tex
@@ -1,6 +1,7 @@
\section{Machine learning}
\subsection{Introduction}
+\label{ML_Intro}

Machine learning has already proven itself to be very successful in resolving many problems in numerous other areas of science and also in the private sector. Based on these promising results, scientists are eager to study the potential of machine learning in physics.\\
@@ -8,6 +9,8 @@
\subsection{Artificial neural networks}
+\subsubsection{General concepts}
+
The fundamental concept behind artificial neural networks is to imitate the architecture of the human brain. They can be used for classification problems as well as regression problems. In its most simple form it can be thought of as some sort of mapping from some input to some target. For this thesis two neural networks of a special subtype of neural networks, called recurrent neural networks, were used. All of the networks used in this thesis were written in the python library Keras \cite{chollet2015keras} with a Tensorflow \cite{abadi2016tensorflow} backend. In this section the basic principles of neural networks will be explained.\\
A neural network consists of many neurons organized in layers as seen in figure \ref{neural_network_arch}. Each neuron is connected to every neuron in the neighbouring layers, while each of these connections has a specific weight assigned to it.\\
@@ -38,6 +41,8 @@
There is no way of knowing how many dimensions and layers will give you the best performance, as one can only define general effects of what happens when they are being modified. Generally, increasing the number of layers enables the system to solve more complex problems, while more dimensions make the system more flexible. However, even these general guidelines are to be applied with caution. For example, adding too many layers can cause the system to train exceedingly slowly, whilst adding too many neurons with a too small training set can result in overfitting\footnote{When a system performs well on the training set but poorly on the test set}. Depending on the problem and the data given, each has its own optimal configuration. By gaining more experience with NN, people can take better guesses where to start. However, in the end it always results in some sort of systematic trial and error to find the optimal configuration.\\
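+
+To make the notion of layers, weights and biases more concrete, the following is a minimal numpy sketch of a forward pass through a small fully connected network (the layer sizes are purely illustrative and not one of the networks used in this thesis):
+
+\begin{verbatim}
+import numpy as np
+
+def layer_forward(x, W, b, activation=np.tanh):
+    # every neuron computes a weighted sum of its inputs plus a bias,
+    # followed by a non-linear activation function
+    return activation(W @ x + b)
+
+rng = np.random.default_rng(0)
+x = rng.normal(size=3)                           # 3 input values
+W1, b1 = rng.normal(size=(5, 3)), np.zeros(5)    # hidden layer with 5 neurons
+W2, b2 = rng.normal(size=(1, 5)), np.zeros(1)    # output layer with 1 neuron
+
+hidden = layer_forward(x, W1, b1)
+output = layer_forward(hidden, W2, b2)
+\end{verbatim}
+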
+\subsubsection{Activation functions}
+
The two RNN's used in this thesis both use $selu$'s \cite{klambauer2017self} as well as $tanh$ and $relu$'s as their activation functions.
\begin{align}
@@ -52,7 +57,7 @@
\begin{center}
\begin{subfigure}{0.45\textwidth}
\includegraphics[width=1\textwidth]{img/selu.png}
-\caption{Selu activation function}
+\caption{Selu, elu activation function}
\label{selu}
\end{subfigure}
\begin{subfigure}{0.45\textwidth}
@@ -69,11 +74,16 @@
\end{center}
\end{figure}
-Where $\lambda$ and $\alpha$ are fixed parameters\footnote{For standard scaled inputs (mean $= 0$, stddev. $=1.0$): $\alpha \approx 1.6732$, $\lambda \approx 1.0507$}. Selu's have the advantage of normalizing the output. As a rule of thumb, normalized inputs usually tend to give better results (The output of the neurons are the input of other neurons). Using a $tanh$ was the standard approach for a long time although it has some disadvantages over the other activation functions. This is because its slope becomes really small for large numbers, which slows down training noticeably.\\
+Where $\lambda$ and $\alpha$ are fixed parameters\footnote{For standard scaled inputs (mean $= 0$, stddev. $=1.0$): $\alpha \approx 1.6732$, $\lambda \approx 1.0507$}. Selu's have the advantage of normalizing the output. As a rule of thumb, normalized inputs usually tend to give better results (the output of one neuron is the input of other neurons). Using a $tanh$ was the standard approach for a long time although it has some disadvantages over the other activation functions. This is because its slope becomes really small for large numbers, which slows down training noticeably.
+
+\subsubsection{Concepts of training}

The neural network is trained with a sample of events. This sample consists of a few input parameters and a training target, which is the value the neural network will be trained to predict. Three important terms for the training of a neural network are epochs, batch size and loss function.\\
An epoch refers to one training iteration, where all of the training samples get used once and the weights and biases get modified to fit the wanted targets better. Usually a system is trained over many epochs until the weights and biases stay approximately constant at their optimal values.\\
-Batch size refers to the number of examples that are given to the system at once during the training. Batch size should neither be chosen too small, e.g. small batch sizes train slower, nor too big, some randomness is wanted. Experience shows, that a reasonable batch size usually lies between 10 to 100 examples per batch. It is important to note that by decreasing batch size we make the minimum of the mapping we want to find wider. This makes finding the general area of the minimum easier. However if the minimum gets too wide, the slope gets to small to reach the minimum in a reasonable time. On the other side by increasing the batch size too much, the minimum gets exceedingly narrower and it possible to continuously keep "jumping" over the minimum with every training step performed.\\
+Batch size refers to the number of examples that are given to the system at once during the training. Batch size should neither be chosen too small, as small batch sizes train slower, nor too big, as some randomness is wanted. Experience shows that a reasonable batch size usually lies between 10 and 100 examples per batch. It is important to note that by decreasing the batch size we make the minimum of the mapping we want to find wider. This makes finding the general area of the minimum easier. However, if the minimum gets too wide, the slope gets too small to reach the minimum in a reasonable time. On the other side, by increasing the batch size too much, the minimum gets exceedingly narrow and it is possible to continuously keep "jumping" over the minimum with every training step performed.
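+
+The following minimal Keras sketch shows where the number of epochs and the batch size enter in practice (the data and all numbers are placeholders, not the actual training setup of this thesis):
+
+\begin{verbatim}
+import numpy as np
+from keras.models import Sequential
+from keras.layers import Dense
+
+# toy data standing in for the real training sample
+x_train = np.random.normal(size=(1000, 12))
+y_train = np.random.normal(size=(1000, 1))
+
+model = Sequential()
+model.add(Dense(32, activation='tanh', input_shape=(12,)))
+model.add(Dense(1))
+model.compile(optimizer='adam', loss='mse')
+
+# one epoch = one pass over all training examples,
+# batch_size = number of examples used for one weight update
+model.fit(x_train, y_train, epochs=20, batch_size=32)
+\end{verbatim}
+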
+
+\subsubsection{Loss functions}
+
To train the system we need some way to parametrize the quality of our predictions. To account for that we use a loss function. A loss function takes the predicted values of the system and the targeted values to give us an absolute value of our performance. There are various loss functions. In the two RNN's "mean squared error" (MSE, formula \ref{MSE}) and "binary crossentropy" (BC, formula \ref{BC}) were used. The goal of every NN is to minimize the loss function.
\begin{align}
@@ -93,18 +103,137 @@
\item $L(w,b)$ is the loss over $n$ events
\end{itemize}
-There exist several methods to minimize the loss. The most simple one being stochastic gradient descent(SGD). When performing SGD we can calculate the gradient and just apply it to our weights and biases. By doing this repeatedly, we will eventually end up in a minimum\footnote{It is very possible to also just get stuck in a local minimum}.\\
-Trainings algorithm working with momentum are basically an improved version of SGD. To circumvent the problem of getting stuck in any minimum, our gradient can build up momentum of the past gradients. This is done by adding a momentum term to the applied changes to the weights and biases. The momentum is an exponentially decaying average over past gradients. This generally trains faster than SGD and has less potential to get stuck in local minima.\\
-Another commonly-used modification of stochastic gradient decent is an adaptive
-learning rate as implemented in the optimizer called RMSProp. This algorithm scales
-the learning rate of each individual parameter by an exponentially decaying average
-of the past squared gradients. The adaptation of the learning rate is done to set a
-large learning rate if the past gradients were small in order to increase the step size
-and vice versa. The average of the past squared gradients is exponentially decaying
-since otherwise the learning rate would get really small after a few iterations.
-The optimizer used in this thesis is called adam which stands for Adaptive Moment Estimation
-[25]. Adam can be described as a combination of the Momentum and RMSProp
-method since the estimates of the first and second moments of the past gradients are
-used to scale the learning rate of each individual parameter. The first moment is
-an exponentially decaying average of past gradients as in Momentum and the second
-moment is an exponentially decaying average of past squared gradients as in RMSProp. \ No newline at end of file
+\subsubsection{Stochastic gradient descent}
+
+There exist several methods to minimize the loss. The simplest one is stochastic gradient descent (SGD). When performing SGD we calculate the gradient and just apply it to our weights and biases. By doing this repeatedly, we will eventually end up in a minimum\footnote{It is very possible to also just get stuck in a local minimum}.
+
+\subsubsection{Stochastic gradient descent with Momentum}
+
+Training algorithms working with momentum are basically an improved version of SGD. To circumvent the problem of getting stuck in any minimum, our gradient can build up momentum from the past gradients. This is done by adding a momentum term to the applied changes to the weights and biases. The momentum is an exponentially decaying average over past gradients. This generally trains faster than SGD and has less potential to get stuck in local minima.
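+
+Written as code, the difference between plain SGD and SGD with momentum is small; the following numpy sketch shows both update rules (learning rate and decay constant are generic example values):
+
+\begin{verbatim}
+import numpy as np
+
+def sgd_step(w, grad, lr=0.01):
+    # plain SGD: follow only the current gradient
+    return w - lr * grad
+
+def momentum_step(w, v, grad, lr=0.01, beta=0.9):
+    # v is an exponentially decaying average of past gradients
+    v = beta * v + (1 - beta) * grad
+    return w - lr * v, v
+\end{verbatim}
+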
+
+\subsubsection{RMSProp}
+
+Another improved version of SGD is RMSProp. The RMSProp algorithm scales the learning rate of each individual parameter by an exponentially decaying average of the past squared gradients. This has the effect that the learning rate increases if the past gradients were small, in order to increase the step size, and vice versa. Additionally, the average of the past squared gradients decays exponentially to prevent the step size from getting too small.
+
+\subsubsection{Adam}
+
+The most commonly used algorithm however is the Adam training algorithm \cite{chilimbi2014project}, which stands for Adaptive Moment Estimation (see formulas \ref{adam_alg}). It is essentially a combination of Momentum and RMSProp and takes the best of both. It is also the one used to train both RNN's of this thesis, as it converges the quickest and most reliably to the global minimum. The algorithm contains two moments. The first moment is an exponentially decaying average of past gradients as in Momentum, while the second moment is an exponentially decaying average of past squared gradients as in RMSProp (a short code sketch of one such update step is given after the parameter list below).
+
+
+\begin{center}
+Initial state:
+\begin{equation*}
+V_{d_W} = 0, S_{d_W} = 0, V_{d_b} = 0, S_{d_b} = 0
+\end{equation*}
+On iteration t:
+\begin{equation}
+\begin{split}
+V_{d_W} = \beta_1 V_{d_W} + (1-\beta_1) dW, V_{d_b} = \beta_1 V_{d_b} + (1-\beta_1) db\\
+S_{d_W} = \beta_2 S_{d_W} + (1-\beta_2) dW^2, S_{d_b} = \beta_2 S_{d_b} + (1-\beta_2) db^2 \\
+V^{corrected}_{dW} = \frac{V_{dW}}{1-\beta_1^t}, V^{corrected}_{db} = \frac{V_{db}}{1-\beta_1^t}\\
+S^{corrected}_{dW} = \frac{S_{dW}}{1-\beta_2^t}, S^{corrected}_{db} = \frac{S_{db}}{1-\beta_2^t}\\
+W_{t+1} = W_{t} - \alpha \frac{V^{corrected}_{dW}}{\sqrt{S^{corrected}_{dW}}+\epsilon}\\
+b_{t+1} = b_{t} - \alpha \frac{V^{corrected}_{db}}{\sqrt{S^{corrected}_{db}}+\epsilon}
+\label{adam_alg}
+\end{split}
+\end{equation}
+\end{center}
+
+With:
+
+\begin{itemize}
+\item $V_{d_W}$, $V_{db}$ correspond to the Momentum part
+\item $S_{d_W}$, $S_{db}$ correspond to the RMSProp part
+\item $\epsilon$: this constant is chosen to be very small and is only there to prevent division by $0$ (usually $\epsilon = 10^{-8}$)
+\item $\alpha$: learning rate (needs to be tuned according to the problem)
+\item $\beta_1$: decay constant of the Momentum part (usually $\beta_1 = 0.9$)
+\item $\beta_2$: decay constant of the RMSProp part (usually $\beta_2 = 0.999$)
+\end{itemize}
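+
+The following numpy sketch performs one Adam update for a single parameter array, directly following formulas \ref{adam_alg} (it is an illustration, not the implementation used internally by Keras):
+
+\begin{verbatim}
+import numpy as np
+
+def adam_step(w, grad, V, S, t, alpha=0.001,
+              beta1=0.9, beta2=0.999, eps=1e-8):
+    V = beta1 * V + (1 - beta1) * grad        # Momentum part
+    S = beta2 * S + (1 - beta2) * grad**2     # RMSProp part
+    V_corr = V / (1 - beta1**t)               # bias correction
+    S_corr = S / (1 - beta2**t)
+    w = w - alpha * V_corr / (np.sqrt(S_corr) + eps)
+    return w, V, S
+\end{verbatim}
+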
+
+\subsubsection{Decaying learning rate}
+
+To counteract the problem of "jumping" over the minimum repeatedly, some NN also use a decaying learning rate during their training. By using this, the step size gets smaller with every consecutive step, which should in principle result in the step size converging to zero when reaching the global minimum. Most NN's, as well as the two RNN's used in this thesis, usually don't use a decaying learning rate as the Adam algorithm on its own already performs well enough.
+
+\subsubsection{Batch normalisation}
+
+Another important technique often used in NN is Batch Normalisation \cite{ioffe2015batch}, \cite{cooijmans2016recurrent}. By performing Batch Normalization we normalize and center the input around zero in between every layer of the NN. Batch Normalization has proven to be a potent technique to make NN train faster and even perform better.
+
+\begin{figure}[H]
+\begin{center}
+\includegraphics[width=1\textwidth]{img/batch_norm.jpeg}
+\caption{The effects of Batch Normalization on data}
+\label{batch_norm}
+\end{center}
+\end{figure}
+
+\subsection{Recurrent Neural Networks}
+
+\subsubsection{General concepts}
+
+Recurrent Neural Networks (RNN) are a subclass of neural networks and are specialised to deal with sequential data structures. There are various applications for RNN's such as speech recognition, music generation, sentiment classification, DNA sampling and so on. Generally, normal NN don't perform that well on sequential data. One of the reasons is that they don't share features learned across different positions in the data\footnote{In our experiment positions of the particles with x,y,z in the detector}. Another problem is that the input and output don't necessarily have to have the same length every time.\\
+It is important to note that when using RNN's, the units we called neurons before are usually called cells.\\
+RNN's provide a much better representation of the data, which also helps to reduce the number of variables in the system and hereby makes it train more efficiently.
+
+\begin{figure}[H]
+\begin{center}
+\includegraphics[width=1\textwidth]{img/RNN_general_architecture.png}
+\caption{General RNN architecture}
+\label{RNN_arch}
+\end{center}
+\end{figure}
+
+With:
+
+\begin{itemize}
+\item $ x^{\langle t \rangle}$: Input at timestep $t$ with $T_x$ total steps
+\item $ \hat{y}^{\langle t \rangle}$: Output at timestep $t$
+\item $ a^{\langle 0 \rangle}$: Initial value given to the RNN in the first step
+\item $ a^{\langle t \rangle}$: Information passed over from the last step
+\end{itemize}
+
+In figure \ref{RNN_arch} the general architecture of a RNN can be seen. Every step of the input data ($ x^{\langle t \rangle}$) gets sequentially fed into the RNN, which then generates some output $ \hat{y}^{\langle t \rangle}$ after every step of the input. To share already learned information and features with future steps, $ a^{\langle t \rangle}$ gets passed down as additional input into the RNN for the next step.
+
+\subsubsection{Most common architectures}
+
+There are two concepts of how the data is fed into the system and three structures of RNN's depending on the input and output of the system.\\
+
+Usually the data is fed into the system step by step. For problems where the entire sequence is not known from the start, this is the only way to feed the data into the system.\\
+If however the entire sequence is already known at the beginning, e.g. sequence classification, the information is often fed into the system from both sides. Networks with this specific architecture are called bidirectional RNN's \cite{schuster1997bidirectional}. This often increases the system's performance.\\
+However, as with the first RNN we wanted to predict particle tracks after leaving the detector, we could only use a one-directional RNN, as the whole track wasn't available. The second RNN is actually a classifier of the tracks. With the whole information available from the start, it was designed to be a bidirectional RNN.\\
+
+A system has a "many-to-one" architecture if we have a sequential input but only care about the final output of the system, e.g. classification problems. This is the architecture used for both RNN's. With the same reasoning, if we have sequential inputs and care about the output generated at each step, e.g. speech recognition, the architecture is called "many-to-many". A "one-to-one" architecture is basically just a regular NN.
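+
+To make the data flow of figure \ref{RNN_arch} concrete, the following schematic numpy sketch implements the recurrence of a basic (unidirectional) RNN cell; shapes and weights are purely illustrative:
+
+\begin{verbatim}
+import numpy as np
+
+def rnn_forward(x_seq, Wa, Wx, b, a0):
+    a, outputs = a0, []
+    for x_t in x_seq:                        # one step per sequence element
+        a = np.tanh(Wa @ a + Wx @ x_t + b)   # information passed to next step
+        outputs.append(a)
+    return outputs                           # "many-to-many": use all outputs
+                                             # "many-to-one":  use outputs[-1]
+
+rng = np.random.default_rng(0)
+x_seq = rng.normal(size=(8, 3))              # e.g. 8 steps of 3 coordinates
+Wa, Wx = rng.normal(size=(16, 16)), rng.normal(size=(16, 3))
+out = rnn_forward(x_seq, Wa, Wx, np.zeros(16), a0=np.zeros(16))
+\end{verbatim}
+
+A bidirectional RNN would additionally run the same kind of recurrence over the reversed sequence and combine the two resulting outputs.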
A "one-to-one" architecture is basically just a regular NN. + +\subsubsection{Cell types} + +Besides the basic RNN cell type, which shall not be discussed in detail in this thesis, the two most influential and successful cell types are Long-Short-Term-Memory(LSTM) \cite{gers1999learning} cells and Gated Recurrent Units(GRU) \cite{chung2014empirical}. However in this thesis only LSTM cells will be explained in greater detail as the were the only cells used in the RNN's.\\ + +GRU's were invented with the intention to create a cell type with a similar performance to the LSTM cell while having a simpler internal structure. By being less complex as an LSTM cell a GRU cell has also less parameters to modify during training which also speeds up training.\\ + +LSTM cells (see figure \ref{LSTM_arch} have many useful properties such as a forget gate, an update gate as well as an output gate. With this cell type, it is easy to pass down information for the following steps without it being altered in a big way (Long term memory). However, there are also ways built in to update this passed down information with new one (Short term memory). Even though GRU's are gaining more and more traction, LSTM-cells are still widely considered to be the most successful type of cells. + +\begin{figure}[H] +\begin{center} +\includegraphics[width=0.8\textwidth]{img/LSTM_cell.png} +\caption{Architecture of a LSTM cell} +\label{LSTM_arch} +\end{center} +\end{figure} + +The math behind the LSTM cell looks as follows\footnote{The notation used is the same as in figure \ref{LSTM_arch}}: + +\begin{equation} +\begin{split} +\text{\~{c}}^{\langle t \rangle} = tanh(W_c \left[ a^{\langle t \rangle}, x^{\langle t \rangle} \right] + b_c)\\ +\Gamma_u = \sigma(W_u \left[ a^{\langle t \rangle}, x^{\langle t \rangle} \right] + b_u)\\ +\Gamma_o = \sigma(W_o \left[ a^{\langle t \rangle}, x^{\langle t \rangle} \right] + b_o)\\ +c^{\langle t \rangle} = \Gamma_u \cdot \text{\~{c}}^{\langle t \rangle} + \Gamma_o \cdot tanh(c^{\langle t - 1 \rangle})\\ +a^{\langle t \rangle} = \Gamma_o \cdot tanh(c^{\langle t \rangle}) +\end{split} +\end{equation} + +\subsection{XGBoost} + +XGBoost\cite{ML:XGBoost} is based on boosted decision trees (extreme gradient boosting). In this approach the data samples get split using a decision tree. With every step a new tree gets created to account for the errors of prior models, which are then added to create the final prediction. A gradient descent algorithm is used to minimize loss when adding new trees. \\ + +Its is often used as a classifier, however it can also used in regression models. In this thesis, an XGBoost classifier was used to determine a baseline and have some comparison for our bidirectional RNN classifier. 
\ No newline at end of file
diff --git a/Report/05_Data.aux b/Report/05_Data.aux
new file mode 100644
index 0000000..2027651
--- /dev/null
+++ b/Report/05_Data.aux
@@ -0,0 +1,44 @@
+\relax
+\providecommand\hyper@newdestlabel[2]{}
+\citation{agostinelli2003s}
+\citation{pedregosa2011scikit}
+\@writefile{toc}{\contentsline {section}{\numberline {6}Data}{31}{section.6}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {6.1}General information}{31}{subsection.6.1}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {6.2}Preprocessing}{31}{subsection.6.2}}
+\@writefile{toc}{\contentsline {subsubsection}{\numberline {6.2.1}Dataset 1}{31}{subsubsection.6.2.1}}
+\@writefile{toc}{\contentsline {subsubsection}{\numberline {6.2.2}Dataset 2}{32}{subsubsection.6.2.2}}
+\newlabel{dataset2}{{6.2.2}{32}{Dataset 2}{subsubsection.6.2.2}{}}
+\@setckpt{05_Data}{
+\setcounter{page}{33}
+\setcounter{equation}{11}
+\setcounter{enumi}{0}
+\setcounter{enumii}{0}
+\setcounter{enumiii}{0}
+\setcounter{enumiv}{0}
+\setcounter{footnote}{22}
+\setcounter{mpfootnote}{0}
+\setcounter{part}{0}
+\setcounter{section}{6}
+\setcounter{subsection}{2}
+\setcounter{subsubsection}{2}
+\setcounter{paragraph}{0}
+\setcounter{subparagraph}{0}
+\setcounter{figure}{11}
+\setcounter{table}{3}
+\setcounter{parentequation}{0}
+\setcounter{AM@survey}{0}
+\setcounter{ContinuedFloat}{0}
+\setcounter{subfigure}{0}
+\setcounter{subtable}{0}
+\setcounter{float@type}{4}
+\setcounter{Item}{0}
+\setcounter{Hfootnote}{22}
+\setcounter{bookmark@seq@number}{44}
+\setcounter{@stackindex}{1}
+\setcounter{ROWcellindex@}{0}
+\setcounter{TABrowindex@}{2}
+\setcounter{TABcolindex@}{1}
+\setcounter{TABalignmentindex@}{0}
+\setcounter{pp@next@reset}{0}
+\setcounter{section@level}{3}
+}
diff --git a/Report/05_Data.tex b/Report/05_Data.tex
new file mode 100644
index 0000000..5d7084a
--- /dev/null
+++ b/Report/05_Data.tex
@@ -0,0 +1,21 @@
+\section{Data}
+
+\subsection{General information}
+There were two sets of data used in this thesis. First, each of the datasets was shuffled to counteract any bias given by the sequence of the data and then split into two parts: $80\%$ was used to train the model (training set) while the remaining $20\%$ was later used to test the model (test set).\\
+The sets were created using a Geant4 \cite{agostinelli2003s} based simulation with the specific configuration of the $\mu \rightarrow 3e$-experiment.\\
+
+The first dataset (dataset 1) contained 46896 true 8-hit tracks of recurling particles, with each hit consisting of 3 coordinates (x,y,z).\\
+
+The second dataset (dataset 2) contained 109821 tracks. These were exclusively tracks that the current track reconstruction algorithm wasn't conclusively able to assign to an event. As a result every event contained all the preselected tracks, computed by the already existing algorithm, that were calculated to be a possible track. It is important to note that only for around $75\%$ of the events the true track was in this preselection. This posed an additional challenge, as one could not just simply choose the best fitting track. To assign the tracks to their corresponding events, they all carried an event number matching them with their event\footnote{One number for all tracks of the same event}. Each track contained the coordinates of the 8 hits (x,y,z), the value of the $\chi^2$-fit performed by the reconstruction algorithm, the event number as well as a label which told us if the track was true or false\footnote{Only used for training and testing of the system}.
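+
+The shuffling and the $80\%/20\%$ split described above can be done in one step with Scikit-learn; a minimal sketch (shapes and variable names are illustrative):
+
+\begin{verbatim}
+import numpy as np
+from sklearn.model_selection import train_test_split
+
+X = np.random.normal(size=(1000, 8, 3))   # toy stand-in for the tracks
+y = np.random.normal(size=(1000, 12))     # toy stand-in for the targets
+
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.2, shuffle=True, random_state=42)
+\end{verbatim}
+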
+
+\subsection{Preprocessing}
+
+\subsubsection{Dataset 1}
+
+To optimize the data fed into the RNN, dataset 1 was preprocessed. In a first step a min-max scaler with a range of $[-0.9,0.9]$ from the python library Scikit-learn \cite{pedregosa2011scikit} was used. In a second step the data got shuffled and split into the training and test sets. The first four steps were used as an input for the RNN while the last four steps were our prediction target.
+
+\subsubsection{Dataset 2}
+\label{dataset2}
+
+Analogously to dataset 1, first the coordinates of the tracks as well as the $\chi^2$ were scaled with a min-max scaler (separate ones) with a range of $[-0.9,0.9]$ from the python library Scikit-learn. Then the first four steps of every track were taken and fed into our first, track predicting RNN. For each of the last four steps of a track we then had two sets of coordinates: the coordinates predicted by our RNN and the coordinates given by the reconstructing algorithm. To have the information of the $\chi^2$ fit available at each step, we created an array of shape $(\#tracks, steps, 4)$ (1 dimension for each of the coordinates and another for the $\chi^2$ fit). However, at the spot of the x,y,z coordinates there were neither the predicted coordinates of our RNN nor the coordinates given by the reconstructing algorithm, but instead the difference of the two. Our target was the truth value of each track\footnote{$1 =$ true, $0 =$ false}.
+
diff --git a/Report/06_RNN_used.aux b/Report/06_RNN_used.aux
new file mode 100644
index 0000000..114243c
--- /dev/null
+++ b/Report/06_RNN_used.aux
@@ -0,0 +1,43 @@
+\relax
+\providecommand\hyper@newdestlabel[2]{}
+\@writefile{toc}{\contentsline {section}{\numberline {7}RNN's used}{33}{section.7}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {7.1}RNN for track prediction}{33}{subsection.7.1}}
+\@writefile{lof}{\contentsline {figure}{\numberline {12}{\ignorespaces RNN Prediction architecture\relax }}{33}{figure.caption.16}}
+\newlabel{RNN_pr_arch}{{12}{33}{RNN Prediction architecture\relax }{figure.caption.16}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {7.2}RNN for classification of tracks}{34}{subsection.7.2}}
+\@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces RNN classifier architecture\relax }}{35}{figure.caption.17}}
+\newlabel{RNN_cl_arch}{{13}{35}{RNN classifier architecture\relax }{figure.caption.17}{}}
+\@setckpt{06_RNN_used}{
+\setcounter{page}{37}
+\setcounter{equation}{11}
+\setcounter{enumi}{0}
+\setcounter{enumii}{0}
+\setcounter{enumiii}{0}
+\setcounter{enumiv}{0}
+\setcounter{footnote}{24}
+\setcounter{mpfootnote}{0}
+\setcounter{part}{0}
+\setcounter{section}{7}
+\setcounter{subsection}{2}
+\setcounter{subsubsection}{0}
+\setcounter{paragraph}{0}
+\setcounter{subparagraph}{0}
+\setcounter{figure}{13}
+\setcounter{table}{3}
+\setcounter{parentequation}{0}
+\setcounter{AM@survey}{0}
+\setcounter{ContinuedFloat}{0}
+\setcounter{subfigure}{0}
+\setcounter{subtable}{0}
+\setcounter{float@type}{4}
+\setcounter{Item}{0}
+\setcounter{Hfootnote}{24}
+\setcounter{bookmark@seq@number}{47}
+\setcounter{@stackindex}{1}
+\setcounter{ROWcellindex@}{0}
+\setcounter{TABrowindex@}{2}
+\setcounter{TABcolindex@}{1}
+\setcounter{TABalignmentindex@}{0}
+\setcounter{pp@next@reset}{0}
+\setcounter{section@level}{2}
+}
diff --git a/Report/06_RNN_used.tex b/Report/06_RNN_used.tex
new file mode 100644
index 0000000..48bb9a6
--- /dev/null
+++ b/Report/06_RNN_used.tex
@@ -0,0 +1,62 @@
+\section{RNN's used}
+
+\subsection{RNN for track prediction}
+
+The first RNN had the task of predicting the positions of the 4 hits of the recurling part of the track. As input, the 4 hits of the outgoing particle are used.
+
+\begin{figure}[h]
+\begin{center}
+\includegraphics[width=1\textwidth]{img/RNN-Pred-Arch.png}
+\caption{RNN Prediction architecture}
+\label{RNN_pr_arch}
+\end{center}
+\end{figure}
+
+\newpage
+Figure \ref{RNN_pr_arch} shows the architecture used for the RNN track prediction. It is a one-directional RNN with the following layout for its layers:
+
+\begin{itemize}
+\item[1. Layer:] 50 LSTM cells
+\item[2. Layer:] 50 LSTM cells
+\item[3. Layer:] 50 Dense cells\footnote{Dense cells are basically just basic NN cells as explained in section \ref{ML_Intro}}
+\item[4. Layer:] 12 Dense cells
+\end{itemize}
+
+The optimal number of layers, cells and cell-type was found by systematically comparing RNN's that are equal besides one property (e.g. using GRU's instead of LSTM cells). Also, all the activation functions were chosen to be selu's.\\
+The loss and metric function used were the mean squared error (MSE), as this has the most similarity with a euclidean distance. The model itself was trained with the Adam algorithm.\\
+The output was a 12 dimensional vector of the shape: $(x_5, y_5, z_5, x_6, y_6, z_6, ..., z_8)$. Note that the numeration starts with 5 as the 5$^\text{th}$ hit of the track is the first one to be predicted.
+
+\subsection{RNN for classification of tracks}
+
+The second RNN was used as a classifier to find the right tracks. As already described in section \ref{dataset2}, the input data was of shape $(batchsize, 4, 4)$ with $(\Delta x_i, \Delta y_i, \Delta z_i, \chi^2)_{\text{at step i}}$.\\
+
+Where:
+
+\begin{itemize}
+\item $\Delta x_i = x_{i,\text{preselected}} - x_{i,\text{predicted}}$, the difference between the track preselected by the original tracking algorithm and the one predicted by the RNN
+\item $\Delta y_i, \Delta z_i$ same as for $\Delta x_i$
+\item Value of the $\chi^2$ fit
+\end{itemize}
+
+The output was then just a one dimensional vector, where $1$ stands for a true track and $0$ stands for a false track. The RNN itself is going to predict a number between $0$ and $1$, which can be interpreted as the amount of confidence that it is a true track.
+
+\begin{figure}[H]
+\begin{center}
+\includegraphics[width=0.75\textwidth]{img/RNN-Classifier-Arch.png}
+\caption{RNN classifier architecture}
+\label{RNN_cl_arch}
+\end{center}
+\end{figure}
+
+The RNN for the classification was chosen to be bidirectional and, as in the RNN before, LSTM cells were used. Here a tanh was used for all the activation functions besides the last one. The last layer used a softmax activation function\footnote{Similar to a tanh but bounded between [0,1]}. As tanh doesn't automatically do batch normalization, a batch normalization layer was added between every layer of cells.\\
+The layout of the layers was as follows (a code sketch of such a stack follows the list):
+
+\begin{itemize}
+\item[1. Layer:] 30 LSTM cells (bidirectional, batch normalization)
+\item[2. Layer:] 30 LSTM cells (bidirectional, batch normalization)
+\item[3. Layer:] 30 LSTM cells (bidirectional, batch normalization)
+\item[4. Layer:] 50 Dense cells (batch normalization)
+\item[5. Layer:] 1 Dense cell (softmax activation function)
+\end{itemize}
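+
+As an illustration, a layer stack of this kind could be set up in Keras roughly as follows. This is only a sketch: the exact options (activations, optimizer settings, output non-linearity) are illustrative and not necessarily identical to the model used here; a sigmoid is used as the usual single-output way to obtain a confidence value in $[0,1]$.
+
+\begin{verbatim}
+from keras.models import Sequential
+from keras.layers import LSTM, Dense, Bidirectional, BatchNormalization
+
+model = Sequential()
+# three bidirectional LSTM layers of 30 cells, each followed by
+# a batch normalization layer
+model.add(Bidirectional(LSTM(30, return_sequences=True), input_shape=(4, 4)))
+model.add(BatchNormalization())
+model.add(Bidirectional(LSTM(30, return_sequences=True)))
+model.add(BatchNormalization())
+model.add(Bidirectional(LSTM(30)))
+model.add(BatchNormalization())
+model.add(Dense(50, activation='tanh'))
+model.add(BatchNormalization())
+model.add(Dense(1, activation='sigmoid'))   # confidence that the track is true
+model.compile(optimizer='adam', loss='binary_crossentropy',
+              metrics=['accuracy'])
+\end{verbatim}
+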
+
+The optimal number of layers, cells and cell-type was found by systematically comparing different RNN architectures. Also, it is important to note that the second RNN directly depends on the first RNN. When changing the first RNN, one would also have to retrain the second.
\ No newline at end of file
diff --git a/Report/07_Analysis.aux b/Report/07_Analysis.aux
new file mode 100644
index 0000000..714ae85
--- /dev/null
+++ b/Report/07_Analysis.aux
@@ -0,0 +1,50 @@
+\relax
+\providecommand\hyper@newdestlabel[2]{}
+\@writefile{toc}{\contentsline {section}{\numberline {8}Results}{37}{section.8}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {8.1}Best $\chi ^2$}{37}{subsection.8.1}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {8.2}RNN classifier with RNN track prediction input}{37}{subsection.8.2}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {8.3}XGBoost}{37}{subsection.8.3}}
+\newlabel{XGB_tp_fp_hist}{{14a}{38}{Number of false positives and false negatives depending cut\relax }{figure.caption.18}{}}
+\newlabel{sub@XGB_tp_fp_hist}{{a}{38}{Number of false positives and false negatives depending cut\relax }{figure.caption.18}{}}
+\newlabel{XGB_ROC}{{14b}{38}{ROC curve for the XGBoost model\relax }{figure.caption.18}{}}
+\newlabel{sub@XGB_ROC}{{b}{38}{ROC curve for the XGBoost model\relax }{figure.caption.18}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {14}{\ignorespaces XGBoost classifier figures\relax }}{38}{figure.caption.18}}
+\citation{ML:ROC_AUC:Bradley:1997:UAU:1746432.1746434}
+\citation{gent1992special}
+\citation{graves2013speech}
+\@writefile{toc}{\contentsline {subsection}{\numberline {8.4}Comparison in performance of the RNN and XGBoost}{39}{subsection.8.4}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {8.5}Outlook}{39}{subsection.8.5}}
+\@setckpt{07_Analysis}{
+\setcounter{page}{40}
+\setcounter{equation}{11}
+\setcounter{enumi}{0}
+\setcounter{enumii}{0}
+\setcounter{enumiii}{0}
+\setcounter{enumiv}{0}
+\setcounter{footnote}{28}
+\setcounter{mpfootnote}{0}
+\setcounter{part}{0}
+\setcounter{section}{8}
+\setcounter{subsection}{5}
+\setcounter{subsubsection}{0}
+\setcounter{paragraph}{0}
+\setcounter{subparagraph}{0}
+\setcounter{figure}{14}
+\setcounter{table}{3}
+\setcounter{parentequation}{0}
+\setcounter{AM@survey}{0}
+\setcounter{ContinuedFloat}{0}
+\setcounter{subfigure}{2}
+\setcounter{subtable}{0}
+\setcounter{float@type}{4}
+\setcounter{Item}{0}
+\setcounter{Hfootnote}{28}
+\setcounter{bookmark@seq@number}{53}
+\setcounter{@stackindex}{1}
+\setcounter{ROWcellindex@}{0}
+\setcounter{TABrowindex@}{2}
+\setcounter{TABcolindex@}{1}
+\setcounter{TABalignmentindex@}{0}
+\setcounter{pp@next@reset}{0}
+\setcounter{section@level}{2}
+}
diff --git a/Report/07_Analysis.tex b/Report/07_Analysis.tex
new file mode 100644
index 0000000..3b53c4d
--- /dev/null
+++ b/Report/07_Analysis.tex
@@ -0,0 +1,45 @@
+\section{Results}
+
+\subsection{Best $\chi^2$}
+
+The simplest way to classify which one is the right path out of the preselection would be to just take the path with the smallest $\chi^2$. Like this we would choose the path that agrees the most with the track reconstructing algorithm that gives us our preselection. However, as already mentioned, in dataset 2 only around $75\%$ of the events even have the true track among the ones preselected by the reconstruction\footnote{E.g. by not having all 8 hits as a result of detector efficiency (the algorithm searches for 8 hits)}. In this case we would have to label all the tracks as false tracks. By simply choosing the best $\chi^2$ we don't account for this at all. So, even if the true track really always was the one with the best $\chi^2$, our maximum accuracy would by default only be around $75\%$.\\
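+
+This baseline can be written in a few lines; the following sketch assumes the tracks are available as a table with columns for the event number, the $\chi^2$ value and the truth label (all names and numbers are assumptions for illustration):
+
+\begin{verbatim}
+import pandas as pd
+
+# toy stand-in: candidate tracks for two events
+tracks = pd.DataFrame({
+    'event': [1, 1, 2],
+    'chi2':  [3.2, 1.7, 4.5],
+    'label': [1, 0, 1],        # 1 = true track, 0 = false track
+})
+
+# per event, pick the candidate with the smallest chi2
+best = tracks.loc[tracks.groupby('event')['chi2'].idxmin()]
+
+# fraction of events for which this pick is actually the true track
+accuracy = best['label'].mean()
+\end{verbatim}
+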
+
+It turns out that the accuracy of this method is only $52.01\%$. So there is a need for better algorithms for this classification problem.
+
+\subsection{RNN classifier with RNN track prediction input}
+
+The two RNN's that we put in sequence (first track prediction, then classification) form a much more complex model. When trained, they were able to label the tracks correctly with an accuracy of $87\%$. Note that this exceeds the $75\%$ limit of always choosing one track for every event\footnote{Usually the one that is considered the best by the corresponding algorithm}.
+
+\subsection{XGBoost}
+
+An XGBoost classifier was also implemented and trained to have some more comparison for the performance of our RNN classification. XGBoost models train much faster than NN and are often a serious competitor to them, as they often reach similar performances. The input of the XGBoost model was the same as for the RNN classification. The accuracy of this classifier in labelling the tracks was $80.74\%$ with a cut applied at $0.5$. Note that here we also exceeded the $75\%$, even though with a smaller accuracy than the RNN.
+
+\begin{figure}[H]
+\begin{center}
+\begin{subfigure}{0.8\textwidth}
+\includegraphics[width=1\textwidth]{img/XGB_tf-ft_hist.png}
+\caption{Number of false positives and false negatives depending on the cut}
+\label{XGB_tp_fp_hist}
+\end{subfigure}
+\begin{subfigure}{0.8\textwidth}
+\includegraphics[width=1\textwidth]{img/XGB_ROC-curve.png}
+\caption{ROC curve for the XGBoost model}
+\label{XGB_ROC}
+\end{subfigure}
+\caption{XGBoost classifier figures}
+\end{center}
+\end{figure}
+
+As shown in figure \ref{XGB_tp_fp_hist}, depending on where we apply the cut we get a different number of false positives and false negatives. In figure \ref{XGB_tp_fp_hist} the blue bins are false positives and the orange bins are false negatives. Depending on what is more important for the experiment, the cut can be shifted accordingly\footnote{E.g. all positives have to be correct $\rightarrow$ increase the cut}.\\
+
+Figure \ref{XGB_ROC} shows the ROC curve \cite{ML:ROC_AUC:Bradley:1997:UAU:1746432.1746434} of the XGB classifier. Generally, the larger the area under the ROC curve, the better the classifier. In the perfect case, where everything gets labelled $100\%$ correctly, the area under the curve would be 1. Here we have an area of $0.88$.\\
+
+\subsection{Comparison in performance of the RNN and XGBoost}
+
+The RNN classifier performs with around $6\%$ better accuracy than the XGBoost classifier. Also, by comparing the ROC curves in figure \textbf{ROC of both}, one can clearly see that the area under the RNN ROC curve is bigger.
+
+\subsection{Outlook}
+
+Where do we want to go from here? One way to improve the algorithm would for example be to create a fully connected neural network \cite{gent1992special}. By doing this, both RNN's would be connected and would train as a unit. This would have the positive effect of not having to retrain the classifying RNN as well whenever the first one gets changed. \\
+Another goal could be to make this type of RNN applicable to more types of problems.
So, for example, instead of being restricted to tracks of a specific length (here eight hits), one could make it more general to be able to deal with an arbitrary length of the track. This would be especially useful for this experiment, as a lot of particles don't just recurl once but many times over (in the central station). Hereby they create a lot of background, and minimizing it is crucial to reach our desired sensitivity of $10^{-16}$.\\
+The ultimate goal however would be to replace the current track reconstruction algorithm altogether and put a RNN in its place. This could for example be done by an RNN performing beam search\footnote{Both inside out and outside in} \cite{graves2013speech} to find the true track of a particle. In other areas, beam search has proven to be a powerful tool and there is a lot of potential for this sort of algorithm in physics as well, especially in track reconstruction.
\ No newline at end of file
diff --git a/Report/07_Anlysis.aux b/Report/07_Anlysis.aux
new file mode 100644
index 0000000..f424641
--- /dev/null
+++ b/Report/07_Anlysis.aux
@@ -0,0 +1,36 @@
+\relax
+\providecommand\hyper@newdestlabel[2]{}
+\@setckpt{07_Anlysis}{
+\setcounter{page}{34}
+\setcounter{equation}{11}
+\setcounter{enumi}{0}
+\setcounter{enumii}{0}
+\setcounter{enumiii}{0}
+\setcounter{enumiv}{0}
+\setcounter{footnote}{22}
+\setcounter{mpfootnote}{0}
+\setcounter{part}{0}
+\setcounter{section}{7}
+\setcounter{subsection}{2}
+\setcounter{subsubsection}{0}
+\setcounter{paragraph}{0}
+\setcounter{subparagraph}{0}
+\setcounter{figure}{11}
+\setcounter{table}{3}
+\setcounter{parentequation}{0}
+\setcounter{AM@survey}{0}
+\setcounter{ContinuedFloat}{0}
+\setcounter{subfigure}{0}
+\setcounter{subtable}{0}
+\setcounter{float@type}{4}
+\setcounter{Item}{0}
+\setcounter{Hfootnote}{22}
+\setcounter{bookmark@seq@number}{47}
+\setcounter{@stackindex}{1}
+\setcounter{ROWcellindex@}{0}
+\setcounter{TABrowindex@}{2}
+\setcounter{TABcolindex@}{1}
+\setcounter{TABalignmentindex@}{0}
+\setcounter{pp@next@reset}{0}
+\setcounter{section@level}{2}
+}
diff --git a/Report/08_Appendix.aux b/Report/08_Appendix.aux
new file mode 100644
index 0000000..dd7b30c
--- /dev/null
+++ b/Report/08_Appendix.aux
@@ -0,0 +1,37 @@
+\relax
+\providecommand\hyper@newdestlabel[2]{}
+\@writefile{toc}{\contentsline {section}{\numberline {9}Acknowledgements}{40}{section.9}}
+\@setckpt{08_Appendix}{
+\setcounter{page}{41}
+\setcounter{equation}{11}
+\setcounter{enumi}{0}
+\setcounter{enumii}{0}
+\setcounter{enumiii}{0}
+\setcounter{enumiv}{0}
+\setcounter{footnote}{28}
+\setcounter{mpfootnote}{0}
+\setcounter{part}{0}
+\setcounter{section}{9}
+\setcounter{subsection}{0}
+\setcounter{subsubsection}{0}
+\setcounter{paragraph}{0}
+\setcounter{subparagraph}{0}
+\setcounter{figure}{14}
+\setcounter{table}{3}
+\setcounter{parentequation}{0}
+\setcounter{AM@survey}{0}
+\setcounter{ContinuedFloat}{0}
+\setcounter{subfigure}{2}
+\setcounter{subtable}{0}
+\setcounter{float@type}{4}
+\setcounter{Item}{0}
+\setcounter{Hfootnote}{28}
+\setcounter{bookmark@seq@number}{54}
+\setcounter{@stackindex}{1}
+\setcounter{ROWcellindex@}{0}
+\setcounter{TABrowindex@}{2}
+\setcounter{TABcolindex@}{1}
+\setcounter{TABalignmentindex@}{0}
+\setcounter{pp@next@reset}{0}
+\setcounter{section@level}{1}
+}
diff --git a/Report/08_Appendix.tex b/Report/08_Appendix.tex
new file mode 100644
index 0000000..1d48a90
--- /dev/null
+++ b/Report/08_Appendix.tex
@@ -0,0 +1,6 @@
+\section{Acknowledgements}
+
+I would like to thank the Physics Department of the 
University of Zurich. +Special thanks goes to Prof. Nicola Serra of the University of Zurich who let me do this thesis in his research and introduced me into the world of neural networks.\\ +I would also like to express my gratitude towards Dr. Patrick Owen for providing me with data and always being here to help.\\ +Also special thanks goes to Jonas Eschle, who was always there to help me with programming the RNN's and discuss techniques and tricks. \ No newline at end of file diff --git a/Report/bib/General.bib b/Report/bib/General.bib index 29c847b..5b35db6 100644 --- a/Report/bib/General.bib +++ b/Report/bib/General.bib @@ -1,5 +1,48 @@ %%% General Books and Citations +%beam search +@inproceedings{graves2013speech, + title={Speech recognition with deep recurrent neural networks}, + author={Graves, Alex and Mohamed, Abdel-rahman and Hinton, Geoffrey}, + booktitle={Acoustics, speech and signal processing (icassp), 2013 ieee international conference on}, + pages={6645--6649}, + year={2013}, + organization={IEEE} +} + +%Fully connected NN +@article{gent1992special, + title={Special Feature. Predicting time series by a fully connected neural network trained by back propagation}, + author={Gent, CR and Sheppard, CP}, + journal={Computing \& Control Engineering Journal}, + volume={3}, + number={3}, + pages={109--112}, + year={1992}, + publisher={IET} +} + +%Geant4 +@article{agostinelli2003s, + title={S. Agostinelli et al.(GEANT4 Collaboration), Nucl. Instrum. Methods Phys. Res., Sect. A 506, 250 (2003).}, + author={Agostinelli, S}, + journal={Nucl. Instrum. Methods Phys. Res., Sect. A}, + volume={506}, + pages={250}, + year={2003} +} + +%sklearn +@article{pedregosa2011scikit, + title={Scikit-learn: Machine learning in Python}, + author={Pedregosa, Fabian and Varoquaux, Ga{\"e}l and Gramfort, Alexandre and Michel, Vincent and Thirion, Bertrand and Grisel, Olivier and Blondel, Mathieu and Prettenhofer, Peter and Weiss, Ron and Dubourg, Vincent and others}, + journal={Journal of machine learning research}, + volume={12}, + number={Oct}, + pages={2825--2830}, + year={2011} +} + %Thomson book @book{thomson2013modern, title={Modern particle physics}, @@ -225,4 +268,16 @@ volume={14}, pages={571--582}, year={2014} +} + +%Bidirectional RNN +@article{schuster1997bidirectional, + title={Bidirectional recurrent neural networks}, + author={Schuster, Mike and Paliwal, Kuldip K}, + journal={IEEE Transactions on Signal Processing}, + volume={45}, + number={11}, + pages={2673--2681}, + year={1997}, + publisher={IEEE} } \ No newline at end of file diff --git a/Report/img/LSTM_cell.png b/Report/img/LSTM_cell.png new file mode 100644 index 0000000..1dcb93d --- /dev/null +++ b/Report/img/LSTM_cell.png Binary files differ diff --git a/Report/img/RNN-Classifier-Arch.png b/Report/img/RNN-Classifier-Arch.png new file mode 100644 index 0000000..8aa1bd6 --- /dev/null +++ b/Report/img/RNN-Classifier-Arch.png Binary files differ diff --git a/Report/img/RNN-Pred-Arch.png b/Report/img/RNN-Pred-Arch.png new file mode 100644 index 0000000..5c0864c --- /dev/null +++ b/Report/img/RNN-Pred-Arch.png Binary files differ diff --git a/Report/img/RNN_general_architecture.png b/Report/img/RNN_general_architecture.png new file mode 100644 index 0000000..b00933c --- /dev/null +++ b/Report/img/RNN_general_architecture.png Binary files differ diff --git a/Report/img/XGB_ROC-curve.png b/Report/img/XGB_ROC-curve.png new file mode 100644 index 0000000..b9f70bd --- /dev/null +++ b/Report/img/XGB_ROC-curve.png Binary files differ diff --git 
a/Report/img/XGB_tf-ft_hist.png b/Report/img/XGB_tf-ft_hist.png new file mode 100644 index 0000000..6e79704 --- /dev/null +++ b/Report/img/XGB_tf-ft_hist.png Binary files differ diff --git a/Report/img/batch_norm.jpeg b/Report/img/batch_norm.jpeg new file mode 100644 index 0000000..8d18629 --- /dev/null +++ b/Report/img/batch_norm.jpeg Binary files differ diff --git a/Report/img/dataset_2-true_false_distr.png b/Report/img/dataset_2-true_false_distr.png new file mode 100644 index 0000000..58f8aac --- /dev/null +++ b/Report/img/dataset_2-true_false_distr.png Binary files differ diff --git "a/Report/img/misreco\1332202\135.png" "b/Report/img/misreco\1332202\135.png" index 52b74ee..2d3f303 100644 --- "a/Report/img/misreco\1332202\135.png" +++ "b/Report/img/misreco\1332202\135.png" Binary files differ diff --git a/Report/img/tracks_in_det_xy.png b/Report/img/tracks_in_det_xy.png index 1542a30..9baf5fe 100644 --- a/Report/img/tracks_in_det_xy.png +++ b/Report/img/tracks_in_det_xy.png Binary files differ diff --git a/Report/img/tracks_in_det_z.png b/Report/img/tracks_in_det_z.png new file mode 100644 index 0000000..46916ba --- /dev/null +++ b/Report/img/tracks_in_det_z.png Binary files differ