diff --git a/Report/00_main.aux b/Report/00_main.aux index 1b41696..94a8c7a 100644 --- a/Report/00_main.aux +++ b/Report/00_main.aux @@ -27,27 +27,28 @@ \@input{08_Appendix.aux} \bibstyle{unsrt} \bibdata{bib/General} -\bibcite{thomson2013modern}{1} -\bibcite{abe2008precision}{2} -\bibcite{adamson2011measurement}{3} -\bibcite{blondel2013research}{4} -\bibcite{augustin2017mupix}{5} -\bibcite{philipp2015hv}{6} -\bibcite{augustin2015mupix}{7} -\bibcite{connor1994recurrent}{8} -\bibcite{grossberg2013recurrent}{9} -\bibcite{ML:XGBoost}{10} -\bibcite{chollet2015keras}{11} -\bibcite{abadi2016tensorflow}{12} -\bibcite{klambauer2017self}{13} -\bibcite{chilimbi2014project}{14} -\bibcite{ioffe2015batch}{15} -\bibcite{cooijmans2016recurrent}{16} -\bibcite{schuster1997bidirectional}{17} -\bibcite{gers1999learning}{18} -\bibcite{chung2014empirical}{19} -\bibcite{agostinelli2003s}{20} -\bibcite{pedregosa2011scikit}{21} -\bibcite{ML:ROC_AUC:Bradley:1997:UAU:1746432.1746434}{22} -\bibcite{gent1992special}{23} -\bibcite{graves2013speech}{24} +\bibcite{akrawy1989measurement}{1} +\bibcite{thomson2013modern}{2} +\bibcite{abe2008precision}{3} +\bibcite{adamson2011measurement}{4} +\bibcite{blondel2013research}{5} +\bibcite{augustin2017mupix}{6} +\bibcite{philipp2015hv}{7} +\bibcite{augustin2015mupix}{8} +\bibcite{connor1994recurrent}{9} +\bibcite{grossberg2013recurrent}{10} +\bibcite{ML:XGBoost}{11} +\bibcite{chollet2015keras}{12} +\bibcite{abadi2016tensorflow}{13} +\bibcite{klambauer2017self}{14} +\bibcite{chilimbi2014project}{15} +\bibcite{ioffe2015batch}{16} +\bibcite{cooijmans2016recurrent}{17} +\bibcite{schuster1997bidirectional}{18} +\bibcite{gers1999learning}{19} +\bibcite{chung2014empirical}{20} +\bibcite{agostinelli2003s}{21} +\bibcite{pedregosa2011scikit}{22} +\bibcite{ML:ROC_AUC:Bradley:1997:UAU:1746432.1746434}{23} +\bibcite{gent1992special}{24} +\bibcite{graves2013speech}{25} diff --git a/Report/00_main.bbl b/Report/00_main.bbl index aefb3af..56b45f4 100644 --- a/Report/00_main.bbl +++ b/Report/00_main.bbl @@ -1,5 +1,11 @@ \begin{thebibliography}{10} +\bibitem{akrawy1989measurement} +MZ~Akrawy, G~Alexander, J~Allison, PP~Allport, KJ~Anderson, JC~Armitage, GTJ + Arnison, P~Ashton, G~Azuelos, JTM Baines, et~al. +\newblock Measurement of the z0 mass and width with the opal detector at lep. +\newblock {\em Physics Letters B}, 231(4):530--538, 1989. + \bibitem{thomson2013modern} Mark Thomson. \newblock {\em Modern particle physics}. 
diff --git a/Report/00_main.blg b/Report/00_main.blg index 986f252..d879324 100644 --- a/Report/00_main.blg +++ b/Report/00_main.blg @@ -9,7 +9,7 @@ A level-1 auxiliary file: 08_Appendix.aux The style file: unsrt.bst Database file #1: bib/General.bib -Repeated entry---line 221 of file bib/General.bib +Repeated entry---line 233 of file bib/General.bib : @article{ML:ROC_AUC:Bradley:1997:UAU:1746432.1746434 : , I'm skipping whatever remains of this entry diff --git a/Report/00_main.log b/Report/00_main.log index 7d5e1c1..360ec50 100644 --- a/Report/00_main.log +++ b/Report/00_main.log @@ -1,4 +1,4 @@ -This is pdfTeX, Version 3.14159265-2.6-1.40.19 (MiKTeX 2.9.6730 64-bit) (preloaded format=pdflatex 2018.7.26) 1 AUG 2018 20:31 +This is pdfTeX, Version 3.14159265-2.6-1.40.19 (MiKTeX 2.9.6730 64-bit) (preloaded format=pdflatex 2018.7.26) 4 AUG 2018 14:03 entering extended mode **./00_main.tex (00_main.tex @@ -1656,104 +1656,93 @@ [] -Underfull \hbox (badness 10000) in paragraph at lines 64--74 +Underfull \hbox (badness 10000) in paragraph at lines 64--72 + + [] + +[5] +Underfull \hbox (badness 10000) in paragraph at lines 73--74 + + [] + +[6] +Underfull \hbox (badness 10000) in paragraph at lines 80--81 [] -Underfull \hbox (badness 10000) in paragraph at lines 64--74 - - [] - -[5] [6] -Underfull \hbox (badness 10000) in paragraph at lines 78--81 +Underfull \hbox (badness 10000) in paragraph at lines 82--84 [] -Underfull \hbox (badness 10000) in paragraph at lines 78--81 +Underfull \hbox (badness 10000) in paragraph at lines 82--84 [] - -Underfull \hbox (badness 10000) in paragraph at lines 78--81 - - [] - - + File: img/beta_decay_feynman.png Graphic file (type png) -Package pdftex.def Info: img/beta_decay_feynman.png used on input line 85. -(pdftex.def) Requested size: 140.39958pt x 140.40762pt. - +Package pdftex.def Info: img/beta_decay_feynman.png used on input line 88. +(pdftex.def) Requested size: 140.39958pt x 158.65813pt. + File: img/muon-decay-feynman.png Graphic file (type png) -Package pdftex.def Info: img/muon-decay-feynman.png used on input line 90. -(pdftex.def) Requested size: 140.39958pt x 119.05476pt. - -Underfull \hbox (badness 10000) in paragraph at lines 98--101 +Package pdftex.def Info: img/muon-decay-feynman.png used on input line 93. +(pdftex.def) Requested size: 140.39958pt x 135.83727pt. +[7 <./img/beta_decay_feynman.png> <./img/muon-decay-feynman.png>] [8] +Underfull \hbox (badness 10000) in paragraph at lines 124--125 [] -Underfull \hbox (badness 10000) in paragraph at lines 98--101 - - [] - -[7 <./img/beta_decay_feynman.png> <./img/muon-decay-feynman.png>] -Underfull \hbox (badness 10000) in paragraph at lines 121--122 +Underfull \hbox (badness 10000) in paragraph at lines 124--125 [] -Underfull \hbox (badness 10000) in paragraph at lines 121--122 +Underfull \hbox (badness 10000) in paragraph at lines 126--127 [] -Underfull \hbox (badness 10000) in paragraph at lines 123--124 +Underfull \hbox (badness 10000) in paragraph at lines 126--127 [] - -Underfull \hbox (badness 10000) in paragraph at lines 123--124 - - [] - -[8] - + File: img/neutrino_oscillation.png Graphic file (type png) -Package pdftex.def Info: img/neutrino_oscillation.png used on input line 144. +Package pdftex.def Info: img/neutrino_oscillation.png used on input line 147. (pdftex.def) Requested size: 312.00119pt x 131.50337pt. 
- -Underfull \hbox (badness 10000) in paragraph at lines 156--158 +[9] +Underfull \hbox (badness 10000) in paragraph at lines 159--161 [] -Underfull \hbox (badness 10000) in paragraph at lines 159--162 +Underfull \hbox (badness 10000) in paragraph at lines 162--165 [] -[9 <./img/neutrino_oscillation.png>] - +[10 <./img/neutrino_oscillation.png>] + File: img/LFV-neutrino_osc.png Graphic file (type png) -Package pdftex.def Info: img/LFV-neutrino_osc.png used on input line 168. +Package pdftex.def Info: img/LFV-neutrino_osc.png used on input line 171. (pdftex.def) Requested size: 140.39958pt x 70.74054pt. - + File: img/LFV-SUSY.png Graphic file (type png) -Package pdftex.def Info: img/LFV-SUSY.png used on input line 173. +Package pdftex.def Info: img/LFV-SUSY.png used on input line 176. (pdftex.def) Requested size: 140.39958pt x 71.59482pt. - + File: img/LFV-tree_lvl.png Graphic file (type png) -Package pdftex.def Info: img/LFV-tree_lvl.png used on input line 178. +Package pdftex.def Info: img/LFV-tree_lvl.png used on input line 181. (pdftex.def) Requested size: 140.39958pt x 94.67162pt. -) [10 <./img/LFV-neutrino_osc.png> <./img/LFV-SUSY.png> <./img/LFV-tree_lvl.png ->] [11] +) [11 <./img/LFV-neutrino_osc.png> <./img/LFV-SUSY.png> <./img/LFV-tree_lvl.png +>] \openout2 = `02_mu_to_3e_decay.aux'. (02_mu_to_3e_decay.tex @@ -1801,27 +1790,27 @@ \openout2 = `03_experimental_setup.aux'. (03_experimental_setup.tex - + File: img/setup-Ia.png Graphic file (type png) Package pdftex.def Info: img/setup-Ia.png used on input line 23. (pdftex.def) Requested size: 312.00119pt x 145.84636pt. - + File: img/tracks-phase_I.png Graphic file (type png) Package pdftex.def Info: img/tracks-phase_I.png used on input line 28. (pdftex.def) Requested size: 140.39958pt x 143.79482pt. - + File: img/tracks-phase_II.png Graphic file (type png) Package pdftex.def Info: img/tracks-phase_II.png used on input line 33. (pdftex.def) Requested size: 140.39958pt x 145.23878pt. - + File: img/setup-Ib.png Graphic file (type png) Package pdftex.def Info: img/setup-Ib.png used on input line 38. (pdftex.def) Requested size: 390.0pt x 123.95313pt. - + File: img/setup-II.png Graphic file (type png) Package pdftex.def Info: img/setup-II.png used on input line 43. @@ -1845,13 +1834,13 @@ [] [17] - + File: img/tracks_in_det_xy.png Graphic file (type png) Package pdftex.def Info: img/tracks_in_det_xy.png used on input line 70. (pdftex.def) Requested size: 312.00119pt x 361.16603pt. [18] - + File: img/tracks_in_det_z.png Graphic file (type png) Package pdftex.def Info: img/tracks_in_det_z.png used on input line 78. @@ -1874,12 +1863,12 @@ ] - + File: img/neural_network.png Graphic file (type png) Package pdftex.def Info: img/neural_network.png used on input line 29. (pdftex.def) Requested size: 312.00119pt x 169.33112pt. - + File: img/neuron.png Graphic file (type png) Package pdftex.def Info: img/neuron.png used on input line 34. @@ -1890,17 +1879,17 @@ [] [22 <./img/neural_network.png> <./img/neuron.png>] - + File: img/selu.png Graphic file (type png) Package pdftex.def Info: img/selu.png used on input line 59. (pdftex.def) Requested size: 175.49881pt x 120.31479pt. - + File: img/relu.png Graphic file (type png) Package pdftex.def Info: img/relu.png used on input line 64. (pdftex.def) Requested size: 175.49881pt x 120.00516pt. - + File: img/tanh.png Graphic file (type png) Package pdftex.def Info: img/tanh.png used on input line 69. 
@@ -1914,13 +1903,13 @@ LaTeX Font Info: Font shape `OMS/cmr/m/n' in size <12> not available (Font) Font shape `OMS/cmsy/m/n' tried instead on input line 99. [25] [26] - + File: img/batch_norm.jpeg Graphic file (type jpg) Package pdftex.def Info: img/batch_norm.jpeg used on input line 164. (pdftex.def) Requested size: 390.0pt x 134.28722pt. [27 <./img/batch_norm.jpeg>] - + File: img/RNN_general_architecture.png Graphic file (type png) Package pdftex.def Info: img/RNN_general_architecture.png used on input line 1 @@ -1931,12 +1920,12 @@ [] - +[28 <./img/RNN_general_architecture.png>] Underfull \hbox (badness 10000) in paragraph at lines 201--204 [] -[28 <./img/RNN_general_architecture.png>] + Underfull \hbox (badness 10000) in paragraph at lines 209--210 [] @@ -1946,17 +1935,18 @@ [] - +[29] + File: img/LSTM_cell.png Graphic file (type png) Package pdftex.def Info: img/LSTM_cell.png used on input line 217. (pdftex.def) Requested size: 312.00119pt x 186.04034pt. -[29] + Underfull \hbox (badness 10000) in paragraph at lines 237--238 [] -) [30 <./img/LSTM_cell.png>] +) [30 <./img/LSTM_cell.png>] [31] \openout2 = `05_Data.aux'. (05_Data.tex @@ -1969,19 +1959,19 @@ [] -[31 +[32 -]) [32] +]) [33] \openout2 = `06_RNN_used.aux'. (06_RNN_used.tex - + File: img/RNN-Pred-Arch.png Graphic file (type png) Package pdftex.def Info: img/RNN-Pred-Arch.png used on input line 9. (pdftex.def) Requested size: 390.0pt x 314.7748pt. - [33 + [34 <./img/RNN-Pred-Arch.png>] @@ -1995,12 +1985,12 @@ [] - + File: img/RNN-Classifier-Arch.png Graphic file (type png) Package pdftex.def Info: img/RNN-Classifier-Arch.png used on input line 45. (pdftex.def) Requested size: 292.5pt x 543.60568pt. -[34] [35 <./img/RNN-Classifier-Arch.png>]) [36] +[35] [36 <./img/RNN-Classifier-Arch.png>]) [37] \openout2 = `07_Analysis.aux'. (07_Analysis.tex @@ -2030,17 +2020,17 @@ [] - + File: img/RNN_tf-ft_hist.png Graphic file (type png) Package pdftex.def Info: img/RNN_tf-ft_hist.png used on input line 16. (pdftex.def) Requested size: 312.00119pt x 223.1071pt. - + File: img/RNN_ROC-curve.png Graphic file (type png) Package pdftex.def Info: img/RNN_ROC-curve.png used on input line 21. (pdftex.def) Requested size: 312.00119pt x 213.3579pt. -[37 +[38 ] @@ -2048,41 +2038,34 @@ [] -[38 <./img/RNN_tf-ft_hist.png> <./img/RNN_ROC-curve.png>] - +[39 <./img/RNN_tf-ft_hist.png> <./img/RNN_ROC-curve.png>] + File: img/XGB_tf-ft_hist.png Graphic file (type png) Package pdftex.def Info: img/XGB_tf-ft_hist.png used on input line 40. (pdftex.def) Requested size: 312.00119pt x 226.13411pt. - + File: img/XGB_ROC-curve.png Graphic file (type png) Package pdftex.def Info: img/XGB_ROC-curve.png used on input line 45. (pdftex.def) Requested size: 312.00119pt x 213.3579pt. - [39] + [40] Underfull \hbox (badness 10000) in paragraph at lines 53--54 [] -[40 <./img/XGB_tf-ft_hist.png> <./img/XGB_ROC-curve.png>] +[41 <./img/XGB_tf-ft_hist.png> <./img/XGB_ROC-curve.png>] Underfull \hbox (badness 10000) in paragraph at lines 55--56 [] - -Package caption Warning: \label without proper reference on input line 64. -See the caption package documentation for explanation. - - -LaTeX Warning: Reference `RNN-XGB_ROC' on page 41 undefined on input line 59. - - + File: img/RNN-XGB_ROC-curve_comparison.png Graphic file (type png) Package pdftex.def Info: img/RNN-XGB_ROC-curve_comparison.png used on input li ne 63. (pdftex.def) Requested size: 312.00119pt x 213.3579pt. 
-[41 <./img/RNN-XGB_ROC-curve_comparison.png>] +[42 <./img/RNN-XGB_ROC-curve_comparison.png>] Underfull \hbox (badness 10000) in paragraph at lines 74--75 [] @@ -2092,17 +2075,17 @@ [] -) [42] +) [43] \openout2 = `08_Appendix.aux'. - (08_Appendix.tex) [43 + (08_Appendix.tex) [44 -] (00_main.bbl [44 +] (00_main.bbl [45 -] [45]) +] [46]) Package atveryend Info: Empty hook `BeforeClearDocument' on input line 72. - [46] + [47] Package atveryend Info: Empty hook `AfterLastShipout' on input line 72. (00_main.aux (01_Standard_Model.aux) (02_mu_to_3e_decay.aux) (03_experimental_setup.aux) @@ -2114,21 +2097,18 @@ (rerunfilecheck) Checksum: F467950E883FD22A1766479B9392E7BD;3897. -LaTeX Warning: There were undefined references. - - LaTeX Warning: There were multiply-defined labels. Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 72. ) Here is how much of TeX's memory you used: - 20043 strings out of 492973 - 338828 string characters out of 3135932 - 434140 words of memory out of 3000000 - 23448 multiletter control sequences out of 15000+200000 + 20046 strings out of 492973 + 338884 string characters out of 3135932 + 433167 words of memory out of 3000000 + 23449 multiletter control sequences out of 15000+200000 548944 words of font info for 87 fonts, out of 3000000 for 9000 1141 hyphenation exceptions out of 8191 - 47i,19n,65p,1101b,571s stack positions out of 5000i,500n,10000p,200000b,50000s + 47i,19n,65p,1103b,569s stack positions out of 5000i,500n,10000p,200000b,50000s pdfTeX warning (dest): name{Hfootnote.29} has been referenced but does not ex ist, replaced by a fixed one @@ -2233,9 +2213,9 @@ onts/cm/cmsy6.pfb> -Output written on 00_main.pdf (46 pages, 1712305 bytes). +Output written on 00_main.pdf (47 pages, 1706673 bytes). PDF statistics: - 814 PDF objects out of 1000 (max. 8388607) - 188 named destinations out of 1000 (max. 500000) + 820 PDF objects out of 1000 (max. 8388607) + 190 named destinations out of 1000 (max. 500000) 589 words of extra memory for PDF output out of 10000 (max. 
10000000) diff --git a/Report/00_main.pdf b/Report/00_main.pdf index 8b3de49..18299df 100644 --- a/Report/00_main.pdf +++ b/Report/00_main.pdf Binary files differ diff --git a/Report/00_main.synctex.gz b/Report/00_main.synctex.gz index 55d4672..bda1fa7 100644 --- a/Report/00_main.synctex.gz +++ b/Report/00_main.synctex.gz Binary files differ diff --git a/Report/00_main.toc b/Report/00_main.toc index 1cf051c..f4d58e5 100644 --- a/Report/00_main.toc +++ b/Report/00_main.toc @@ -2,9 +2,9 @@ \contentsline {section}{\numberline {1}Standard Model}{4}{section.1} \contentsline {subsection}{\numberline {1.1}Elementary particles and forces}{4}{subsection.1.1} \contentsline {subsection}{\numberline {1.2}Interaction rules}{7}{subsection.1.2} -\contentsline {section}{\numberline {2}Physics beyond the SM}{8}{section.2} -\contentsline {subsection}{\numberline {2.1}Neutrino Oscillation}{8}{subsection.2.1} -\contentsline {subsection}{\numberline {2.2}New physics}{9}{subsection.2.2} +\contentsline {section}{\numberline {2}Physics beyond the SM}{9}{section.2} +\contentsline {subsection}{\numberline {2.1}Neutrino Oscillation}{9}{subsection.2.1} +\contentsline {subsection}{\numberline {2.2}New physics}{10}{subsection.2.2} \contentsline {section}{\numberline {3}$\mu \rightarrow eee$ decay}{12}{section.3} \contentsline {subsection}{\numberline {3.1}Kinematics}{12}{subsection.3.1} \contentsline {subsection}{\numberline {3.2}Background events}{12}{subsection.3.2} @@ -12,7 +12,7 @@ \contentsline {subsubsection}{\numberline {3.2.2}Michel decay}{13}{subsubsection.3.2.2} \contentsline {subsubsection}{\numberline {3.2.3}Radiative muon decay}{13}{subsubsection.3.2.3} \contentsline {subsubsection}{\numberline {3.2.4}BhaBha scattering}{13}{subsubsection.3.2.4} -\contentsline {subsubsection}{\numberline {3.2.5}Pion decays}{13}{subsubsection.3.2.5} +\contentsline {subsubsection}{\numberline {3.2.5}Pion decays}{14}{subsubsection.3.2.5} \contentsline {subsubsection}{\numberline {3.2.6}Analysis of the background}{14}{subsubsection.3.2.6} \contentsline {section}{\numberline {4}Mu3e experiment}{15}{section.4} \contentsline {subsection}{\numberline {4.1}Requirements}{15}{subsection.4.1} @@ -35,23 +35,23 @@ \contentsline {subsubsection}{\numberline {5.2.10}Batch normalisation}{27}{subsubsection.5.2.10} \contentsline {subsection}{\numberline {5.3}Recurrent Neural Networks}{27}{subsection.5.3} \contentsline {subsubsection}{\numberline {5.3.1}General concepts}{27}{subsubsection.5.3.1} -\contentsline {subsubsection}{\numberline {5.3.2}Most common architectures}{28}{subsubsection.5.3.2} +\contentsline {subsubsection}{\numberline {5.3.2}Most common architectures}{29}{subsubsection.5.3.2} \contentsline {subsubsection}{\numberline {5.3.3}Cell types}{29}{subsubsection.5.3.3} \contentsline {subsection}{\numberline {5.4}XGBoost}{30}{subsection.5.4} -\contentsline {section}{\numberline {6}Data}{31}{section.6} -\contentsline {subsection}{\numberline {6.1}General information}{31}{subsection.6.1} -\contentsline {subsection}{\numberline {6.2}Preprocessing}{31}{subsection.6.2} -\contentsline {subsubsection}{\numberline {6.2.1}Dataset 1}{31}{subsubsection.6.2.1} -\contentsline {subsubsection}{\numberline {6.2.2}Dataset 2}{32}{subsubsection.6.2.2} -\contentsline {section}{\numberline {7}RNN's used}{33}{section.7} -\contentsline {subsection}{\numberline {7.1}RNN for track prediction}{33}{subsection.7.1} -\contentsline {subsection}{\numberline {7.2}RNN for classification of tracks}{34}{subsection.7.2} -\contentsline {section}{\numberline 
{8}Results}{37}{section.8} -\contentsline {subsection}{\numberline {8.1}Best $\chi ^2$}{37}{subsection.8.1} -\contentsline {subsection}{\numberline {8.2}RNN classifier with RNN track prediction input}{37}{subsection.8.2} -\contentsline {subsection}{\numberline {8.3}XGBoost}{39}{subsection.8.3} -\contentsline {subsection}{\numberline {8.4}Comparison in performance of the RNN and XGBoost}{41}{subsection.8.4} -\contentsline {section}{\numberline {9}Results}{42}{section.9} -\contentsline {subsection}{\numberline {9.1}Results}{42}{subsection.9.1} -\contentsline {subsection}{\numberline {9.2}Outlook and potential}{42}{subsection.9.2} -\contentsline {section}{\numberline {10}Acknowledgements}{43}{section.10} +\contentsline {section}{\numberline {6}Data}{32}{section.6} +\contentsline {subsection}{\numberline {6.1}General information}{32}{subsection.6.1} +\contentsline {subsection}{\numberline {6.2}Preprocessing}{32}{subsection.6.2} +\contentsline {subsubsection}{\numberline {6.2.1}Dataset 1}{32}{subsubsection.6.2.1} +\contentsline {subsubsection}{\numberline {6.2.2}Dataset 2}{33}{subsubsection.6.2.2} +\contentsline {section}{\numberline {7}RNN's used}{34}{section.7} +\contentsline {subsection}{\numberline {7.1}RNN for track prediction}{34}{subsection.7.1} +\contentsline {subsection}{\numberline {7.2}RNN for classification of tracks}{35}{subsection.7.2} +\contentsline {section}{\numberline {8}Results}{38}{section.8} +\contentsline {subsection}{\numberline {8.1}Best $\chi ^2$}{38}{subsection.8.1} +\contentsline {subsection}{\numberline {8.2}RNN classifier with RNN track prediction input}{38}{subsection.8.2} +\contentsline {subsection}{\numberline {8.3}XGBoost}{40}{subsection.8.3} +\contentsline {subsection}{\numberline {8.4}Comparison in performance of the RNN and XGBoost}{42}{subsection.8.4} +\contentsline {section}{\numberline {9}Results}{43}{section.9} +\contentsline {subsection}{\numberline {9.1}Results}{43}{subsection.9.1} +\contentsline {subsection}{\numberline {9.2}Outlook and potential}{43}{subsection.9.2} +\contentsline {section}{\numberline {10}Acknowledgements}{44}{section.10} diff --git a/Report/01_Standard_Model.aux b/Report/01_Standard_Model.aux index edfff30..1ca0a73 100644 --- a/Report/01_Standard_Model.aux +++ b/Report/01_Standard_Model.aux @@ -1,5 +1,6 @@ \relax \providecommand\hyper@newdestlabel[2]{} +\citation{akrawy1989measurement} \@writefile{toc}{\contentsline {section}{\numberline {1}Standard Model}{4}{section.1}} \@writefile{toc}{\contentsline {subsection}{\numberline {1.1}Elementary particles and forces}{4}{subsection.1.1}} \newlabel{intro_elem_part}{{1.1}{4}{Elementary particles and forces}{subsection.1.1}{}} @@ -22,20 +23,20 @@ \@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Certain diagrams of decays\relax }}{7}{figure.caption.5}} \citation{abe2008precision} \citation{adamson2011measurement} -\@writefile{toc}{\contentsline {section}{\numberline {2}Physics beyond the SM}{8}{section.2}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Neutrino Oscillation}{8}{subsection.2.1}} -\newlabel{PMNS_neutrino}{{1}{8}{Neutrino Oscillation}{equation.2.1}{}} +\@writefile{toc}{\contentsline {section}{\numberline {2}Physics beyond the SM}{9}{section.2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Neutrino Oscillation}{9}{subsection.2.1}} +\newlabel{PMNS_neutrino}{{1}{9}{Neutrino Oscillation}{equation.2.1}{}} \newlabel{neutrino_flavour_change_prob}{{2}{9}{Neutrino Oscillation}{equation.2.2}{}} -\@writefile{lof}{\contentsline 
{figure}{\numberline {2}{\ignorespaces Process that violates lepton family number conservation through neutrino oscillation\relax }}{9}{figure.caption.6}}
-\newlabel{neutrino_osc_feyn}{{2}{9}{Process that violates lepton family number conservation through neutrino oscillation\relax }{figure.caption.6}{}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}New physics}{9}{subsection.2.2}}
-\newlabel{LFV-neutrino_osc}{{3a}{10}{LFV through neutrino oscillation\relax }{figure.caption.7}{}}
-\newlabel{sub@LFV-neutrino_osc}{{a}{10}{LFV through neutrino oscillation\relax }{figure.caption.7}{}}
-\newlabel{LFV-SUSY}{{3b}{10}{LFV by using supersymmetric particles\relax }{figure.caption.7}{}}
-\newlabel{sub@LFV-SUSY}{{b}{10}{LFV by using supersymmetric particles\relax }{figure.caption.7}{}}
-\newlabel{LFV-tree_lvl}{{3c}{10}{LFV at tree level\relax }{figure.caption.7}{}}
-\newlabel{sub@LFV-tree_lvl}{{c}{10}{LFV at tree level\relax }{figure.caption.7}{}}
-\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Charged LFV\relax }}{10}{figure.caption.7}}
+\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Process that violates lepton family number conservation through neutrino oscillation\relax }}{10}{figure.caption.6}}
+\newlabel{neutrino_osc_feyn}{{2}{10}{Process that violates lepton family number conservation through neutrino oscillation\relax }{figure.caption.6}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}New physics}{10}{subsection.2.2}}
+\newlabel{LFV-neutrino_osc}{{3a}{11}{LFV through neutrino oscillation\relax }{figure.caption.7}{}}
+\newlabel{sub@LFV-neutrino_osc}{{a}{11}{LFV through neutrino oscillation\relax }{figure.caption.7}{}}
+\newlabel{LFV-SUSY}{{3b}{11}{LFV by using supersymmetric particles\relax }{figure.caption.7}{}}
+\newlabel{sub@LFV-SUSY}{{b}{11}{LFV by using supersymmetric particles\relax }{figure.caption.7}{}}
+\newlabel{LFV-tree_lvl}{{3c}{11}{LFV at tree level\relax }{figure.caption.7}{}}
+\newlabel{sub@LFV-tree_lvl}{{c}{11}{LFV at tree level\relax }{figure.caption.7}{}}
+\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Charged LFV\relax }}{11}{figure.caption.7}}
\@setckpt{01_Standard_Model}{
\setcounter{page}{12}
\setcounter{equation}{2}
diff --git a/Report/01_Standard_Model.tex b/Report/01_Standard_Model.tex
index f764ed1..dd21d04 100644
--- a/Report/01_Standard_Model.tex
+++ b/Report/01_Standard_Model.tex
@@ -3,8 +3,8 @@
\label{intro_elem_part}
The Standard Model(SM) describes all known elementary particles as well as three of the four known forces\footnote{Strong, weak and electromagnetic forces}.\\
-The elementary particles that make up matter can be split into two categories, namely quarks and leptons. There are 6 types of quarks and six types of leptons. The type of a particle is conventionally called flavour. The six quark flavours and the six lepton flavours are separated over 3 generations (each which two quarks and two leptons in it).
-Experimental evidence suggests that there exist exactly three generations of particles. Each particle of the first generation has higher energy versions of itself with the similar properties, besides their mass, (e.g. $e^- \rightarrow \mu^- \rightarrow \tau^-$)as in other generations. For each following generation, the particles have a higher mass than the generation before.
+The elementary particles that make up matter can be split into two categories, namely quarks and leptons. There are six types of quarks and six types of leptons. The type of a particle is conventionally called flavour. The six quark flavours and the three lepton flavours are separated over 3 generations (each with two quarks and two leptons in it).
+Experimental evidence suggests that there exist exactly three generations of particles \cite{akrawy1989measurement}. Each particle of the first generation has higher energy versions of itself in the other generations, with similar properties apart from their mass (e.g. $e^- \rightarrow \mu^- \rightarrow \tau^-$). For each following generation, the particles have a higher mass than the generation before.
\begin{table}[H]
\begin{center}
@@ -23,8 +23,8 @@
\end{table}
One category consists of quarks($q$)(see Table \ref{Quark_SM_table}). In this, we differentiate between up-type quarks, with charge $\frac{2}{3}e$, and down-type quarks, with charge $-\frac{1}{3}e$. Quarks interact with all fundamental forces.\\
-Each quark carries a property called colour-charge. The possible color charges are red(r), green(gr), blue(bl) in which anti-quarks carry anti-colour. Quarks can only carry one colour, whilst every free particle has to be colorless\footnote{Colour confinement}. In conclusion we cannot observe a single quark.\\
-Free particles can achieve being colourless in two ways. Either by having all three colors present in the same amount (one quark of each color), which creates the characteristic group of baryons($qqq$) and anti-baryons($\bar{q}\bar{q}\bar{q}$) or by having a color and its anticolor present, which creates the group of mesons($q\bar{q}$).
+Each quark carries a property called colour-charge. The possible colour charges are red(r), green(gr), blue(bl) in which anti-quarks carry anti-colour. Quarks can only carry one colour, whilst every free particle has to be colourless\footnote{Colour confinement}. In conclusion, we cannot observe a single quark.\\
+Free particles can achieve being colourless in two ways. Either by having all three colours present in the same amount (one quark of each colour), which creates the characteristic group of baryons($qqq$) and anti-baryons($\bar{q}\bar{q}\bar{q}$) or by having a colour and its anticolour present, which creates the group of mesons($q\bar{q}$).
\begin{table}[H]
\begin{center}
@@ -51,7 +51,7 @@
\begin{center}
\caption{Fundamental forces} \label{fund_forces_table}
\begin{tabular}{l l l l l l l}
-Force & Strengh & Boson & & Spin & Charge & $\frac{mass}{GeV}$ \\\hline
+Force & Strength & Boson & & Spin & Charge & $\frac{mass}{GeV}$ \\\hline
Strong & 1 & gluon & $g$ & 1& 0& 0 \\
Electromagnetism & $10^{-3}$ & photon & $\gamma$ & 1& 0& 0 \\
Weak & $10^{-8}$ & Z boson & $Z$ & 1& 0& 91.2\\
@@ -62,22 +62,25 @@
The particles of the SM interact through the 3 fundamental forces of the SM. In these interactions, particles called bosons are being exchanged which are the carriers of their respective force (see Table \ref{fund_forces_table}).\\
-As mentioned above, only quarks can interact through the strong force, in which they exchange gluons. Gluons are massless and EM neutrally charged. The strong force has the biggest coupling strengh of 1 (though it decreases with higher energies as a result of gluon-gluon self interaction loops, which interfere negatively in perturbation theory)\cite{thomson2013modern}. A gluon carries colour charge and hence can change the colour of a quark but it conserves its flavour. The strong interaction has an underlying gauge symmetry of SU(3). Therefore, it can be derived that color charge is conserved through the strong interaction\footnote{E.g. through Gell-Mann matrices}.\\
+As mentioned above, only quarks can interact through the strong force, in which they exchange gluons. Gluons are massless and EM neutrally charged. The strong force has the biggest coupling strength of 1 (though it decreases with higher energies as a result of gluon-gluon self interaction loops, which interfere negatively in perturbation theory)\cite{thomson2013modern}. A gluon carries colour charge and hence can change the colour of a quark but it conserves its flavour. The strong interaction has an underlying gauge symmetry of SU(3). Therefore, it can be derived that colour charge is conserved through the strong interaction\footnote{E.g. through Gell-Mann matrices}.\\
The electromagnetic(EM) force is propagated through the photon. It carries zero charge and no invariant mass. Exclusively charged particles can interact through the electromagnetic force. The coupling strength is $\alpha \approx \frac{1}{137}$; contrary to the strong force, the coupling constant increases with higher energies\cite{thomson2013modern}. This difference stems from the fact that photon-photon interaction loops are not allowed whereas gluon-gluon interaction loops are. In perturbation theory this results in only positive terms being added to the coupling strength. The underlying gauge symmetry is U(1). The electromagnetic force also conserves flavour.\\
-The weak force has two types of bosons. The bosons of the weak force are the only bosons to have an inertial mass.\\
-First we will discuss the EM neutrally charged Z boson. Even though the Z boson belongs to the weak force it, it also has an electromagnetic part additionally to the weak force part\footnote{$Z \rightarrow EM_{part} + W^3$, \cite{thomson2013modern}}. It follows directly, that the Z boson couples weaker to uncharged particles.\\
+The weak force has two types of bosons. The bosons of the weak force are the only fundamental gauge bosons to have an inertial mass.\\
+First, we will discuss the EM neutral Z boson. Even though the Z boson belongs to the weak force, it also has an electromagnetic part in addition to the weak force part\footnote{$Z \rightarrow EM_{part} + W^3$, \cite{thomson2013modern}}. It follows directly that the Z boson couples more weakly to uncharged particles.\\
The other boson of the weak force is the W boson. In the classical SM, the only way particles can change flavour is through the weak force by emitting or absorbing a W boson. It is important to notice that, besides having an invariant mass, the W boson is the only boson with a non-zero charge ($Q_{W^\pm} = \pm 1e$). In the gauge symmetry of the weak force the $W^\pm$ are actually the creation and annihilation operators of said symmetry\footnote{$W^\pm = W_1 \pm i W_2$}.\\
An important characteristic of the weak force is that it exclusively couples to lefthanded(LH) particles and righthanded(RH) antiparticles (describing chirality states)\footnote{In the ultrarelativistic limit helicity and chirality eigenstates are the same}.\\
-The chirality operators for left- and righthandedness are: \\\\
-LH: $\frac{1}{2}(1-\gamma^5)$, RH: $\frac{1}{2}(1+\gamma^5)$\\\\
-As a consequence RH particles and LH anti-particles cant couple to the W boson at all. This also results in charged RH particles and LH anti-particles to couple to the Z boson only through the electromagnetic part of the itself, while uncharged RH particles and LH anti particles (e.g. RH $\nu$, LH $\bar{\nu}$) don't couple with the EM force nor the weak force.
+The chirality operators for left- and righthandedness are: \\
+
+LH: $\frac{1}{2}(1-\gamma^5)$, RH: $\frac{1}{2}(1+\gamma^5)$\\
+
+As a consequence, RH particles and LH anti-particles can't couple to the W boson at all. This also results in charged RH particles and LH anti-particles coupling to the Z boson only through its electromagnetic part, while uncharged RH particles and LH anti-particles (e.g. RH $\nu$, LH $\bar{\nu}$) couple to neither the EM force nor the weak force.
\subsection{Interaction rules}
-Now we will establish the general rules for interactions in the SM.\\\\
+Now, we will establish the general rules for interactions in the SM.\\
+
\textbf{Baryon number is conserved}\\
-As we already established before, the only interaction that can change flavour is the weak force through the W boson. We directly see that all other interactions baryon number has to be conserved. So any up-type quark can be changed to a down-type quark and backwards by emitting or absorbing a W boson. In the end however, there are still 3 quarks which form a baryon\footnote{Pentaquarks($qqqq\bar{q}$) and other exotic states excluded}, even though it changed its type and charge. A well known example is the beta decay, where a down quark in a neutron decays into a an up quark to form now a proton(e.g. see Figure \ref{beta-decay_feynman}). We easily see that the baryon number is conserved.\\\\
+As we already established before, the only interaction that can change flavour is the weak force through the W boson. We directly see that in all other interactions baryon number has to be conserved. So any up-type quark can be changed to a down-type quark and backwards by emitting or absorbing a W boson. In the end however, there are still 3 quarks which form a baryon\footnote{Pentaquarks($qqqq\bar{q}$) and other exotic states excluded}, even though it changed its type and charge. A well known example is the beta decay, where a down quark in a neutron decays into an up quark, now forming a proton (e.g. see Figure \ref{beta-decay_feynman}). We easily see that the baryon number is conserved.\\\\
\begin{figure}[H]
\begin{center}
@@ -97,7 +100,7 @@
\textbf{Lepton family number is conserved}\\
According to the SM lepton family number is conserved. As all interactions besides the W conserve particle flavour, it is easy to see that lepton family number is conserved.\\
-Whenever a lepton interaction with a W boson, it just changes a lepton to its corresponding lepton neutrino and or the other way around (e.g. see Figure \ref{muon-decay_feynman}).\\\\
+Whenever a lepton interacts with a W boson, it just changes a lepton to its corresponding lepton neutrino or the other way around (e.g. see Figure \ref{muon-decay_feynman}).\newpage
\section{Physics beyond the SM}
@@ -118,7 +121,7 @@
\label{PMNS_neutrino}
\end{equation}
-As a result neutrinos propagate as a superposition of all mass eigenstates. Additionally we can describe the PMNS matrix through three mixing angles $\theta_{12}$, $\theta_{13}$ and $\theta_{23}$ and a complex phase $\delta$ \footnote{Measurements: $\theta_{12} \approx 35^\circ$, $\theta_{13} \approx 10^\circ$, $\theta_{23} \approx 45^\circ$ \cite{abe2008precision}, \cite{adamson2011measurement}}. The electron superposition looks then like this:\\\\
+As a result, neutrinos propagate as a superposition of all mass eigenstates. Additionally, we can describe the PMNS matrix through three mixing angles $\theta_{12}$, $\theta_{13}$ and $\theta_{23}$ and a complex phase $\delta$ \footnote{Measurements: $\theta_{12} \approx 35^\circ$, $\theta_{13} \approx 10^\circ$, $\theta_{23} \approx 45^\circ$ \cite{abe2008precision}, \cite{adamson2011measurement}}. The electron superposition then looks like this:\\\\
$\ket{\nu_e} = U_{e_1} \ket{\nu_1} e^{{-i \Phi_1}} + U_{e_2} \ket{\nu_2} e^{{-i \Phi_2}} + U_{e_3} \ket{\nu_3} e^{{-i \Phi_3}}$ with $\Phi_i = E_i \times t$\\\\
@@ -136,7 +139,7 @@
\end{equation}
An important thing to note is that if any elements of the PMNS matrix are complex, this process is not invariant under time reversal ($t \rightarrow -t$)\footnote{\alignLongunderstack{%
- \text{The probability does not change if we add a complex phase to the PMNS}\\ \text{ matrix, just if one of the elements has a phase different from the others}}}\\
+ \text{The probability does not change if we add a complex phase to the PMNS}\\ \text{ matrix, only if one of the elements has a phase different from the others}}}\\
$P(\nu_\alpha \rightarrow \nu_\beta) \neq P(\nu_\beta \rightarrow \nu_\alpha)$.
\begin{figure}[H]
@@ -153,8 +156,8 @@
\subsection{New physics}
-As a consequence of neutrino oscillation lepton flavour is a broken symmetry. The SM has to be adapted to include lepton flavour violation (LFV) and massive neutrinos. LFV is also expected for charged neutrinos.\\
-Although it has yet to be determined how LFV violation exactly works to which scale it exists.\\
+As a consequence of neutrino oscillation, lepton flavour is a broken symmetry. The SM has to be adapted to include lepton flavour violation (LFV) and massive neutrinos. LFV is also expected for charged leptons.\\
+However, it has yet to be determined how exactly LFV works and to which scale it exists.\\
This may raise the question of why charged LFV has never been observed yet. This is especially surprising as the mixing angles of the neutrinos have been measured to be large.\\
There are two reasons why charged LFV is strongly suppressed:
@@ -183,7 +186,7 @@
\end{center}
\end{figure}
-One way charged LFV can occur is through super symmetric particles (see Figure \ref{LFV-SUSY}). By observing charged LFV supersymmetry would gain new importance.\\
+One way charged LFV can occur is through supersymmetric particles (see Figure \ref{LFV-SUSY}). If charged LFV were observed, supersymmetry would gain new importance.\\
Together with supersymmetric models, other extensions of the SM such as left-right symmetric models, grand unified models, models with an extended Higgs sector and models where electroweak symmetry is broken dynamically are all good candidates to explain charged LFV and, most importantly, are experimentally accessible in a large region of the parameter space.
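To make the oscillation probability used above concrete, here is a minimal Python sketch of the standard two-flavour approximation (the full three-flavour PMNS treatment adds the complex phase $\delta$); the parameter values are illustrative orders of magnitude, not numbers from the report:

import numpy as np

def p_transition(theta, dm2_ev2, L_km, E_GeV):
    # Two-flavour approximation:
    # P(nu_a -> nu_b) = sin^2(2*theta) * sin^2(1.27 * dm2[eV^2] * L[km] / E[GeV])
    return np.sin(2 * theta) ** 2 * np.sin(1.27 * dm2_ev2 * L_km / E_GeV) ** 2

# Roughly atmospheric-scale parameters, for illustration only
print(p_transition(theta=np.radians(45), dm2_ev2=2.5e-3, L_km=500, E_GeV=1.0))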
diff --git a/Report/02_mu_to_3e_decay.aux b/Report/02_mu_to_3e_decay.aux
index dbbbbb0..a1d753e 100644
--- a/Report/02_mu_to_3e_decay.aux
+++ b/Report/02_mu_to_3e_decay.aux
@@ -9,8 +9,8 @@
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.2}Michel decay}{13}{subsubsection.3.2.2}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.3}Radiative muon decay}{13}{subsubsection.3.2.3}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.4}BhaBha scattering}{13}{subsubsection.3.2.4}}
-\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.5}Pion decays}{13}{subsubsection.3.2.5}}
\citation{blondel2013research}
+\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.5}Pion decays}{14}{subsubsection.3.2.5}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.6}Analysis of the background}{14}{subsubsection.3.2.6}}
\@setckpt{02_mu_to_3e_decay}{
\setcounter{page}{15}
diff --git a/Report/02_mu_to_3e_decay.tex b/Report/02_mu_to_3e_decay.tex
index 88adb9f..6620d10 100644
--- a/Report/02_mu_to_3e_decay.tex
+++ b/Report/02_mu_to_3e_decay.tex
@@ -25,8 +25,10 @@
\subsection{Background events}
+Below is a summary of all the different types of background considered in the experiment.
+
\subsubsection{Internal conversions}
-The event $\mu \rightarrow eee\nu\nu$ results in the same particles seen by the detector as the event we are searching for\footnote{Neutrinos are invisible to our detector}. As a result it proves to be quite challenging to s$\mu \rightarrow eee\nu\nu$eparate the two.\\
+The event $\mu \rightarrow eee\nu\nu$ results in the same particles seen by the detector as the event we are searching for\footnote{Neutrinos are invisible to our detector}. As a result, it proves to be quite challenging to separate the two.\\
By using momentum conservation, it becomes possible to differentiate the $\mu \rightarrow eee$ and the $\mu \rightarrow eee\nu\nu$ events. In the muon rest frame the total momentum is zero and the energy of the resulting particles is equal to the muon rest energy.\\
By reconstructing the energy and momenta of the three $e$ we can check if their momenta add up to zero and their energies equal the muon rest energy. If not, we can assume that there are additional neutrinos. This differentiation between the two events is crucial for the experiment as the $\mu \rightarrow eee\nu\nu$ events pose the most serious background for $\mu \rightarrow eee$ decay measurements.\\
As a result, our detector needs a very good energy resolution to consistently make it possible to differentiate between the two events as neutrino energies and momenta are very small.
@@ -47,7 +49,7 @@
\subsubsection{Pion decays}
Certain pion decays also lead to an indistinguishable signature from the event we search for, the most prominent being the $\pi \rightarrow eee\nu$ and $\pi \rightarrow \mu\gamma\nu$ decays. The latter only produces a similar signature if the produced photon converts through pair production to an electron and a positron.\\
-However as only a negligible portion will actually contribute to the background, as there is only a small branching fraction and the momenta and energy of the produced particles have to match up with the criteria mentioned in section \ref{Kinematics}.
+However, only a negligible portion will actually contribute to the background, as there is only a small branching fraction and the momenta and energy of the produced particles have to match up with the criteria mentioned in section \ref{Kinematics}.
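As a sketch of the kinematic separation described under "Internal conversions" above (the three electron momenta should sum to zero and the energies to the muon rest energy, otherwise neutrinos carried part of both away), a minimal check could look as follows; the array layout and the MeV tolerances are assumptions, not the experiment's actual cuts:

import numpy as np

MUON_REST_ENERGY = 105.66  # MeV

def is_eee_candidate(momenta_mev, energies_mev, p_tol=1.0, e_tol=1.0):
    # momenta_mev: (3, 3) array, one reconstructed 3-momentum per electron
    # energies_mev: (3,) array of reconstructed energies
    total_p = np.linalg.norm(momenta_mev.sum(axis=0))  # ~0 for mu -> eee
    total_e = energies_mev.sum()                       # ~muon rest energy
    return total_p < p_tol and abs(total_e - MUON_REST_ENERGY) < e_tol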
\subsubsection{Analysis of the background} diff --git a/Report/03_experimental_setup.aux b/Report/03_experimental_setup.aux index 06175a0..84757f8 100644 --- a/Report/03_experimental_setup.aux +++ b/Report/03_experimental_setup.aux @@ -10,8 +10,8 @@ \newlabel{sub@setup_Ia}{{a}{16}{Setup of the detector in the first part of phase I\relax }{figure.caption.8}{}} \newlabel{tracks_Ia}{{4b}{16}{Tracks in the detector in the first part of phase I\relax }{figure.caption.8}{}} \newlabel{sub@tracks_Ia}{{b}{16}{Tracks in the detector in the first part of phase I\relax }{figure.caption.8}{}} -\newlabel{tracks_Ib,_II}{{4c}{16}{Tracks in the detector in the second part of phase I and Phase II\relax }{figure.caption.8}{}} -\newlabel{sub@tracks_Ib,_II}{{c}{16}{Tracks in the detector in the second part of phase I and Phase II\relax }{figure.caption.8}{}} +\newlabel{tracks_Ib,_II}{{4c}{16}{Tracks in the detector in the second part of phase I and phase II\relax }{figure.caption.8}{}} +\newlabel{sub@tracks_Ib,_II}{{c}{16}{Tracks in the detector in the second part of phase I and phase II\relax }{figure.caption.8}{}} \newlabel{setup_Ib}{{4d}{16}{Setup of the detector in the second part of phase I\relax }{figure.caption.8}{}} \newlabel{sub@setup_Ib}{{d}{16}{Setup of the detector in the second part of phase I\relax }{figure.caption.8}{}} \newlabel{setup_II}{{4e}{16}{Setup of the detector in phase II\relax }{figure.caption.8}{}} @@ -21,10 +21,10 @@ \citation{philipp2015hv} \citation{augustin2015mupix} \@writefile{toc}{\contentsline {subsection}{\numberline {4.5}The problem of low longitudinal momentum recurlers}{18}{subsection.4.5}} -\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Particle recurling back into the center station\relax }}{19}{figure.caption.9}} -\newlabel{recurler}{{5}{19}{Particle recurling back into the center station\relax }{figure.caption.9}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Particle recurling back into the center station\relax }}{20}{figure.caption.10}} -\newlabel{recurler}{{6}{20}{Particle recurling back into the center station\relax }{figure.caption.10}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Particle recurling back into the center station (highlighted)\relax }}{19}{figure.caption.9}} +\newlabel{recurler}{{5}{19}{Particle recurling back into the center station (highlighted)\relax }{figure.caption.9}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Particle recurling back into the center station (highlighted)\relax }}{20}{figure.caption.10}} +\newlabel{recurler}{{6}{20}{Particle recurling back into the center station (highlighted)\relax }{figure.caption.10}{}} \@setckpt{03_experimental_setup}{ \setcounter{page}{21} \setcounter{equation}{4} diff --git a/Report/03_experimental_setup.tex b/Report/03_experimental_setup.tex index 0f62004..5d3569e 100644 --- a/Report/03_experimental_setup.tex +++ b/Report/03_experimental_setup.tex @@ -2,7 +2,7 @@ \subsection{Requirements} -The ultimate goal of this experiment is to observe a $\mu \rightarrow eee$ event. As we strive for a sensitivity of $10^{-16}$ , we should be able to observe this process if its branching ratio would be higher than our sensitivity. Otherwise we want to exclude a branching ratio $>10^{-16}$ with a $90\%$ certainty.\\ +The ultimate goal of this experiment is to observe a $\mu \rightarrow eee$ event. 
As we strive for a sensitivity of $10^{-16}$, we should be able to observe this process if its branching ratio is higher than our sensitivity. Otherwise, we want to exclude a branching ratio $>10^{-16}$ with a $90\%$ certainty.\\
To get to this sensitivity, more than $5.5 \cdot 10^{16}$ muon decays have to be observed. To reach this goal within one year, a muon stopping rate of $2 \cdot 10^9 Hz$ in combination with a high geometrical acceptance as well as a high efficiency of the experiment is required.
\subsection{Phase I}
@@ -11,7 +11,7 @@
\subsection{Phase II}
-Phase II strives to reach the maximum sensitivity of $10^{-16}$. To achieve this in a reasonable time a new beamline will be used which delivers more than $2\cdot10^{9}Hz$ of muons.
+Phase II strives to reach the maximum sensitivity of $10^{-16}$. To achieve this in a reasonable timeframe, a new beamline will be used which delivers more than $2\cdot10^{9}Hz$ of muons.
\subsection{Experimental setup}
\label{exp_setup}
@@ -31,7 +31,7 @@
\end{subfigure}
\begin{subfigure}{0.45\textwidth}
\includegraphics[width=0.8\textwidth]{img/tracks-phase_II.png}
-\caption{Tracks in the detector in the second part of phase I and Phase II}
+\caption{Tracks in the detector in the second part of phase I and phase II}
\label{tracks_Ib,_II}
\end{subfigure}
\begin{subfigure}{1\textwidth}
@@ -48,27 +48,27 @@
\end{center}
\end{figure}\newpage
-As seen in figure \ref{setup_II}, the final version of the detector can be divided into 5 separate parts in the longitudinal direction. There is the central part with the target, two inner silicon pixel layers, a fibre tracker and two outer silicon layers. The forward and backward parts, called recurl stations, consist only of a tile timing detector surrounded by two silicon recurl layers. A big advantage pf this layout is that even a partially constructed detector (gradally over phase I to phase II parts get added) can give us competitive measurements.\\
+As seen in figure \ref{setup_II}, the final version of the detector can be divided into 5 separate parts in the longitudinal direction. There is the central part with the target, two inner silicon pixel layers, a fibre tracker and two outer silicon layers. The forward and backward parts, called recurl stations, consist only of a tile timing detector surrounded by two silicon recurl layers. A big advantage of this layout is that even a partially constructed detector (parts get added gradually from phase I to phase II) can give us competitive measurements.\\
The target itself is a big surfaced double cone with a surface length of $10cm$ and a width of $2cm$. The target was chosen specifically to be of this shape to facilitate separating tracks coming from different muons and thereby also helping to reduce accidental background.\\
-The two inner detector layers, also called vertex layers, span a length $12cm$. The innermost layer consists of 12 tiles while the outer vertex layer consists of 18 tiles. The tiles are each of $1cm$ width, with the inner layer having an average radius of $1.9cm$, respectively $2.9cm$ \cite{augustin2017mupix}, \cite{philipp2015hv}, \cite{augustin2015mupix}. They are supported by two half cylinder made up of $25\mu m$ thin Kapton foil mounted on plastic. The detector layers itself are $50\mu m$ thin and cooled by gaseous helium. The vertex detectors are read out at a rate of $20MHz$, giving us a time resolution of $20ns$.\\
-After the vertex layers the particles pass through the fibre tracker (see Figure \ref{tracks_Ib,_II}, \ref{setup_II}). It is positioned around $6cm$ away from the center. Its main job is to provide accurate timing information for the outgoing electrons and positrons. It consist of three to five layers, each consisting of $36cm$ long and $250\mu m$ thick scintillating fibres with fast silicon photomultipliers at the end. They provide us a timing information of less than a $1ns$.\\
+The two inner detector layers, also called vertex layers, span a length of $12cm$. The innermost layer consists of 12 tiles while the outer vertex layer consists of 18 tiles. The tiles are each of $1cm$ width, with the inner and outer layer having average radii of $1.9cm$ and $2.9cm$ respectively, and a pixel size of $80 \cross 80 \mu m^2$ \cite{augustin2017mupix}, \cite{philipp2015hv}, \cite{augustin2015mupix}. They are supported by two half cylinders made up of $25\mu m$ thin Kapton foil mounted on plastic. The detector layers themselves are $50\mu m$ thin and cooled by gaseous helium. The vertex detectors are read out at a rate of $20MHz$, giving us a time resolution of $20ns$.\\
+After the vertex layers the particles pass through the fibre tracker (see Figure \ref{tracks_Ib,_II}, \ref{setup_II}). It is positioned around $6cm$ away from the center. Its main job is to provide accurate timing information for the outgoing electrons and positrons. It consists of three to five layers, each consisting of $36cm$ long and $250\mu m$ thick scintillating fibres with fast silicon photomultipliers at the end. They provide us timing information of less than $1ns$.\\
Next, the outgoing particles encounter the outer silicon pixel detectors. They are mounted just after the fibre detector with average radii of $7.6cm$ and $8.9cm$. The inner layer has 24 and the outer has 28 tiles of $1cm$ length. The active area itself has a length of $36cm$. Similarly to the vertex detectors, they are mounted on $25\mu m$ thin Kapton foil with plastic ends.\\
The stations up- and downstream of the beam only consist of the outer pixel detector layers as well as a timing detector. While the silicon detectors are the same as in the central station, the timing tracker was chosen to be much thicker than the fibre detector in the central station. It consists of scintillating tiles with dimensions of $7.5 \cross 7.5 \cross 5 mm^3$. They provide an even better time resolution than the fibre tracker in the center. Incoming particles are supposed to be stopped here. The outer stations are mainly used to determine the momenta of the outgoing particles and have an active length of $36cm$ and a radius of around $6cm$.
\subsection{The problem of low longitudinal momentum recurlers}
-As explained in section \ref{exp_setup}, the outgoing particles are supposed to recurl back into the outer stations of the detector to enable a precise measurement of the momentum. A problem arises if the particles have almost no momentum in the beam direction. Then they can recurl back into the central station and cause additional hits there. As the the central station is designed to let particles easily pass through, they can recurl inside the central station many more times without getting stopped. As we have a $20ns$ time window for the readout of the pixel detectors, we need a very reliable way to identify and reconstruct these tracks as recurling particles as otherwise they look exactly like newly produced particles coming from our target. As one can imagine this influences the precision of our measurements by a big margin. So finding a way to identify these low beam direction momentum particles consistently is of great importance as it is crucial for the experiment to reduce the background as much as possible.\\
+As explained in section \ref{exp_setup}, the outgoing particles are supposed to recurl back into the outer stations of the detector to enable a precise measurement of the momentum. A problem arises if the particles have almost no momentum in the beam direction. Then they can recurl back into the central station and cause additional hits there. As the central station is designed to let particles easily pass through, they can recurl inside the central station many more times without getting stopped. As we have a $20ns$ time window for the readout of the pixel detectors, we need a very reliable way to identify and reconstruct these tracks as recurling particles as otherwise they look exactly like newly produced particles coming from our target. As one can imagine, this influences the precision of our measurements by a big margin. So, finding a way to consistently identify these particles with low momentum in the beam direction is of great importance as it is crucial for the experiment to reduce the background as much as possible.\\
-There is already an existing software to reconstruct particle tracks. However it struggles to find the right tracks for a lot of the particles recurling back into the center station.\\
+There is already existing software to reconstruct particle tracks. However, it struggles to find the right tracks for a lot of the particles recurling back into the center station.\\
These recurlers will typically leave eight hits or more, four (one on each silicon pixel detector layer) when initially leaving the detector and another four when initially falling back in. It is possible for these recurlers to produce even more hits when leaving the detector again but for this thesis we will only be focusing on these 8-hit tracks.\\
The current reconstruction algorithm works by fitting helix paths with a $\chi^2$ method onto the 8 hits.\\
-However experience has shown that often the fit with the lowest $\chi^2$ isn't necessarily the right track. If we increase the $\chi^2$ limit value to some arbitrary limit, we get a selection of several possible tracks per particle. Without any additional tools however, it is impossible to figure out if the right track is in the selection\footnote{\alignLongunderstack{\text{Based on detector efficiency it is possible for a particle to leave less}\\ \text{than 8 tracks and therefore not be reconstructed by the algorithm}}} and if yes which one of them correct track is.
+However, experience has shown that often the fit with the lowest $\chi^2$ isn't necessarily the right track. If we increase the $\chi^2$ limit value to some arbitrary limit, we get a selection of several possible tracks per particle. Without any additional tools however, it is impossible to figure out if the right track is in the selection\footnote{\alignLongunderstack{\text{Based on detector efficiency it is possible for a particle to leave fewer}\\ \text{than 8 hits and therefore not be reconstructed by the algorithm}}} and, if yes, which one of them is the correct track.
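The $\chi^2$ selection described above, keeping every helix fit below some arbitrary limit instead of only the minimum, can be sketched in Python as follows; the (chi2, track) pair structure and the limit value are hypothetical, not the reconstruction software's actual interface:

def candidate_tracks(fits, chi2_limit=50.0):
    # fits: iterable of (chi2, track) pairs from the helix fitter.
    # Keep all candidates below the limit, best fit first; a later
    # classifier must then decide which candidate is the true track.
    return sorted((pair for pair in fits if pair[0] < chi2_limit),
                  key=lambda pair: pair[0])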
\begin{figure}[H] \begin{center} \includegraphics[width=.8\textwidth]{img/tracks_in_det_xy.png} -\caption{Particle recurling back into the center station} +\caption{Particle recurling back into the center station (highlighted)} \label{recurler} \end{center} \end{figure} @@ -76,7 +76,7 @@ \begin{figure}[H] \begin{center} \includegraphics[width=.8\textwidth]{img/tracks_in_det_z.png} -\caption{Particle recurling back into the center station} +\caption{Particle recurling back into the center station (highlighted)} \label{recurler} \end{center} \end{figure} diff --git a/Report/04_machine_learning.aux b/Report/04_machine_learning.aux index 324b040..440d3ae 100644 --- a/Report/04_machine_learning.aux +++ b/Report/04_machine_learning.aux @@ -42,19 +42,19 @@ \newlabel{batch_norm}{{9}{27}{The effects of Batch Normalization on data\relax }{figure.caption.13}{}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.3}Recurrent Neural Networks}{27}{subsection.5.3}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {5.3.1}General concepts}{27}{subsubsection.5.3.1}} -\citation{schuster1997bidirectional} \@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces General RNN architecture\relax }}{28}{figure.caption.14}} \newlabel{RNN_arch}{{10}{28}{General RNN architecture\relax }{figure.caption.14}{}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.3.2}Most common architectures}{28}{subsubsection.5.3.2}} +\citation{schuster1997bidirectional} \citation{gers1999learning} \citation{chung2014empirical} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.3.2}Most common architectures}{29}{subsubsection.5.3.2}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {5.3.3}Cell types}{29}{subsubsection.5.3.3}} \citation{ML:XGBoost} \@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces Architecture of a LSTM cell\relax }}{30}{figure.caption.15}} \newlabel{LSTM_arch}{{11}{30}{Architecture of a LSTM cell\relax }{figure.caption.15}{}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.4}XGBoost}{30}{subsection.5.4}} \@setckpt{04_machine_learning}{ -\setcounter{page}{31} +\setcounter{page}{32} \setcounter{equation}{11} \setcounter{enumi}{0} \setcounter{enumii}{0} diff --git a/Report/04_machine_learning.tex b/Report/04_machine_learning.tex index 49fe009..064aa16 100644 --- a/Report/04_machine_learning.tex +++ b/Report/04_machine_learning.tex @@ -11,7 +11,7 @@ \subsubsection{General concepts} -The fundamental concept behind artificial neural networks is to imitate the architecture of the human brain. They can be used for classification problems as well as regression problems. In its most simple form it can be thought of some sort of mapping from some input to some target. For this thesis two neural networks of a special subtype of neural networks, called recurrent neural networks, were used. All of the networks used in this thesis were written in the python library Keras \cite{chollet2015keras} with a Tensorflow \cite{abadi2016tensorflow} backend. In this section the basic principles of neural networks will be explained.\\ +The fundamental concept behind artificial neural networks is to imitate the architecture of the human brain. They can be used for classification problems as well as regression problems. In its most simple form, it can be thought of some sort of mapping from some input to some target. For this thesis two neural networks of a special subtype of neural networks, called recurrent neural networks, were used. 
All of the networks used in this thesis were written in the Python library Keras \cite{chollet2015keras} with a TensorFlow \cite{abadi2016tensorflow} backend. In this section, the basic principles of neural networks will be explained.\\ A neural network consists of many neurons organized in layers as seen in figure \ref{neural_network_arch}. Each neuron is connected to every neuron in the neighbouring layers, while each of these connections has a specific weight assigned to it.\\ In its most basic form, each neuron calculates a weighted sum of all of its inputs and then adds a bias to it. In addition, each neuron has an activation function, which will be applied at the end of the calculation (see also figure \ref{neuron}): @@ -39,7 +39,7 @@ \end{center} \end{figure} -There is no way of knowing how many dimensions and layers will give you the best performance, as one can only define general effects of what happens when they are being modified. Generally, increasing the number of layers enables the system to solve more complex problems, while more dimensions make the system more flexible. However, even these general guidelines are to be applied with caution. For example; adding too many layers can cause the system to train exceedingly slow, whilst adding to many neurons with a too small training set can result in overfitting\footnote{When a system performs well on the training set but poorly on the test set}. Depending on the problem and the data given, each has its own optimal configuration. By gaining more experience with NN, people can take better guesses where to start. However, in the end it always results in some sort of systematic trial and error to find the optimal configuration.\\ +There is no way of knowing how many dimensions and layers will give the best performance, as one can only describe the general effects of modifying them. Generally, increasing the number of layers enables the system to solve more complex problems, while more dimensions make the system more flexible. However, even these general guidelines are to be applied with caution. For example, adding too many layers can cause the system to train exceedingly slowly, whilst adding too many neurons with too small a training set can result in overfitting\footnote{When a system performs well on the training set but poorly on the test set}. Depending on the problem and the data given, each has its own optimal configuration. With more experience with NN's, one can make better guesses about where to start. However, in the end it always comes down to some sort of systematic trial and error to find the optimal configuration.\\ \subsubsection{Activation functions} @@ -84,7 +84,7 @@ \subsubsection{Loss functions} -To train the system we need some way to parametrize the quality of our predictions. To account for that we use a loss function. A loss function takes the predicted values of the system and the targeted values to give us an absolute value of our performance. There are various loss functions. In the two RNN's "mean squared error"(MSE, formula \ref{MSE}) and "binary crossentropy"(BC, formula \ref{BC}) were being used. The goal of every NN is to minimize the loss function. +To train the system, we need some way to quantify the quality of our predictions. For that, we use a loss function. A loss function compares the predicted values of the system with the target values to give us a single measure of our performance. There are various loss functions.
In the two RNN's "mean squared error"(MSE, formula \ref{MSE}) and "binary crossentropy"(BC, formula \ref{BC}) were being used. The goal of every NN is to minimize the loss function. \begin{align} L(w,b) = \frac{1}{n} \sum^n_{i=1} (\hat{Y}_i(w_i,b_i) - Y_i)^2 @@ -117,7 +117,7 @@ \subsubsection{Adam} -The most commonly used algorithm however is the Adam \cite{chilimbi2014project}, which stands for Adaptive Moment estimation, training algorithm (see formulas \ref{adam_alg}). Is is essentially a combination of Momentum and RMSProp and takes the best of both. It is also the one used to train both RNN's of this thesis as it converges the quickest and most reliable to the global minimum. The algorithm contains two moments. The first moment is an exponentially decaying average of past gradients as in Momentum while the second +The most commonly used algorithm however, is the Adam algorithm \cite{chilimbi2014project}, which stands for Adaptive Moment estimation, training algorithm (see formulas \ref{adam_alg}). Is is essentially a combination of Momentum and RMSProp and takes the best of both. It is also the one used to train both RNN's of this thesis as it converges the quickest and most reliable to the global minimum. The algorithm contains two moments. The first moment is an exponentially decaying average of past gradients as in Momentum while the second moment is an exponentially decaying average of past squared gradients as in RMSProp. @@ -153,7 +153,7 @@ \subsubsection{Decaying learning rate} -To counteract the problem of "jumping" over the minimum repeatedly, some NN also use a decaying learning rate during their training. By using this the step size gets smaller with every consecutive step which should in principle result in the step size converging to zero when reaching the global minimum. Most NN's, as well as the two RNN's used in this thesis, usually don't use a decaying learning rate as the Adam algorithm on its own already performs well enough. +To counteract the problem of "jumping" over the minimum repeatedly, some NN also use a decaying learning rate during their training. By using this, the step size gets smaller with every consecutive step which should in principle result in the step size converging to zero when reaching the global minimum. Most NN's, as well as the two RNN's used in this thesis, usually don't use a decaying learning rate as the Adam algorithm on its own already performs well enough. \subsubsection{Batch normalisation} @@ -198,19 +198,19 @@ There are two concepts of how the data is fed into the system and three structures of RNN's depending on the input and output of the system.\\ -Usually the data is fed into the system step by step just. For problems where note the entire sequence is known already at this is the only way to feed the data into the system.\\ -If however the entire sequence is already known at the beginning, e.g. sequence classification, often the information is fed into the system from both sides. Networks with this specific architecture are called bidirectional RNN's \cite{schuster1997bidirectional}. This often increases the systems performance.\\ -However as with the first RNN, we wanted to predict particle tracks after leaving the detector, we could only use a one directional RNN as the whole track wasn't available. The second RNN is actually a classifier of the tracks. With the whole information available from the start, it was designed to be a bidirectional RNN.\\ +Usually, the data is fed into the system step by step. 
For problems where the entire sequence is not already known at the start, this is the only way to feed the data into the system.\\ +If, however, the entire sequence is already known at the beginning, e.g. in sequence classification, the information is often read by the system forwards and backwards. Networks with this specific architecture are called bidirectional RNN's \cite{schuster1997bidirectional}. This often increases the system's performance.\\ +However, as we wanted the first RNN to predict particle tracks after leaving the detector, we could only use a one-directional RNN, as the whole track wasn't available. The second RNN is actually a classifier of the tracks. With the whole information available from the start, it was designed to be a bidirectional RNN.\\ -A system has a "many-to-one" architecture, if we have a sequential input but we only care the final output of the system, e.g. classification problems. This is the architecture used for both RNN's. With the same reasoning, if we have sequential inputs and want care about the output generated at each step, e.g. speech recognition, the architecture is called "many-to-many". A "one-to-one" architecture is basically just a regular NN. +A system has a "many-to-one" architecture if we have a sequential input but only care about the final output of the system, e.g. in classification problems. This is the architecture used for both RNN's. With the same reasoning, if we have sequential inputs and care about the output generated at each step, e.g. in speech recognition, the architecture is called "many-to-many". A "one-to-one" architecture is basically just a regular NN. \subsubsection{Cell types} -Besides the basic RNN cell type, which shall not be discussed in detail in this thesis, the two most influential and successful cell types are Long-Short-Term-Memory(LSTM) \cite{gers1999learning} cells and Gated Recurrent Units(GRU) \cite{chung2014empirical}. However in this thesis only LSTM cells will be explained in greater detail as the were the only cells used in the RNN's.\\ +Besides the basic RNN cell type, which shall not be discussed in detail in this thesis, the two most influential and successful cell types are Long Short-Term Memory (LSTM) \cite{gers1999learning} cells and Gated Recurrent Units (GRU) \cite{chung2014empirical}. However, in this thesis only LSTM cells will be explained in greater detail, as they were the only cells used in the RNN's.\\ GRU's were invented with the intention to create a cell type with a similar performance to the LSTM cell while having a simpler internal structure. By being less complex than an LSTM cell, a GRU cell also has fewer parameters to modify during training, which speeds up training.\\ -LSTM cells (see figure \ref{LSTM_arch} have many useful properties such as a forget gate, an update gate as well as an output gate. With this cell type, it is easy to pass down information for the following steps without it being altered in a big way (Long term memory). However, there are also ways built in to update this passed down information with new one (Short term memory). Even though GRU's are gaining more and more traction, LSTM-cells are still widely considered to be the most successful type of cells. +LSTM cells (see figure \ref{LSTM_arch}) have many useful properties, such as a forget gate, an update gate and an output gate. With this cell type, it is easy to pass information down to the following steps without it being altered in a big way (long-term memory).
However, there are also built-in ways to update this passed-down information with new information (short-term memory). Even though GRU's are gaining more and more traction, LSTM cells are still widely considered to be the most successful type of cells. \begin{figure}[H] \begin{center} @@ -234,6 +234,6 @@ \subsection{XGBoost} -XGBoost\cite{ML:XGBoost} is based on boosted decision trees (extreme gradient boosting). In this approach the data samples get split using a decision tree. With every step a new tree gets created to account for the errors of prior models, which are then added to create the final prediction. A gradient descent algorithm is used to minimize loss when adding new trees. \\ +XGBoost\cite{ML:XGBoost} is based on boosted decision trees (extreme gradient boosting). In this approach, the data samples get split using a decision tree. With every step, a new tree gets created to account for the errors of prior models; the trees are then added together to create the final prediction. A gradient descent algorithm is used to minimize the loss when adding new trees. \\ -Its is often used as a classifier, however it can also used in regression models. In this thesis, an XGBoost classifier was used to determine a baseline and have some comparison for our bidirectional RNN classifier. \ No newline at end of file +It is often used as a classifier. However, it can also be used in regression models. In this thesis, an XGBoost classifier was used to determine a baseline and have some comparison for our bidirectional RNN classifier. \ No newline at end of file diff --git a/Report/05_Data.aux b/Report/05_Data.aux index 2027651..05256db 100644 --- a/Report/05_Data.aux +++ b/Report/05_Data.aux @@ -2,14 +2,14 @@ \providecommand\hyper@newdestlabel[2]{} \citation{agostinelli2003s} \citation{pedregosa2011scikit} -\@writefile{toc}{\contentsline {section}{\numberline {6}Data}{31}{section.6}} -\@writefile{toc}{\contentsline {subsection}{\numberline {6.1}General information}{31}{subsection.6.1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {6.2}Preprocessing}{31}{subsection.6.2}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {6.2.1}Dataset 1}{31}{subsubsection.6.2.1}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {6.2.2}Dataset 2}{32}{subsubsection.6.2.2}} -\newlabel{dataset2}{{6.2.2}{32}{Dataset 2}{subsubsection.6.2.2}{}} +\@writefile{toc}{\contentsline {section}{\numberline {6}Data}{32}{section.6}} +\@writefile{toc}{\contentsline {subsection}{\numberline {6.1}General information}{32}{subsection.6.1}} +\@writefile{toc}{\contentsline {subsection}{\numberline {6.2}Preprocessing}{32}{subsection.6.2}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {6.2.1}Dataset 1}{32}{subsubsection.6.2.1}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {6.2.2}Dataset 2}{33}{subsubsection.6.2.2}} +\newlabel{dataset2}{{6.2.2}{33}{Dataset 2}{subsubsection.6.2.2}{}} \@setckpt{05_Data}{ -\setcounter{page}{33} +\setcounter{page}{34} \setcounter{equation}{11} \setcounter{enumi}{0} \setcounter{enumii}{0} diff --git a/Report/05_Data.tex b/Report/05_Data.tex index 5d7084a..e18b12d 100644 --- a/Report/05_Data.tex +++ b/Report/05_Data.tex @@ -6,16 +6,16 @@ The first dataset (dataset 1) contained 46896 true 8-hit tracks of recurling particles, with each hit consisting of 3 coordinates (x,y,z).\\ -The second dataset(dataset 2) contained 109821 tracks. These were exclusively tracks that the current track reconstruction algorithm wasn't conclusively able to assign to an event.
As a result every event contained all the preselected tracks, computed by the already existing algorithm, that were calculated to be a possible track. It is important to note that only for around $75\%$ of the events, the true track was in this preselection. This posed an additional challenge, as one could not just simply chose the best fitting track. To assign the tracks to their corresponding events, they all carried an event number with them matching them with their event.\footnote{One number for all tracks of the same events}. Each track contained the coordinates of the 8 hits (x,y,z), the value of the $\chi^2$-fit performed by the reconstruction algorithm, the event number as well as a label with told us if the track was true or false\footnote{Only used for training and testing of the system}. +The second dataset (dataset 2) contained 109821 tracks. These were exclusively tracks that the current track reconstruction algorithm wasn't conclusively able to assign to an event. As a result, every event contained all the preselected tracks, computed by the already existing algorithm, that were calculated to be possible tracks. It is important to note that only for around $75\%$ of the events the true track was in this preselection. This posed an additional challenge, as one could not just simply choose the best-fitting track. To assign the tracks to their corresponding events, each track carried an event number matching it with its event\footnote{One number for all tracks of the same event}. Each track contained the coordinates of the 8 hits (x,y,z), the value of the $\chi^2$-fit performed by the reconstruction algorithm, the event number, as well as a label which told us if the track was true or false\footnote{Only used for training and testing of the system}. \subsection{Preprocessing} \subsubsection{Dataset 1} -To optimize the data fed into the RNN, dataset 1 was preprocessed. In a first step a a min-max scaler with a range of $[-0.9,0.9]$ from the python library Scikit-learn \cite{pedregosa2011scikit} was used. In a second step the data got shuffled and split into the training and test sets. The first four steps were used as an input for the RNN while the second four steps were our prediction target. +To optimize the data fed into the RNN, dataset 1 was preprocessed. In a first step, a min-max scaler with a range of $[-0.9,0.9]$ from the Python library Scikit-learn \cite{pedregosa2011scikit} was used. This particular choice of range was based on the fact that a $tanh$ activation function was used in the output layer. To accommodate its property of being asymptotically bounded by $\pm 1$, we chose a range of $[-0.9,0.9]$ so that all the data is easily reachable by the system. In a second step, the data got shuffled and split into the training and test sets. The first four steps were used as the input for the RNN, while the second four steps were our prediction target. \subsubsection{Dataset 2} \label{dataset2} -Analogously to dataset 1, first the coordinates of the tracks as well as the $\chi^2$ were scaled with a min max scaler (separate ones) with a range of $[-0.9,0.9]$ from the python library Scikit-learn. Then the first four steps of every track were taken and fed into our first track predicting RNN. For each of the last four steps of a track we then had two sets of coordinates. One were the predicted coordinates of our RNN and the other one the coordinates given by the reconstructing algorithm.
To have the information of the $\chi^2$ fit available at each step, we created a an array of shape $(\#tracks, steps, 4)$ (1 dimension for each of the coordinates and another for the $\chi^2$ fit). However at the spot of the x,y,z coordinates there were neither the predicted coordinates of our RNN nor the coordinates given by the reconstructing algorithm but instead the difference of the two. Our target was the truth value of each track\footnote{$1 =$ true, $0 =$ false}. +Analogously to dataset 1, first the coordinates of the tracks as well as the $\chi^2$ were scaled with min-max scalers (a separate one for each) with a range of $[-0.9,0.9]$ from the Python library Scikit-learn. Then the first four steps of every track were taken and fed into our first, track-predicting RNN. For each of the last four steps of a track, we then had two sets of coordinates: the coordinates predicted by our RNN and the coordinates given by the reconstruction algorithm. To have the information of the $\chi^2$ fit available at each step, we created an array of shape $(\#tracks, steps, 4)$ (1 dimension for each of the coordinates and another for the $\chi^2$ fit). However, in place of the x,y,z coordinates we stored neither the coordinates predicted by our RNN nor the ones given by the reconstruction algorithm, but the difference of the two. Our target was the truth value of each track\footnote{$1 =$ true, $0 =$ false}. diff --git a/Report/06_RNN_used.aux b/Report/06_RNN_used.aux index 114243c..3a2686c 100644 --- a/Report/06_RNN_used.aux +++ b/Report/06_RNN_used.aux @@ -1,14 +1,14 @@ \relax \providecommand\hyper@newdestlabel[2]{} -\@writefile{toc}{\contentsline {section}{\numberline {7}RNN's used}{33}{section.7}} -\@writefile{toc}{\contentsline {subsection}{\numberline {7.1}RNN for track prediction}{33}{subsection.7.1}} -\@writefile{lof}{\contentsline {figure}{\numberline {12}{\ignorespaces RNN Prediction architecture\relax }}{33}{figure.caption.16}} -\newlabel{RNN_pr_arch}{{12}{33}{RNN Prediction architecture\relax }{figure.caption.16}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {7.2}RNN for classification of tracks}{34}{subsection.7.2}} -\@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces RNN classifier architecture\relax }}{35}{figure.caption.17}} -\newlabel{RNN_cl_arch}{{13}{35}{RNN classifier architecture\relax }{figure.caption.17}{}} +\@writefile{toc}{\contentsline {section}{\numberline {7}RNN's used}{34}{section.7}} +\@writefile{toc}{\contentsline {subsection}{\numberline {7.1}RNN for track prediction}{34}{subsection.7.1}} +\@writefile{lof}{\contentsline {figure}{\numberline {12}{\ignorespaces RNN Prediction architecture\relax }}{34}{figure.caption.16}} +\newlabel{RNN_pr_arch}{{12}{34}{RNN Prediction architecture\relax }{figure.caption.16}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {7.2}RNN for classification of tracks}{35}{subsection.7.2}} +\@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces RNN classifier architecture\relax }}{36}{figure.caption.17}} +\newlabel{RNN_cl_arch}{{13}{36}{RNN classifier architecture\relax }{figure.caption.17}{}} \@setckpt{06_RNN_used}{ -\setcounter{page}{37} +\setcounter{page}{38} \setcounter{equation}{11} \setcounter{enumi}{0} \setcounter{enumii}{0} diff --git a/Report/06_RNN_used.tex b/Report/06_RNN_used.tex index 48bb9a6..bf6b06a 100644 --- a/Report/06_RNN_used.tex +++ b/Report/06_RNN_used.tex @@ -18,8 +18,8 @@ \begin{itemize} \item[1. Layer:] 50 LSTM cells \item[2.
Layer:] 50 LSTM cells -\item[3. Layer:] 50 Dense cells\footnote{Dense cells are basically just basic NN cells as explained in section \ref{ML_Intro}} -\item[4. Layer:] 12 Dense cells +\item[3. Layer:] Dense layer (50 cells)\footnote{Dense-layer cells are just basic NN cells, as explained in section \ref{ML_Intro}} +\item[4. Layer:] Dense layer (12 cells) \end{itemize} The optimal number of layers, cells and cell-type was found by systematically comparing RNN's that are equal besides one property (e.g. using GRU's instead of LSTM cells). Also, all the activation functions were chosen to be selu's.\\ @@ -38,7 +38,7 @@ \item Value of the $\chi^2$ fit \end{itemize} -The output was then just a one dimensional vector, where $1$ stands for a true track and $0$ stands for a false track. The RNN itself is going to predict a number between $0$ and $1$ which can be interpreted as amount of confidence that it is a true track. +The output was then just a one-dimensional vector, where $1$ stands for a true track and $0$ stands for a false track. The RNN itself is going to predict a number between $0$ and $1$, which can be interpreted as the amount of confidence that it is a true track. \begin{figure}[H] \begin{center} @@ -48,15 +48,15 @@ \end{center} \end{figure} -The RNN for the classification was chosen to be bidirectional and as in the RNN before LSTM cells were used. Here a tanh was used for all the activation functions besides the last one. The last layer used a softmax activation function\footnote{Similar to a tanh but bounded between [0,1]} As tanh doesn't automatically do batch normalization, between every layer of cells a batch normalization layer was added.\\ +The RNN for the classification was chosen to be bidirectional and, as in the RNN before, LSTM cells were used. Here, a tanh was used for all the activation functions besides the last one. The last layer used a sigmoid activation function\footnote{Similar to a tanh but bounded between [0,1]}. As tanh, unlike selu, is not self-normalizing, a batch normalization layer was added between every layer of cells.\\ The layout of the layers was as follows: \begin{itemize} \item[1. Layer:] 30 LSTM cells (bidirectional, batch normalization) \item[2. Layer:] 30 LSTM cells (bidirectional, batch normalization) \item[3. Layer:] 30 LSTM cells (bidirectional, batch normalization) -\item[4. Layer:] 50 Dense cells (batch normalization) -\item[5. Layer:] 1 Dense cell (softmax actication function) +\item[4. Layer:] Dense layer (50 cells, batch normalization) +\item[5. Layer:] Dense layer (1 cell, sigmoid activation function) \end{itemize} -The optimal number of layers, cells and cell-type was found by systematically comparing different RNN architectures. Also it is important to note that the second RNN is directly dependant of the first RNN. When changing the first RNN one would also have to retrain the second. \ No newline at end of file +The optimal number of layers, cells and cell-type was found by systematically comparing different RNN architectures. Also, it is important to note that the second RNN is directly dependent on the first RNN. When changing the first RNN, one would also have to retrain the second.
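+Purely as an illustration of this layout (a sketch in Keras, the library used for this thesis, with the layer sizes from the list above; the input shape and all remaining hyperparameters are assumptions, not the actual thesis code), the classifier could look like: \begin{verbatim}
from keras.models import Sequential
from keras.layers import LSTM, Dense, Bidirectional, BatchNormalization

model = Sequential()
# Input: 4 steps with 4 features each (x,y,z differences and the chi^2),
# matching the (#tracks, steps, 4) array described in the Data section.
model.add(Bidirectional(LSTM(30, return_sequences=True), input_shape=(4, 4)))
model.add(BatchNormalization())
model.add(Bidirectional(LSTM(30, return_sequences=True)))
model.add(BatchNormalization())
model.add(Bidirectional(LSTM(30)))  # last LSTM layer returns only its final output
model.add(BatchNormalization())
model.add(Dense(50, activation='tanh'))
model.add(BatchNormalization())
model.add(Dense(1, activation='sigmoid'))  # confidence that the track is true

model.compile(optimizer='adam', loss='binary_crossentropy')
\end{verbatim}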
\ No newline at end of file diff --git a/Report/07_Analysis.aux b/Report/07_Analysis.aux index 0cf57a9..429c1b2 100644 --- a/Report/07_Analysis.aux +++ b/Report/07_Analysis.aux @@ -1,30 +1,30 @@ \relax \providecommand\hyper@newdestlabel[2]{} -\@writefile{toc}{\contentsline {section}{\numberline {8}Results}{37}{section.8}} -\@writefile{toc}{\contentsline {subsection}{\numberline {8.1}Best $\chi ^2$}{37}{subsection.8.1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {8.2}RNN classifier with RNN track prediction input}{37}{subsection.8.2}} -\newlabel{RNN_tp_fp_hist}{{14a}{38}{Number of false positives and false negatives depending cut\relax }{figure.caption.18}{}} -\newlabel{sub@RNN_tp_fp_hist}{{a}{38}{Number of false positives and false negatives depending cut\relax }{figure.caption.18}{}} -\newlabel{RNN_ROC}{{14b}{38}{ROC curve for the RNN model\relax }{figure.caption.18}{}} -\newlabel{sub@RNN_ROC}{{b}{38}{ROC curve for the RNN model\relax }{figure.caption.18}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {14}{\ignorespaces XGBoost classifier figures\relax }}{38}{figure.caption.18}} +\@writefile{toc}{\contentsline {section}{\numberline {8}Results}{38}{section.8}} +\@writefile{toc}{\contentsline {subsection}{\numberline {8.1}Best $\chi ^2$}{38}{subsection.8.1}} +\@writefile{toc}{\contentsline {subsection}{\numberline {8.2}RNN classifier with RNN track prediction input}{38}{subsection.8.2}} +\newlabel{RNN_tp_fp_hist}{{14a}{39}{Number of false positives and false negatives depending cut\relax }{figure.caption.18}{}} +\newlabel{sub@RNN_tp_fp_hist}{{a}{39}{Number of false positives and false negatives depending cut\relax }{figure.caption.18}{}} +\newlabel{RNN_ROC}{{14b}{39}{ROC curve for the RNN model\relax }{figure.caption.18}{}} +\newlabel{sub@RNN_ROC}{{b}{39}{ROC curve for the RNN model\relax }{figure.caption.18}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {14}{\ignorespaces XGBoost classifier figures\relax }}{39}{figure.caption.18}} \citation{ML:ROC_AUC:Bradley:1997:UAU:1746432.1746434} -\@writefile{toc}{\contentsline {subsection}{\numberline {8.3}XGBoost}{39}{subsection.8.3}} -\newlabel{XGB_tp_fp_hist}{{15a}{40}{Number of false positives and false negatives depending cut\relax }{figure.caption.19}{}} -\newlabel{sub@XGB_tp_fp_hist}{{a}{40}{Number of false positives and false negatives depending cut\relax }{figure.caption.19}{}} -\newlabel{XGB_ROC}{{15b}{40}{ROC curve for the XGBoost model\relax }{figure.caption.19}{}} -\newlabel{sub@XGB_ROC}{{b}{40}{ROC curve for the XGBoost model\relax }{figure.caption.19}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {15}{\ignorespaces XGBoost classifier figures\relax }}{40}{figure.caption.19}} -\@writefile{toc}{\contentsline {subsection}{\numberline {8.4}Comparison in performance of the RNN and XGBoost}{41}{subsection.8.4}} -\newlabel{RNN-XGB_ROC}{{\caption@xref {RNN-XGB_ROC}{ on input line 64}}{41}{Comparison in performance of the RNN and XGBoost}{figure.caption.20}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {16}{\ignorespaces Comparison ROC curves of RNN and XGBoost model\relax }}{41}{figure.caption.20}} +\@writefile{toc}{\contentsline {subsection}{\numberline {8.3}XGBoost}{40}{subsection.8.3}} +\newlabel{XGB_tp_fp_hist}{{15a}{41}{Number of false positives and false negatives depending cut\relax }{figure.caption.19}{}} +\newlabel{sub@XGB_tp_fp_hist}{{a}{41}{Number of false positives and false negatives depending cut\relax }{figure.caption.19}{}} +\newlabel{XGB_ROC}{{15b}{41}{ROC curve for the 
XGBoost model\relax }{figure.caption.19}{}} +\newlabel{sub@XGB_ROC}{{b}{41}{ROC curve for the XGBoost model\relax }{figure.caption.19}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {15}{\ignorespaces XGBoost classifier figures\relax }}{41}{figure.caption.19}} +\@writefile{toc}{\contentsline {subsection}{\numberline {8.4}Comparison in performance of the RNN and XGBoost}{42}{subsection.8.4}} +\@writefile{lof}{\contentsline {figure}{\numberline {16}{\ignorespaces Comparison ROC curves of RNN and XGBoost model\relax }}{42}{figure.caption.20}} +\newlabel{ROC_RNN_XGB}{{16}{42}{Comparison ROC curves of RNN and XGBoost model\relax }{figure.caption.20}{}} \citation{gent1992special} \citation{graves2013speech} -\@writefile{toc}{\contentsline {section}{\numberline {9}Results}{42}{section.9}} -\@writefile{toc}{\contentsline {subsection}{\numberline {9.1}Results}{42}{subsection.9.1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {9.2}Outlook and potential}{42}{subsection.9.2}} +\@writefile{toc}{\contentsline {section}{\numberline {9}Results}{43}{section.9}} +\@writefile{toc}{\contentsline {subsection}{\numberline {9.1}Results}{43}{subsection.9.1}} +\@writefile{toc}{\contentsline {subsection}{\numberline {9.2}Outlook and potential}{43}{subsection.9.2}} \@setckpt{07_Analysis}{ -\setcounter{page}{43} +\setcounter{page}{44} \setcounter{equation}{11} \setcounter{enumi}{0} \setcounter{enumii}{0} diff --git a/Report/07_Analysis.tex b/Report/07_Analysis.tex index e89e200..f89c575 100644 --- a/Report/07_Analysis.tex +++ b/Report/07_Analysis.tex @@ -2,13 +2,13 @@ \subsection{Best $\chi^2$} -The most simple version to try to classify which one is the right path out of the preselection would be to just take a the path with the smallest $\chi^2$. Like this we would choose the path that agrees the most with the track reconstructing algorithm that gives us our preselection. However, as already mentioned, in dataset 2 only around $75\%$ of the events even have the true track among the ones preselected by the reconstruction\footnote{E.g. by not having all 8 hits as a result of detector efficiency (searches for 8 hits)}. In this case we would have to label all the tracks as false tracks. By simply choosing the best $\chi^2$ we don't account for this at all. So by default our maximum accuracy would be around $75\%$ if the true track would really always just be the one with the best $\chi^2$.\\ +The simplest way to classify which one is the right path out of the preselection would be to just take the path with the smallest $\chi^2$. Like this, we would choose the path that agrees the most with the track reconstruction algorithm that gives us our preselection. However, as already mentioned, in dataset 2 only around $75\%$ of the events even have the true track among the ones preselected by the reconstruction\footnote{E.g. by not having all 8 hits as a result of detector efficiency (searches for 8 hits)}. In this case, we would have to label all the tracks as false tracks. By simply choosing the best $\chi^2$ we don't account for this at all. So, by default our maximum accuracy would be around $75\%$, even if the true track really always were the one with the best $\chi^2$.\\ -It turns out the accuracy of this method is only at $52.01\%$. So there is a need for better algorithms to classify this problem. +It turns out the accuracy of this method is only $52.01\%$. So, there is a need for better algorithms for this classification problem.
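+As a minimal sketch of this baseline (using a hypothetical pandas DataFrame with one row per preselected track and columns event, chi2 and label; an illustration, not the actual analysis code): \begin{verbatim}
import pandas as pd

# Hypothetical preselection: one row per track, grouped by event number.
tracks = pd.DataFrame({
    "event": [0, 0, 0, 1, 1],
    "chi2":  [12.3, 9.8, 20.1, 7.4, 7.9],
    "label": [0, 1, 0, 0, 1],   # 1 = true track, 0 = false track
})

# Pick, per event, the track with the smallest chi^2 ...
best = tracks.loc[tracks.groupby("event")["chi2"].idxmin()]

# ... and check how often that choice is actually the true track.
print((best["label"] == 1).mean())  # 0.5 for this toy example
\end{verbatim}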
\subsection{RNN classifier with RNN track prediction input} -The RNN's that we put in sequence (First track prediction then classification) are a much more complex model. When trained they were able to label all the tracks right with an accuracy of around $87.63\%$. Note that the $75\%$ limit of always choosing one track for every event was exceeded\footnote{Usually the one that is considered the best by the corresponding algorithm}.\\ +The RNN's that we put in sequence (first track prediction, then classification) are a much more complex model. When trained, they were able to label the tracks correctly with an accuracy of around $87.63\%$. Note that the $75\%$ limit of always choosing one track for every event was exceeded\footnote{Usually the one that is considered the best by the corresponding algorithm}.\\ \begin{figure}[H] \begin{center} @@ -26,9 +26,9 @@ \end{center} \end{figure} -As shown in figure \ref{RNN_tp_fp_hist} depending on where we apply the cut, we have a changing number of false positives and false negatives. In figure \ref{RNN_tp_fp_hist} the blue bins are false positives and the orange bins are false negatives. Depending on what is more important for the experiment\footnote{E.g. all positives have to be correct $\rightarrow$ increase cut}. One can also qualitatively judge the performance here, as in the optimal case all the false positives would gather at the area where the cut goes to $0$. Analogously we want all the false negatives to gather at the cut around $1$. Here we see that this is fulfilled really well. So already by this graph we see that the system will perform well.\\ +As shown in figure \ref{RNN_tp_fp_hist}, depending on where we apply the cut, we have a changing number of false positives and false negatives. In figure \ref{RNN_tp_fp_hist}, the blue bins are false positives and the orange bins are false negatives. The cut can be chosen depending on what is more important for the experiment\footnote{E.g. if all positives have to be correct $\rightarrow$ increase the cut}. One can also qualitatively judge the performance here, as in the optimal case all the false positives would gather at the area where the cut goes to $0$. Analogously, we want all the false negatives to gather at the cut around $1$. Here we see that this is fulfilled really well. So, already from this graph we see that the system will perform well.\\ -Figure \ref{RNN_ROC} shows the ROC curve \cite{ML:ROC_AUC:Bradley:1997:UAU:1746432.1746434} of the RNN classifier. Generally the more area under the ROC curve the better the classifier. In the perfect case, where everything gets labelled $100\%$ correctly, the area under the curve(ROC AUC) would be $1$ and random guessing would be around $0.5$. Here we have an area of $0.93$. This is already really close to the optimal case. +Figure \ref{RNN_ROC} shows the ROC curve \cite{ML:ROC_AUC:Bradley:1997:UAU:1746432.1746434} of the RNN classifier. Generally, the more area under the ROC curve the better the classifier. In the perfect case, where everything gets labelled $100\%$ correctly, the area under the curve (ROC AUC) would be $1$ and random guessing would be around $0.5$. Here, we have an area of $0.93$. This is already really close to the optimal case. \subsection{XGBoost} @@ -50,19 +50,19 @@ \end{center} \end{figure} -In figure \ref{XGB_tp_fp_hist} the blue bins are false positives and the orange bins are false negatives. Here we see that the bins are more evenly spread and gather less at the edges.
So already qualitatively we can guess that it will perform worse than our RNN's.\\ +In figure \ref{XGB_tp_fp_hist}, the blue bins are false positives and the orange bins are false negatives. Here, we see that the bins are more evenly spread and gather less at the edges. So, already qualitatively we can guess that it will perform worse than our RNN's.\\ -Figure \ref{XGB_ROC} shows the ROC curve of the XGB classifier. Generally the more area under the ROC curve the better the classifier. In the perfect case, where everything gets labelled $100\%$ correctly, the area under the curve would be 1. Here we have an area of $0.88$.\\ +Figure \ref{XGB_ROC} shows the ROC curve of the XGB classifier. Generally, the more area under the ROC curve the better the classifier. In the perfect case, where everything gets labelled $100\%$ correctly, the area under the curve would be 1. Here, we have an area of $0.88$.\\ \subsection{Comparison in performance of the RNN and XGBoost} -The RNN classifier performs with around $6\%$ better accuracy than the XGBoost classifier. Also by comparing the the ROC curves in figure \ref{RNN-XGB_ROC}, one can clearly see that the area under the RNN ROC curve is bigger. In numbers we have around $0.05>$ more area under the curve for the RNN model. The RNN classifier performs significantly better in labelling the 8 hit tracks than the XGBoost model. +The RNN classifier performs with around $6\%$ better accuracy than the XGBoost classifier. Also, by comparing the ROC curves in figure \ref{ROC_RNN_XGB}, one can clearly see that the area under the RNN ROC curve is bigger. In numbers, we have around $0.05$ more area under the curve for the RNN model. The RNN classifier performs significantly better in labelling the 8-hit tracks than the XGBoost model. \begin{figure}[H] \begin{center} \includegraphics[width=0.8\textwidth]{img/RNN-XGB_ROC-curve_comparison.png} -\label{RNN-XGB_ROC} \caption{Comparison ROC curves of RNN and XGBoost model} +\label{ROC_RNN_XGB} \end{center} \end{figure} \newpage @@ -85,5 +85,5 @@ \subsection{Outlook and potential} Where do we want to go from here? One way to improve the algorithm would for example be to create a fully connected neural network \cite{gent1992special}. By doing this, both RNN's would be connected and would train as a unit. This would have the positive effect of not having to retrain the classifying RNN as well whenever the first one gets changed. \\ -Another goal could be to make this type of RNN appliable to more types of problems. So for example instead of being restricted to tracks of a specific length (here eight hits) one could make it more general to be able to deal with an arbitrary length of the track. This would be especially useful for this experiment, as a lot of particles don't just recurl once but many times over (in the central station). Hereby the are creating a lot of background, which minimalizing is crucial to reach our desired sensitivity of $10^{-16}$.\\ -The ultimate goal however would be to replace the current track reconstruction algorithm altogether and put a RNN in its place. This could for example be done by an RNN performing beam search\footnote{Both inside out and outside in} \cite{graves2013speech} to find the true track of a particle.
In other areas, beam search has proven to be a powerful tool and there is a lot of potential for this sort of algorithm in physics as well, especially in track reconstruction \ No newline at end of file +Another goal could be to make this type of RNN applicable to more types of problems. So, for example, instead of being restricted to tracks of a specific length (here eight hits), one could generalize it to deal with tracks of arbitrary length. This would be especially useful for this experiment, as a lot of particles don't just recurl once but many times over (in the central station). Hereby, they create a lot of background, which is crucial to minimize in order to reach our desired sensitivity of $10^{-16}$.\\ +The ultimate goal, however, would be to replace the current track reconstruction algorithm altogether and put an RNN in its place. This could, for example, be done by an RNN performing beam search\footnote{Both inside out and outside in} \cite{graves2013speech} to find the true track of a particle. In other areas, beam search has proven to be a powerful tool and there is a lot of potential for this sort of algorithm in physics as well, especially in track reconstruction. \ No newline at end of file diff --git a/Report/08_Appendix.aux b/Report/08_Appendix.aux index 8eceef3..0bc6ccb 100644 --- a/Report/08_Appendix.aux +++ b/Report/08_Appendix.aux @@ -1,8 +1,8 @@ \relax \providecommand\hyper@newdestlabel[2]{} -\@writefile{toc}{\contentsline {section}{\numberline {10}Acknowledgements}{43}{section.10}} +\@writefile{toc}{\contentsline {section}{\numberline {10}Acknowledgements}{44}{section.10}} \@setckpt{08_Appendix}{ -\setcounter{page}{44} +\setcounter{page}{45} \setcounter{equation}{11} \setcounter{enumi}{0} \setcounter{enumii}{0} diff --git a/Report/08_Appendix.tex b/Report/08_Appendix.tex index 1d48a90..dc84061 100644 --- a/Report/08_Appendix.tex +++ b/Report/08_Appendix.tex @@ -1,6 +1,6 @@ \section{Acknowledgements} -I would like to to thank the Physics Department of the University of Zurich. +I would like to thank the Physics Department of the University of Zurich. Special thanks goes to Prof. Nicola Serra of the University of Zurich, who let me do this thesis in his research group and introduced me to the world of neural networks.\\ -I would also like to express my gratitude towards Dr. Patrick Owen for providing me with data and always being here to help.\\ +I would also like to express my gratitude towards Dr. Patrick Owen for providing me with data and always being there to help when I had questions.\\ Also special thanks goes to Jonas Eschle, who was always there to help me with programming the RNN's and discuss techniques and tricks.
\ No newline at end of file diff --git a/Report/bib/General.bib b/Report/bib/General.bib index 5b35db6..ec9dd5e 100644 --- a/Report/bib/General.bib +++ b/Report/bib/General.bib @@ -1,5 +1,17 @@ %%% General Books and Citations +%Z-width at LEP +@article{akrawy1989measurement, + title={Measurement of the Z0 Mass and Width with the OPAL Detector at LEP}, + author={Akrawy, MZ and Alexander, G and Allison, J and Allport, PP and Anderson, KJ and Armitage, JC and Arnison, GTJ and Ashton, P and Azuelos, G and Baines, JTM and others}, + journal={Physics Letters B}, + volume={231}, + number={4}, + pages={530--538}, + year={1989}, + publisher={Elsevier} +} + %beam search @inproceedings{graves2013speech, title={Speech recognition with deep recurrent neural networks}, diff --git a/Report/img/beta_decay_feynman.png b/Report/img/beta_decay_feynman.png index 1e1bb7f..45f2443 100644 --- a/Report/img/beta_decay_feynman.png +++ b/Report/img/beta_decay_feynman.png Binary files differ diff --git a/Report/img/muon-decay-feynman.png b/Report/img/muon-decay-feynman.png index c5c1208..c678c7d 100644 --- a/Report/img/muon-decay-feynman.png +++ b/Report/img/muon-decay-feynman.png Binary files differ