% vim: set sts=4 et :
\documentclass[11pt,xcolor={dvipsnames,svgnames,x11names,table}]{beamer}

\usetheme[
    bullet=circle,      % Other option: square
    bigpagenumber,      % circled page number on lower right
    topline=true,       % colored bar at the top of the frame
    shadow=false,       % Shading for beamer blocks
    watermark=BG_lower, % png file for the watermark
]{Flip}

\usepackage{bm}
\usepackage{tikz,pgfplots}
\usepackage{graphicx}

% SOME COMMANDS THAT I FIND HANDY
% \renewcommand{\tilde}{\widetilde} % dinky tildes look silly, doesn't work with fontspec
\newcommand{\comment}[1]{\textcolor{comment}{\footnotesize{#1}\normalsize}} % comment mild
\newcommand{\Comment}[1]{\textcolor{Comment}{\footnotesize{#1}\normalsize}} % comment bold
\newcommand{\COMMENT}[1]{\textcolor{COMMENT}{\footnotesize{#1}\normalsize}} % comment crazy bold
\newcommand{\Alert}[1]{\textcolor{Alert}{#1}} % louder alert
\newcommand{\ALERT}[1]{\textcolor{ALERT}{#1}} % loudest alert
%% "\alert" is already a beamer pre-defined
\newcommand*{\Scale}[2][4]{\scalebox{#1}{$#2$}}%

\graphicspath{{images/}} % Put all images in this directory. Avoids clutter.

% suppress frame numbering for backup slides
% you always need the appendix for this!
\newcommand{\backupbegin}{
    \newcounter{framenumberappendix}
    \setcounter{framenumberappendix}{\value{framenumber}}
}
\newcommand{\backupend}{
    \addtocounter{framenumberappendix}{-\value{framenumber}}
    \addtocounter{framenumber}{\value{framenumberappendix}}
}

\begin{document}

\tikzstyle{every picture}+=[remember picture]

{
\setbeamertemplate{sidebar right}{\llap{\includegraphics[width=\paperwidth,height=\paperheight]{bubble2}}}
\begin{frame}[c]%{\phantom{title page}}
\begin{center}
\begin{center}
\begin{columns}
\begin{column}{0.9\textwidth}
\flushright%\fontspec{Trebuchet MS}
\bfseries \Huge {Root Finding}
\end{column}
\begin{column}{0.2\textwidth}
%\includegraphics[width=\textwidth]{SHiP-2}
\end{column}
\end{columns}
\end{center}
\quad
\vspace{3em}
\begin{columns}
\begin{column}{0.6\textwidth}
\flushright
\vspace{-1.8em}
{%\fontspec{Trebuchet MS}
\large Marcin Chrzaszcz, Danny van Dyk\\\vspace{-0.1em}\small
\href{mailto:mchrzasz@cern.ch}{mchrzasz@cern.ch},
\href{mailto:dany.van.dyk@gmail.com}{danny.van.dyk@gmail.com}}
\end{column}
\begin{column}{0.4\textwidth}
\includegraphics[height=1.3cm]{uzh-transp}
\end{column}
\end{columns}
\vspace{1em}
\vspace{0.5em}
\textcolor{normal text.fg!50!Comment}{Numerical Methods, \\ 26. September, 2016}
\end{center}
\end{frame}
}

\begin{frame}{Plan for today}
\begin{itemize}
\item \alert{General problem of finding the root of a function}\\
$\xi$ is a root of $f$ iff $f(\xi) = 0$.
\vfill
\item \alert{Begin with $D=1$}\\
What properties of $f(x)$ can we use to find one, any, or all roots of $f$?
What are the requirements on $f(x)$?
\vfill
\item \alert{How about $D>1$?}\\
Can we generalize root finding from $D=1$ to arbitrary $D$?
\end{itemize}
\end{frame}

\begin{frame}{General iterative procedure}
Let's assume that
\begin{itemize}
\item $f(x)$ is our function of interest,
\item $\xi$ is the only root of $f$,
\item we have a point $x_0$ close to $\xi$
\end{itemize}
We now want to find a sequence $\lbrace x_0, x_1, \dots \rbrace$ that
\begin{enumerate}
\item converges toward $\xi$: $\lim_{k\to \infty} x_k = \xi$,
\item is iterative: $x_{k + 1} = \Phi[f](x_k)$
\end{enumerate}
We can attempt to Taylor expand $f$ around $x_0$ to order $N$, in order to
obtain the generator $\Phi[f]$ for the iteration:
\begin{block}{Expansion around $x_0$}
\begin{equation*}
    f(\xi) = 0 = \sum_{n=0}^N \frac{(\xi - x_0)^n}{n!} f^{(n)}(x_0)
\end{equation*}
where $f^{(n)}(x_0)$ is the $n$th derivative of $f$ at the position $x_0$.
\end{block}
\end{frame}

\begin{frame}{Newton-Raphson method \hfill ($N=1$)}
\begin{block}{Expansion for $N=1$}
\begin{equation}
    \tag{*}
    f(\xi) = 0 = f(x_0) + (\xi - x_0) \cdot f'(x_0) + \mathcal{O}((\xi - x_0)^2)
\end{equation}
\end{block}
\vfill
Algorithm:
\begin{enumerate}
\item start with index $k=0$
\item solve equation (*), assuming a vanishing approximation error:
\begin{gather*}
    x_{k + 1} \leftarrow x_k - \frac{f(x_k)}{f'(x_k)} \approx \xi + \mathcal{O}((\xi - x_k)^2)\\
    \Phi[f](x) \equiv x - \frac{f(x)}{f'(x)}
\end{gather*}
\item if $|f(x_{k + 1})| \leq t$, where $t$ is an a-priori threshold, then stop;
otherwise jump back to step \#2.
\end{enumerate}
\end{frame}

\begin{frame}{Illustration of Newton-Raphson}
\begin{columns}
\begin{column}[T]{.5\textwidth}
\begin{overlayarea}{\textwidth}{\textheight}
$\bm{k=0}$:
\only<2->{%
\begin{equation*}
    x_0 := 1
\end{equation*}
}
\only<3->{%
$\bm{k=1}$:
\begin{align*}
    x_0 & = 1 & f(x_0) & = +0.54 \\
        &     & f'(x_0) & = -0.84 \\
\end{align*}
}
\only<4->{%
\begin{equation*}
    {\color{red} x_1 \leftarrow 1.64}
\end{equation*}
}
\only<6->{%
$\bm{k=2}$:
\begin{align*}
    x_1 & = 1.64 & f(x_1) & = -0.07 \\
        &        & f'(x_1) & = -0.997 \\
\end{align*}
}
\only<7->{%
\begin{equation*}
    {\color{red} x_2 \leftarrow 1.57}
\end{equation*}
}
\end{overlayarea}
\end{column}
\begin{column}[T]{.5\textwidth}
\hspace{-1cm}
\resizebox{1.1\textwidth}{!}{
\begin{tikzpicture}
\begin{axis}[%
    axis x line=center,
    axis y line=center,
    ymin=-0.2,ymax=0.8,
    xmin=0.0,xmax=1.8,
    title=$\cos(x)$
]
\addplot[thick,black,domain=0:3.14] { cos(deg(x)) };
\only<2-3>{
    \addplot[thick,blue,only marks,mark=+,mark size=5pt] coordinates { ( 1, 0.54030 ) };
}
\only<3-4>{
    \addplot+[thick,blue,domain=0:3.14,mark=none] { 0.54030 - 0.84147 * (x - 1) };
}
\only<4-5>{
    \addplot[thick,red,only marks,mark=o,mark size=5pt] coordinates { ( 1.6421, 0.0 ) };
    \draw[thick,red] (axis cs:1.6421, 0) -- (axis cs:1.6421, -0.0712);
}
\only<5-6>{
    \addplot[thick,blue,only marks,mark=+,mark size=5pt] coordinates { ( 1.6421, -0.0712 ) };
}
\only<6-7>{
    \addplot+[thick,blue,domain=0:3.14,mark=none] { -0.0712 - 0.99746 * (x - 1.6421) };
}
\only<7->{
    \addplot[thick,red,only marks,mark=o,mark size=5pt] coordinates { ( 1.5707, 7.8e-5 ) };
}
\end{axis}
\end{tikzpicture}
}
\vfill
\begin{overlayarea}{\textwidth}{.3\textheight}
\only<8->{
\begin{tabular}{c|cc}
$k$ & $x_k$ & $f(x_k)$\\
\hline
$0$ & $1.00$ & $+0.54$\\
$1$ & $1.64$ & $-0.07$\\
$2$ & $1.57$ & $+0.00$\\
\hline
$\xi$&$1.5708$ & $0$
\end{tabular}
}
\end{overlayarea}
\end{column}
\end{columns}
\end{frame}

\begin{frame}{Modified Newton-Raphson method \hfill ($N=2$)}
\begin{block}{Expansion for
$N=2$}
\vspace{-2\medskipamount}
\begin{equation}
    \tag{*}
    f(\xi) = 0 = f(x_0) + (\xi - x_0) \cdot f'(x_0) + \frac{1}{2} (\xi - x_0)^2 f''(x_0) + \mathcal{O}((\xi - x_0)^3)
\end{equation}
\end{block}
\vfill
Algorithm:
\begin{enumerate}
\item start with index $k=0$
\item solve equation (*), assuming a vanishing approximation error:
\begin{align*}
    a_\pm & \leftarrow x_k - \frac{f'(x_k) \pm \sqrt{[f'(x_k)]^2 - 2 f(x_k) f''(x_k)}}{f''(x_k)}\\
          & \approx \xi + \mathcal{O}((\xi - x_k)^3)
\end{align*}
\item if $|f(a_+)| < |f(a_-)|$, then $x_{k + 1} \leftarrow a_+$; otherwise $x_{k + 1} \leftarrow a_-$
\item if $|f(x_{k + 1})| \leq t$, where $t$ is an a-priori threshold, then stop;
otherwise jump back to step \#2.
\end{enumerate}
\end{frame}

\begin{frame}{Rate of convergence}
Does this iterative procedure converge? If yes, how fast? How can we quantify
the rate of convergence?
\begin{itemize}
\item we have \alert{local} convergence of order $p \geq 1$, if for all $x \in U(\xi)$
\begin{equation*}
    || \Phi[f](x) - \xi|| \leq C \cdot || x - \xi||^p\,,\quad\text{with}\quad C \geq 0\,.
\end{equation*}
Note: if $p = 1$ then we must have $C < 1$.
\item we have \alert{global} convergence if $U(\xi) = \mathbb{R}$
\item for $D=1$ we can calculate $p$ if $\Phi[f](x)$ is differentiable to
sufficient degree:
\begin{align*}
    \Phi(x) - \xi = \Phi(x) - \Phi(\xi) = \frac{\Phi^{(p)}(\xi)}{p!} (x - \xi)^p + o(||x - \xi||^p)
\end{align*}
\end{itemize}
\end{frame}

\begin{frame}{Rate of convergence for Newton-Raphson}
We have $\Phi(x) \equiv \Phi[f](x)$:
\begin{itemize}
\item $\Phi(\xi) = \xi$ by construction
\item $\Phi'(\xi) = \frac{f(\xi) \cdot f''(\xi)}{[f'(\xi)]^2} = 0$ \alert{(*)} by construction
\item $\Phi''(\xi) = \frac{f''(\xi)}{f'(\xi)}$
\end{itemize}
We can therefore write
\begin{equation}
    \Phi(x) - \Phi(\xi) = \frac{(x - \xi)^2}{2!} \Phi''(\xi) + o(||x - \xi||^2)\,.
\end{equation}
The Newton-Raphson method converges therefore \emph{at least quadratically},
i.e.: it is a second-order method.
\vfill
\alert{(*): only if $f'(\xi) \neq 0$}, which is equivalent to $\xi$ is a simple
root of $f$.
\end{frame}

\begin{frame}{Pathological Example}
\begin{columns}
\begin{column}[T]{.5\textwidth}
\begin{overlayarea}{\textwidth}{\textheight}
$\bm{k=0}$:
\only<2->{%
\begin{equation*}
    x_0 := 0.5
\end{equation*}
}
\only<3->{%
$\bm{k=1}$:
\begin{align*}
    x_0 & = 0.5 & f(x_0) & = +0.707 \\
        &       & f'(x_0) & = +0.707 \\
\end{align*}
}
\only<4->{%
\begin{equation*}
    {\color{red} x_1 \leftarrow -0.5}
\end{equation*}
}
\only<6->{%
$\bm{k=2}$:
\begin{align*}
    x_1 & = -0.5 & f(x_1) & = +0.707 \\
        &        & f'(x_1) & = -0.707 \\
\end{align*}
}
\only<7->{%
\begin{equation*}
    {\color{red} x_2 \leftarrow +0.5}
\end{equation*}
}
\end{overlayarea}
\end{column}
\begin{column}[T]{.5\textwidth}
\hspace{-1cm}
\resizebox{1.1\textwidth}{!}{
\begin{tikzpicture}
\begin{axis}[%
    axis x line=center,
    axis y line=center,
    ymin=-0.1,ymax=0.9,
    xmin=-0.7,xmax=0.7,
    title=$\sqrt{|x|}$,
    samples=160
]
\addplot[thick,black,domain=-1:+0] { sqrt(abs(x)) };
\addplot[thick,black,domain=-0:+1] { sqrt(abs(x)) };
\only<2-3>{
    \addplot[thick,blue,only marks,mark=+,mark size=5pt] coordinates { ( +0.5, 0.707 ) };
}
\only<3-4>{
    \addplot+[thick,blue,domain=-1:1,mark=none] { 0.707 + 0.707 * (x - 0.5) };
}
\only<4-5>{
    \addplot[thick,red,only marks,mark=o,mark size=5pt] coordinates { ( -0.5, 0.707 ) };
    \draw[thick,red] (axis cs:-0.5, 0) -- (axis cs:-0.5, +0.707);
}
\only<5-6>{
    \addplot[thick,blue,only marks,mark=+,mark size=5pt] coordinates { ( -0.5, 0.707 ) };
}
\only<6-7>{
    \addplot+[thick,blue,domain=-1:1,mark=none] { 0.707 - 0.707 * (x + 0.5) };
}
\only<7->{
    \addplot[thick,red,only marks,mark=o,mark size=5pt] coordinates { ( 0.5, 0.707 ) };
    \draw[thick,red] (axis cs:+0.5, 0) -- (axis cs:+0.5, +0.707);
}
\end{axis}
\end{tikzpicture}
}
\vfill
\begin{overlayarea}{\textwidth}{.3\textheight}
~\\\bigskip
endless loop
\end{overlayarea}
\end{column}
\end{columns}
\end{frame}

\begin{frame}{Pitfalls of Newton-Raphson}
We had previously
discussed that Newton-Raphson is guaranteed to converge in a neighborhood
$U(\xi)$ of the root $\xi$.\\
\medskip
However, it may fail to converge or converge only very slowly\dots
\begin{itemize}
\item if $U(\xi)$ is a small interval and the initial point lies outside of $U$,
\item if the algorithm encounters a stationary point $\chi$ of $f$ (i.e. $f'(\chi) = 0$),
\item if $\xi$ is an $m$-multiple root of $f$ (i.e. $f(x) \sim (x - \xi)^m \dots$)
\end{itemize}
The last point can be accommodated for: If you know the multiplicity $m$ of the
root, modify the Newton-Raphson step to read:
\begin{equation*}
    x_{k + 1} \leftarrow x_{k} - m \frac{f(x_k)}{f'(x_k)}
\end{equation*}
Basically, this rescales the derivative from $f'(x) \to f'(x) / m$.
\end{frame}

\begin{frame}{Honorable mention: Horner Scheme}
If $f(x) \equiv p_n(x)$ is a polynomial of degree $n$ in $x$, there might be
multiple roots of $f$, i.e. roots $\xi_{n}$ with $f'(\xi_n) = 0$.\\
\vfill
The Horner Scheme allows one to efficiently calculate all the (multiple) roots
of the polynomial $p_n(x)$.\\
\vfill
However, this is of limited use: The polynomials dominantly arise in numerical
computations as the characteristic polynomial $\chi_M$ of a matrix $M$. In
these cases, the roots of $\chi_M$ correspond to eigenvalues of $M$. However,
there are better/more stable ways to \alert{numerically} compute all
eigenvalues of $M$, instead of finding all roots of $\chi_M$.
\end{frame}

\begin{frame}{Intermission: Derivatives}
Both the original and the modified Newton-Raphson methods involve taking the
derivative $f'$ of our target function $f$ at arbitrary points. Of course, if
we can analytically calculate the derivative, there is no problem.\\
\vfill
What about functions that we can only evaluate numerically? (Think:
Experiments, numerical calculations)\\
\vfill
Enter the field of \emph{numerical differentiation}!
\end{frame}

\begin{frame}{Intermission: Derivatives}
How can we obtain the derivative $f'(x)$ numerically?
Let's start from an expansion of $f(x)$ close to the point of interest $x_0$:
\begin{tikzpicture}
\node at (0,0) {
    \begin{minipage}{\textwidth}
    \begin{align*}
        f(x_0 + 2 h) & = f(x_0) + f'(x_0) \cdot 2 h + \frac{f''(x_0)}{2} \cdot 4 h^2 + \dots\\
        f(x_0 + \phantom2 h) & = f(x_0) + f'(x_0) \cdot \phantom2 h + \frac{f''(x_0)}{2} \cdot \phantom4 h^2 + \dots\\
        f(x_0 {\color{white}+ 2h}) & = f(x_0)\\
        f(x_0 - \phantom2 h) & = f(x_0) - f'(x_0) \cdot \phantom2 h + \frac{f''(x_0)}{2} \cdot \phantom4 h^2 + \dots\\
        f(x_0 - 2 h) & = f(x_0) - f'(x_0) \cdot 2 h + \frac{f''(x_0)}{2} \cdot 4 h^2 + \dots
    \end{align*}
    \end{minipage}
};
%\draw[step=0.50,black,thin] (-4.5,-2.0) grid (4.5,2.0);
\draw[green!75!black,thick,rounded corners=5pt] (-4.5,+0.15) rectangle (+4.5,+1.05);
\only<1>{
    \draw[red, thick,rounded corners=5pt] (-4.5,-0.5) rectangle (+4.5,+0.1);
}
\only<2>{
    \draw[red, thick,rounded corners=5pt] (-4.5,-1.5) rectangle (+4.5,-0.5);
}
\end{tikzpicture}
\begin{overlayarea}{\textwidth}{.4\textheight}
\only<1>{
\begin{block}{Variant \#1: Forward difference quotient}
\begin{equation*}
    \frac{{\color{green!75!black}f(x_0 + h)} - {\color{red!90!black}f(x_0)}}{h} = f'(x_0) + \frac{f''(x_0)}{2} \cdot h = f'(x_0) + o(h)
\end{equation*}
\end{block}
}
\only<2>{
\begin{block}{Variant \#2: Central difference quotient}
\vspace{-1.5\medskipamount}
\begin{equation*}
    \frac{{\color{green!75!black}f(x_0 + h)} - {\color{red!90!black}f(x_0 - h)}}{2h} = f'(x_0) + \frac{f'''(x_0)}{6} h^2 = f'(x_0) + o(h^2)
\end{equation*}
\end{block}
}
\end{overlayarea}
\end{frame}

\begin{frame}{Intermission: How to choose $h$?}
\begin{columns}
\begin{column}{.5\textwidth}
\includegraphics[width=\textwidth]{eps.png}\\
{\tiny \hfill source: Wikipedia}
\begin{itemize}
\item two sources of numerical errors compete:
\begin{itemize}
\item formula errors, and
\item round-off (finite precision) errors
\end{itemize}
\end{itemize}
\end{column}
\begin{column}{.5\textwidth}
\begin{itemize}
\item formula error can be reduced
through ``intelligent formulae''
\item round-off error can be reduced through (schematically):
\begin{align*}
    x_\text{approx} & \leftarrow x + h\\
    h_\text{approx} & \leftarrow x_\text{approx} - x\\
    f' & \leftarrow \frac{f(x_\text{approx}) - f(x)}{h_\text{approx}}
\end{align*}
\item optimal choice of $h$ will be close to
\begin{equation*}
    h \sim \sqrt{\varepsilon} \cdot x
\end{equation*}
with $\varepsilon$: working precision
\end{itemize}
\end{column}
\end{columns}
\end{frame}

\begin{frame}{Intermission: Higher derivatives}
What about $f''(x_0)$, $f^{(3)}(x_0)$ or even higher derivatives?\\
\medskip
\begin{block}{Central difference quotient for $f^{(n)}$}
\begin{align*}
    \sum_{k=0}^n (-1)^k \binom{n}{k} \frac{f(x_0 + (n - 2 k) h)}{(2 h)^n} & = f^{(n)}(x_0) + o(h^2)
\end{align*}
\end{block}
\end{frame}

\newcommand{\del}{\partial}
\begin{frame}{Intermission: Multiple partial derivatives}
What about (multiple) partial derivatives $\del_x \del_y f(x, y)$?\\
\medskip
\begin{block}{Central difference ``stencil'' for $\del_x \del_y f(x, y)$}
Combine what we learned previously:
\begin{multline*}
    \frac{\only<1>{a}\only<2>{+} f(+ h_x, + h_y) \only<1>{+b}\only<2>{-} f(+ h_x, - h_y) \only<1>{+c}\only<2>{-} f(- h_x, + h_y) \only<1>{+d}\only<2>{+} f(- h_x, - h_y)}{4 h_x h_y}\\
    = \only<1>{\dots}\only<2>{\del_x \del_y f(x,y)|_{x=0,y=0} + o(h_x^2) + o(h_y^2)}
\end{multline*}
\end{block}
\vfill
\only<2>{End of intermission.}
\end{frame}

\begin{frame}{Newton-Raphson for $D > 1$?}
What happens if we generalize to $D > 1$?\\
\medskip
Let's consider the case $D=2$ first, for a polynomial of degree $1$ in each
variable
\begin{equation*}
    f(x_1, x_2) = (1 - x_1) \cdot (2 - x_2)
\end{equation*}
\medskip
$f$ has infinitely many roots:
\begin{itemize}
\item $x_1 = 1$ and $x_2$ arbitrary
\item $x_2 = 2$ and $x_1$ arbitrary
\end{itemize}
In order to pin-point only one point in $D=2$, we need to solve $2$ equations:
\begin{align*}
    f_1(x_1, x_2) & = 0, & f_2(x_1, x_2) & = 0
\end{align*}
In $D$ dimensions, we will need to solve
$D$ simultaneous equations!
\end{frame}

\begin{frame}{Generalization to $D > 1$}
Substitute:
\begin{align*}
    f(x) \in \mathbb{R} \quad & \to \quad f(x) \in \mathbb{R}^D\\
    x \in \mathbb{R} \quad & \to \quad x \in \mathbb{R}^D\\
    f'(x) \in \mathbb{R} \quad & \to \quad [D_f]_{ij} \equiv \frac{\del f_i(x)}{\del x_j} \in \mathbb{R}^{D\times D}
\end{align*}
Then the Newton-Raphson step reads
\begin{equation*}
    x^{(k+1)} \leftarrow x^{(k)} - \left[D_f(x^{(k)})\right]^{-1} f(x^{(k)})\,.
\end{equation*}
Problems:
\begin{itemize}
\item the functional matrix $D_f(x^{(k)})$ might be singular, or numerically
close to being singular
\item inverting $D_f$ is generally computationally expensive!
\end{itemize}
In the next two lectures, Marcin will discuss how to solve systems of linear
equations. These methods will also introduce you to methods to numerically
compute the above $x^{(k + 1)}$ from $D_f(x^{(k)})$.
\end{frame}

\backupbegin

\begin{frame}\frametitle{Backup}
\end{frame}

\backupend

\end{document}