diff --git a/Lectures_my/NumMet/Lecture4/lecture4.tex b/Lectures_my/NumMet/Lecture4/lecture4.tex index d9c3352..7893275 100644 --- a/Lectures_my/NumMet/Lecture4/lecture4.tex +++ b/Lectures_my/NumMet/Lecture4/lecture4.tex @@ -256,7 +256,7 @@ & \approx \xi + \mathcal{O}((\xi - x_k)^3) \end{align*} \item if $|f(a_+)| < |f(a_-)|$, then $x_{k + 1} \leftarrow a_+$; otherwise $x_{k + 1} \leftarrow a_-$ - \item if $f(x_{k + 1}) \leq t$, where $t$ is an a-prior threshold, then stop; otherwise + \item if $f(x_{k + 1}) \leq t$, where $t$ is an a-priori threshold, then stop; otherwise jump back to step \#2. \end{enumerate} \end{frame} @@ -265,13 +265,13 @@ Does this iterative procedure converge? If yes, how fast? How can we quantify the rate of convergence? \begin{itemize} - \item we have \alert{local} convergence of order $p \leq 1$, if for all $x \in U(\xi)$ + \item we have \alert{local} convergence of order $p \geq 1$, if for all $x \in U(\xi)$ \begin{equation*} || \Phi[f](x) - \xi|| \leq C \cdot || x - \xi||^p\,,\quad\text{with}\quad C \geq 0\,. \end{equation*} Note: if $p = 1$ then we must have $C < 1$. \item we have \alert{global} convergence if $U(\xi) = \mathbb{R}$ - \item for $D=1$ we can calculate $p$ is $\Phi[f](x)$ differentiable to sufficient degree: + \item for $D=1$ we can calculate $p$ if $\Phi[f](x)$ is differentiable to sufficient degree: \begin{align*} \Phi(x) - \xi = \Phi(x) - \Phi(\xi) = \frac{(x - \xi)^p}{p!} + o(||x - \xi||^p) \end{align*} @@ -295,6 +295,127 @@ \alert{(*): only if $f'(\xi) \neq 0$}, which is equivalent to $\xi$ is a simple root of $f$. \end{frame} +\begin{frame}{Pathological Example} + \begin{columns} + \begin{column}[T]{.5\textwidth} + \begin{overlayarea}{\textwidth}{\textheight} + $\bm{k=0}$: + \only<2->{% + \begin{equation*} + x_0 := 0.5 + \end{equation*} + } + \only<3->{% + $\bm{k=1}$: + \begin{align*} + x_0 & = 0.5 & + f(x_0) & = +0.707 \\ + & & + f'(x_0) & = -0.707 \\ + \end{align*} + } + \only<4->{% + \begin{equation*} + {\color{red} x_1 \leftarrow -0.5} + \end{equation*} + } + \only<6->{% + $\bm{k=2}$: + \begin{align*} + x_1 & = -0.5 & + f(x_0) & = +0.707 \\ + & & + f'(x_0) & = -0.707 \\ + \end{align*} + } + \only<7->{% + \begin{equation*} + {\color{red} x_2 \leftarrow +0.5} + \end{equation*} + } + \end{overlayarea} + \end{column} + \begin{column}[T]{.5\textwidth} + \hspace{-1cm} + \resizebox{1.1\textwidth}{!}{ + \begin{tikzpicture} + \begin{axis}[% + axis x line=center, + axis y line=center, + ymin=-0.1,ymax=0.9, + xmin=-0.7,xmax=0.7, + title=$\sqrt(|x|)$, + samples=160 + ] + \addplot[thick,black,domain=-1:+0] { + sqrt(abs(x)) + }; + \addplot[thick,black,domain=-0:+1] { + sqrt(abs(x)) + }; + \only<2-3>{ + \addplot[thick,blue,only marks,mark=+,mark size=5pt] coordinates { + ( +0.5, 0.707 ) + }; + } + \only<3-4>{ + \addplot+[thick,blue,domain=-1:1,mark=none] { + 0.707 + 0.707 * (x - 0.5) + }; + } + \only<4-5>{ + \addplot[thick,red,only marks,mark=o,mark size=5pt] coordinates { + ( -0.5, 0.707 ) + }; + \draw[thick,red] (axis cs:-0.5, 0) -- (axis cs:-0.5, +0.707); + } + \only<5-6>{ + \addplot[thick,blue,only marks,mark=+,mark size=5pt] coordinates { + ( -0.5, 0.707 ) + }; + } + \only<6-7>{ + \addplot+[thick,blue,domain=-1:1,mark=none] { + 0.707 - 0.707 * (x + 0.5) + }; + } + \only<7->{ + \addplot[thick,red,only marks,mark=o,mark size=5pt] coordinates { + ( 0.5, 0.707 ) + }; + \draw[thick,red] (axis cs:+0.5, 0) -- (axis cs:+0.5, +0.707); + } + \end{axis} + \end{tikzpicture} + } + \vfill + \begin{overlayarea}{\textwidth}{.3\textheight} + ~\\\bigskip + endless loop + \end{overlayarea} + \end{column} + \end{columns} +\end{frame} + +\begin{frame}{Pitfalls of Newton-Raphson} + We had previously discussed that Newton-Raphson is guaranteed to converge + in an environment $U(\xi)$ of the root $\xi$.\\ \medskip + + However, it may fail to converge or converge only very slowly\dots + \begin{itemize} + \item if $U(\xi)$ is a small interval and the initial point lies outside of $U$, + \item if the algorithm encounters a stationary point $\chi$ of $f$ (i.e. $f'(\chi) = 0$), + \item if $\xi$ is a $m$-multiple root of $f$ (i.e. $f(x) \sim (x - \xi)^m \dots$) + \end{itemize} + + The last point can be accomodated for: If you know the mulitplicity $m$ of the root, + modify the Newton-Raphson step to read: + \begin{equation*} + x_{k + 1} \leftarrow x_{k} - m \frac{f(x_k)}{f'(x_k)} + \end{equation*} + Basically, this rescales the derivative from $f'(x) \to f'(x) / m$. +\end{frame} + \begin{frame}{Honorable mention: Horner Scheme} If $f(x) \equiv p_n(x)$ is a polynomial of degree $n$ in $x$, there might be multiple roots of $f$, i.e. roots $\xi_{n}$ with $f'(\xi_n) = 0$.\\ @@ -302,13 +423,176 @@ The Horner Scheme allows to efficiently calculate all the (multiple) roots of the polynomial $p_n(x)$.\\ \vfill - However, this is of limited use: These polynomials usually only arise in computing - the characteric polynomial $\chi_M$ of a matrix $M$. In this case, the roots of $\chi_M$ + However, this is of limited use: The polynomials dominantly arise in numerical computations + as the characteric polynomial $\chi_M$ of a matrix $M$. In these cases, the roots of $\chi_M$ correspond to eigenvalue of $M$. However, the are better/more stables ways to \alert{numerically} compute - all eigenvalues of $M$. + all eigenvalues of $M$, instead of finding all root of $\chi_M$. \end{frame} -\begin{frame}{Newton-Raphson for $D > 1$} +\begin{frame}{Intermission: Derivatives} + Both the original and the modified Newton-Raphson methods involve taking + the derivative $f'$ of our target function $f$ at arbitrary points. Of + course, if we can analytically calculate the derivative, there is no problem.\\ + + \vfill + + What about functions that we can only evaluate numerically? (Think: Experiments, + numerical calculations)\\ + + \vfill + + Enter the field of \emph{numerical differentiation}! +\end{frame} + +\begin{frame}{Intermission: Derivatives} + How can we obtain the derivative $f'(x)$ numerically? Let's start from an expansion of + $f(x)$ close to the point of interest $x_0$: + \begin{tikzpicture} + \node at (0,0) { + \begin{minipage}{\textwidth} + \begin{align*} + f(x_0 + 2 h) & = f(x_0) + f'(x_0) \cdot 2 h + \frac{f''(x_0)}{2} \cdot 4 h^2 + \dots\\ + f(x_0 + \phantom2 h) & = f(x_0) + f'(x_0) \cdot \phantom2 h + \frac{f''(x_0)}{2} \cdot \phantom4 h^2 + \dots\\ + f(x_0 {\color{white}+ 2h}) & = f(x_0)\\ + f(x_0 - \phantom2 h) & = f(x_0) - f'(x_0) \cdot \phantom2 h + \frac{f''(x_0)}{2} \cdot \phantom4 h^2 + \dots\\ + f(x_0 - 2 h) & = f(x_0) - f'(x_0) \cdot 2 h + \frac{f''(x_0)}{2} \cdot 4 h^2 + \dots + \end{align*} + \end{minipage} + }; + %\draw[step=0.50,black,thin] (-4.5,-2.0) grid (4.5,2.0); + \draw[green!75!black,thick,rounded corners=5pt] (-4.5,+0.15) rectangle (+4.5,+1.05); + \only<1>{ + \draw[red, thick,rounded corners=5pt] (-4.5,-0.5) rectangle (+4.5,+0.1); + } + \only<2>{ + \draw[red, thick,rounded corners=5pt] (-4.5,-1.5) rectangle (+4.5,-0.5); + } + \end{tikzpicture} + + + \begin{overlayarea}{\textwidth}{.4\textheight} + \only<1>{ + \begin{block}{Variant \#1: Forward difference quotient} + \begin{equation*} + \frac{{\color{green!75!black}f(x_0 + h)} - {\color{red!90!black}f(x_0)}}{h} = f'(x_0) + \frac{f''(x_0)}{2} \cdot h = f'(x_0) + o(h) + \end{equation*} + \end{block} + } + \only<2>{ + \begin{block}{Variant \#2: Central difference quotient} + \vspace{-1.5\medskipamount} + \begin{equation*} + \frac{{\color{green!75!black}f(x_0 + h)} - {\color{red!90!black}f(x_0 - h)}}{2h} = f'(x_0) + \frac{f'''(x_0)}{3} h^2 = f'(x_0) + o(h^2) + \end{equation*} + \end{block} + } + \end{overlayarea} +\end{frame} + +\begin{frame}{Intermission: How to choose $h$?} + \begin{columns} + \begin{column}{.5\textwidth} + \includegraphics[width=\textwidth]{eps.png}\\ + {\tiny \hfill source: Wikipedia} + \begin{itemize} + \item two sources of numerical errors compete: + \begin{itemize} + \item formula errors, and + \item round-off (finite precision) errors + \end{itemize} + \end{itemize} + \end{column} + \begin{column}{.5\textwidth} + \begin{itemize} + \item formula error can be reduced through ``intelligent formulae'' + \item round-off error can be reduced through (schematically): + \begin{align*} + x_\text{approx} + & \leftarrow x + h\\ + h_\text{approx} + & \leftarrow x_\text{approx} - x\\ + f' & \leftarrow \frac{f(x_\text{approx}) - f(x)}{h_\text{approx}} + \end{align*} + \item optimal choice of $h$ will be close to + \begin{equation*} + h \sim \sqrt{\varepsilon} \cdot x + \end{equation*} + with $\varepsilon$: working precision + \end{itemize} + \end{column} + \end{columns} +\end{frame} + +\begin{frame}{Intermission: Higher derivatives} + What about $f''(x_0)$, $f^{(3)}(x_0)$ or even higher derivatives?\\ + \medskip + \begin{block}{Central difference quotient for $f^{(n)}$} + \begin{align*} + \sum_{k=0}^n (-1)^k \binom{n}{k} \frac{f(x_0 + (n - 2 k) h)}{(2 h)^n} + & = f^{(n)}(x_0) + o(h^2) + \end{align*} + \end{block} +\end{frame} + +\newcommand{\del}{\partial} +\begin{frame}{Intermission: Multiple partial derivatives} + What about (multiple) partial derivatives $\del_x \del_y f(x, y)$?\\ + \medskip + \begin{block}{Central difference ``stencil'' for $\del_x \del_y f(x, y)$} + Combine what we learned previously: + \begin{multline*} + \frac{\only<1>{a}\only<2>{+} f(+ h_x, + h_y) \only<1>{+b}\only<2>{-} f(+ h_x, - h_y) \only<1>{+c}\only<2>{-} f(- h_x, + h_y) \only<1>{+d}\only<2>{+} f(- h_x, - h_y)}{4 h_x h_y}\\ + = \only<1>{\dots}\only<2>{\del x\del_y f(x,y)|_{x=0,y=0} + o(h_x^2) + o(h_y^2)} + \end{multline*} + \end{block} + \vfill + \only<2>{End of intermission.} +\end{frame} + +\begin{frame}{Newton-Raphson for $D > 1$?} + What happens if we generalize to $D > 1$?\\ + \medskip + Let's consider the case $D=2$ first, for a polynomial of degree $1$ + \begin{equation*} + f(x_1, x_2) = (1 - x) \cdot (2 - y) + \end{equation*} + \medskip + $f$ has infinitely many roots: + \begin{itemize} + \item $x = 1$ and $y$ arbitrary + \item $y = 2$ and $x$ arbitrary + \end{itemize} + In order to pin-point only one point in $D=2$, we need to solve $2$ + equations: + \begin{align*} + f_1(x_1, x_2) & = 0, & + f_2(x_1, x_2) & = 0 + \end{align*} + In $D$ dimensions, we will need to solve $D$ simultaneous equations! +\end{frame} + +\begin{frame}{Generalization to $D > 1$} + Substitute: + \begin{align*} + f(x) \in \mathbb{R} \quad & \to \quad f(x) \in \mathbb{R}^D\\ + x \in \mathbb{R} \quad & \to \quad x \in \mathbb{R}^D\\ + f'(x) \in \mathbb{R} \quad & \to \quad [D_f]_{ij} \equiv \frac{\del f_i(x)}{\del x_j} \in \mathbb{R}^{D\times D} + \end{align*} + + Then the Newton-Raphson steps reads + \begin{equation*} + x^{(k+1)} \leftarrow x^{(k)} - \left[D_f(x^{(k)})\right]^{-1} f(x^{(k)})\,. + \end{equation*} + Problems: + \begin{itemize} + \item the functional matrix $D_f(x^{(k)})$ might be singular, or numerically close to being + singular + \item inverting $D_f$ is generally computationally expensive! + \end{itemize} + + In the next two lectures, Marcin will discuss how to solve systems of linear equations. + These methods will also introduce you to methods to numerically compute the above $x^{(k + 1)}$ + from $D_f(x^{(k)})$. \end{frame} \backupbegin