From 09cfaea7323386ffc9edcce20aa9c3f475283ff6 Mon Sep 17 00:00:00 2001
From: patnr
Date: Tue, 24 Sep 2024 19:15:32 +0200
Subject: [PATCH] Misc

---
 notebooks/T2 - Gaussian distribution.ipynb      | 10 +++++-----
 notebooks/T3 - Bayesian inference.ipynb         | 10 ++++++----
 notebooks/resources/answers.py                  |  9 +++++----
 notebooks/scripts/T2 - Gaussian distribution.py | 10 +++++-----
 notebooks/scripts/T3 - Bayesian inference.py    | 10 ++++++----
 5 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/notebooks/T2 - Gaussian distribution.ipynb b/notebooks/T2 - Gaussian distribution.ipynb
index 385597e..3dc2b73 100644
--- a/notebooks/T2 - Gaussian distribution.ipynb
+++ b/notebooks/T2 - Gaussian distribution.ipynb
@@ -288,7 +288,7 @@
  "It is important to recognize how similar eqn. (GM) is to the univariate (scalar) case (G1).\n",
  "Moreover, [as above](#Exc-(optional)----Integrals) it can be shown that\n",
  "- $\mathbf{\mu} = \Expect[\x]$,\n",
- "- $\mathbf{\Sigma} \mathrel{≔} \Expect[(\x-\mu)(\x-\mu)\tr]$.\n",
+ "- $\mathbf{\Sigma} = \Expect[(\x-\mu)(\x-\mu)\tr]$.\n",
  "\n",
  "Note that that the elements of $\mathbf{\Sigma}$ are individual covariances,\n",
  "$\Sigma_{i,j} = \Expect[(x_i-\mu_i)(x_j-\mu_j)] = \mathbb{Cov}(x_i, x_j)$.\n",
@@ -388,10 +388,10 @@
  "**Exc -- Correlation disambiguation:**\n",
  "* What's the difference between correlation and covariance?\n",
  "* What's the difference between non-zero (C) correlation (or covariance) and (D) dependence?\n",
- "  *Hint: consider this [image](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#/media/File:Correlation_examples2.svg). \n",
- "  Does $C \Rightarrow D$ or the converse? \n",
- "  What about the negation, $\neg D \Rightarrow \neg C$, or its converse?* \n",
- "  What about the the (jointly) Gaussian case?\n",
+ "  *Hint: consider this [image](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#/media/File:Correlation_examples2.svg).* \n",
+ "  - Does $C \Rightarrow D$ or the converse? \n",
+ "  - What about the negation, $\neg D \Rightarrow \neg C$, or its converse? \n",
+ "  - What about the (jointly) Gaussian case?\n",
  "* Does correlation (or dependence) imply causation?\n",
  "* Suppose $x$ and $y$ have non-zero correlation, but neither one causes the other.\n",
  "  Does information about $y$ give you information about $x$?"
diff --git a/notebooks/T3 - Bayesian inference.ipynb b/notebooks/T3 - Bayesian inference.ipynb
index 0e2762c..ed52984 100644
--- a/notebooks/T3 - Bayesian inference.ipynb
+++ b/notebooks/T3 - Bayesian inference.ipynb
@@ -183,6 +183,8 @@
  "The normalisation is only necessary because of the *convention* that all densities integrate to $1$.\n",
  "However, for large models, we usually can only afford to evaluate $p(y|x)$ at a few points (of $x$), so that the integral for $p(y)$ can only be roughly approximated. In such settings, estimation of the normalisation factor becomes an important question too.\n",
  "\n",
+ "## Interactive illustration\n",
+ "\n",
  "The code below shows Bayes' rule in action, for prior $p(x) = \NormDist(x|x^f, P^f)$ and likelihood, $p(y|x) = \NormDist(y|x, R)$. The parameters of the prior are fixed at $x^f= 10$, $P^f=4^2$ (this ugly mean & variance notation is a necessary evil for later). The parameters of the likelihood are controlled through the interactive sliders."
  ]
  },
@@ -268,12 +270,12 @@
  "source": [
  "## With forward (observation) models\n",
  "Likelihoods are not generally as simple as the ones we saw above.\n",
- "That could be because the unknown to be estimated controls some other aspect\n",
- "of the measurement sampling distribution than merely the location.\n",
- "However, we are mainly interested in the case when the measurement is generated via some observation model.\n",
+ "That could be because the unknown is not simply the mean parameter,\n",
+ "but rather the (co-)variance, or some other characteristic of the sampling distribution.\n",
+ "Or, as is usually the case for us, the unknown is an input to some "observation (forward) model".\n",
  "\n",
  "Suppose the observation, $y$, is related to the true state, $x$,\n",
- " via some "observation (forward) model", $\ObsMod$:\n",
+ " via some observation model, $\ObsMod$:\n",
  " \begin{align*}\n",
  " y &= \ObsMod(x) + r \,, \;\; \qquad \tag{Obs}\n",
  " \end{align*}\n",
diff --git a/notebooks/resources/answers.py b/notebooks/resources/answers.py
index 53e8163..677427c 100644
--- a/notebooks/resources/answers.py
+++ b/notebooks/resources/answers.py
@@ -172,12 +172,13 @@ def setup_typeset():
 answers['Gauss integrals'] = ['MD', r'''
 (i) $$\begin{align} \Expect[x]
+&= \int x \, p(x) \,d x \tag{by definition} \\\
 &= \int x \, c \, e^{-(x-\mu)^2 / 2 \sigma^2} \,d x \tag{by definition} \\\
 &= \int (u + \mu) \, c \, e^{-u^2 / 2 \sigma^2} \,d u \tag{$u = x-\mu$}\\\
 &= \int u \, c \, e^{-u^2 / 2 \sigma^2} \,d u
-\;+\; \mu \int \, c \, e^{-u^2 / 2 \sigma^2} \,d u \\\
+\;+\; \mu \int \, c \, e^{-u^2 / 2 \sigma^2} \,d u \tag{distribute integral}\\\
 &= \big[-\sigma^2 \, c \, e^{-u^2 / 2 \sigma^2}\big]^{+\infty}_{-\infty}
-\;+\; \mu \, \Expect[1]
+\;+\; \mu \, \Expect[1] \tag{integrate-by-parts + identify}
 \end{align}
 $$
 The first term is zero. The second leaves only $\mu$, since $\Expect[1] = 1$.
@@ -185,8 +186,8 @@
 (ii) $$\begin{align} \Expect[(x - \mu)^2]
 &= \int (x - \mu)^2 \, c \, e^{-(x-\mu)^2 / 2 \sigma^2} \,d x \tag{by definition} \\\
 &= \int u^2 \, c \, e^{-u^2 / 2 \sigma^2} \,d u \tag{$u = x-\mu$}\\\
-&= \int u \, \big[ u \, c \, e^{-u^2 / 2 \sigma^2} \big] \,d u \\\
-&= 0 - \int (1) \big[-\sigma^2 \, c \, e^{-u^2 / 2 \sigma^2}\big] \,d u \,, \tag{Integrate by parts} \\\
+&= \int u \, \big[ u \, c \, e^{-u^2 / 2 \sigma^2} \big] \,d u \tag{$u^2 = u\, u$} \\\
+&= 0 - \int (1) \big[-\sigma^2 \, c \, e^{-u^2 / 2 \sigma^2}\big] \,d u \,, \tag{integrate by parts} \\\
 \end{align}
 $$
 where the first term was zero for the same reason as above,
diff --git a/notebooks/scripts/T2 - Gaussian distribution.py b/notebooks/scripts/T2 - Gaussian distribution.py
index 019a21c..2d0be7c 100644
--- a/notebooks/scripts/T2 - Gaussian distribution.py
+++ b/notebooks/scripts/T2 - Gaussian distribution.py
@@ -171,7 +171,7 @@ def pdf_U1(x, mu, sigma2):
 # It is important to recognize how similar eqn. (GM) is to the univariate (scalar) case (G1).
 # Moreover, [as above](#Exc-(optional)----Integrals) it can be shown that
 # - $\mathbf{\mu} = \Expect[\x]$,
-# - $\mathbf{\Sigma} \mathrel{≔} \Expect[(\x-\mu)(\x-\mu)\tr]$.
+# - $\mathbf{\Sigma} = \Expect[(\x-\mu)(\x-\mu)\tr]$.
 #
 # Note that that the elements of $\mathbf{\Sigma}$ are individual covariances,
 # $\Sigma_{i,j} = \Expect[(x_i-\mu_i)(x_j-\mu_j)] = \mathbb{Cov}(x_i, x_j)$.
@@ -237,10 +237,10 @@ def plot_pdf_G2(corr=0.7, std_x=1):
 # **Exc -- Correlation disambiguation:**
 # * What's the difference between correlation and covariance?
 # * What's the difference between non-zero (C) correlation (or covariance) and (D) dependence?
-#   *Hint: consider this [image](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#/media/File:Correlation_examples2.svg). 
-#   Does $C \Rightarrow D$ or the converse? 
-#   What about the negation, $\neg D \Rightarrow \neg C$, or its converse?* 
-#   What about the the (jointly) Gaussian case?
+#   *Hint: consider this [image](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#/media/File:Correlation_examples2.svg).* 
+#   - Does $C \Rightarrow D$ or the converse? 
+#   - What about the negation, $\neg D \Rightarrow \neg C$, or its converse? 
+#   - What about the (jointly) Gaussian case?
 # * Does correlation (or dependence) imply causation?
 # * Suppose $x$ and $y$ have non-zero correlation, but neither one causes the other.
 #   Does information about $y$ give you information about $x$?
diff --git a/notebooks/scripts/T3 - Bayesian inference.py b/notebooks/scripts/T3 - Bayesian inference.py
index ea2bf3e..e8622e3 100644
--- a/notebooks/scripts/T3 - Bayesian inference.py
+++ b/notebooks/scripts/T3 - Bayesian inference.py
@@ -104,6 +104,8 @@ def Bayes_rule(prior_values, lklhd_values, dx):
 # The normalisation is only necessary because of the *convention* that all densities integrate to $1$.
 # However, for large models, we usually can only afford to evaluate $p(y|x)$ at a few points (of $x$), so that the integral for $p(y)$ can only be roughly approximated. In such settings, estimation of the normalisation factor becomes an important question too.
 #
+# ## Interactive illustration
+#
 # The code below shows Bayes' rule in action, for prior $p(x) = \NormDist(x|x^f, P^f)$ and likelihood, $p(y|x) = \NormDist(y|x, R)$. The parameters of the prior are fixed at $x^f= 10$, $P^f=4^2$ (this ugly mean & variance notation is a necessary evil for later). The parameters of the likelihood are controlled through the interactive sliders.
 
 @interact(y=(*bounds, 1), logR=(-3, 5, .5), top=[['y', 'logR']])
@@ -162,12 +164,12 @@ def plot(x, y, c, lbl):
 
 # ## With forward (observation) models
 # Likelihoods are not generally as simple as the ones we saw above.
-# That could be because the unknown to be estimated controls some other aspect
-# of the measurement sampling distribution than merely the location.
-# However, we are mainly interested in the case when the measurement is generated via some observation model.
+# That could be because the unknown is not simply the mean parameter,
+# but rather the (co-)variance, or some other characteristic of the sampling distribution.
+# Or, as is usually the case for us, the unknown is an input to some "observation (forward) model".
 #
 # Suppose the observation, $y$, is related to the true state, $x$,
-#  via some "observation (forward) model", $\ObsMod$:
+#  via some observation model, $\ObsMod$:
 #  \begin{align*}
 #  y &= \ObsMod(x) + r \,, \;\; \qquad \tag{Obs}
 #  \end{align*}
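
Appended note (not part of the commit): a standalone sketch cross-checking the identities referenced by the T2 hunks, namely mu = E[x], Sigma = E[(x - mu)(x - mu)^T], whose entries are the pairwise covariances, and correlation as covariance rescaled by the standard deviations. The mean and covariance values below are arbitrary examples, and the sample size is chosen only to make the estimates stable.

# Monte-Carlo check of the stated moment identities (illustrative values, not from the notebooks).
import numpy as np

rng = np.random.default_rng(0)
mu = np.array([1.0, -2.0])
Sigma = np.array([[1.0, 0.7],
                  [0.7, 2.0]])                         # example covariance matrix (assumed)

X = rng.multivariate_normal(mu, Sigma, size=100_000)   # each row is one sample of x

mu_hat = X.mean(axis=0)                                # sample estimate of E[x]
D = X - mu_hat
Sigma_hat = D.T @ D / (len(X) - 1)                     # sample estimate of E[(x-mu)(x-mu)^T]

print(np.round(mu_hat, 2))                             # ~ [ 1. -2.]
print(np.round(Sigma_hat, 2))                          # ~ [[1.  0.7] [0.7 2. ]]
print(np.round(np.corrcoef(X.T), 2))                   # correlation = covariance / (std_i * std_j)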
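
Second appended note (also not part of the commit): a minimal, non-interactive sketch of the grid-based Bayes' rule that the T3 hunks describe, with the prior fixed at x^f = 10, P^f = 4^2 as in the text, and the likelihood built from an observation model y = ObsMod(x) + r, r ~ N(0, R). The grid bounds, the quadratic ObsMod, the value of R, and the helper name bayes_rule_grid are assumptions for illustration; the notebooks' own Bayes_rule function and @interact sliders are not reproduced here.

# Pointwise Bayes' rule on a grid, with the likelihood defined through an observation model.
import numpy as np
from scipy.stats import norm

def ObsMod(x):
    """Example (assumed) forward model; the tutorials use their own choices."""
    return x**2 / 25

def bayes_rule_grid(prior_values, lklhd_values, dx):
    """Pointwise product, then normalise so the posterior integrates to 1."""
    unnormalised = prior_values * lklhd_values
    p_y = np.sum(unnormalised) * dx                    # approximates the integral p(y)
    return unnormalised / p_y

# Grid for x (bounds are an assumption, wide enough to cover the prior).
x_grid = np.linspace(-20, 40, 1001)
dx = x_grid[1] - x_grid[0]

# Prior p(x) = N(x | x^f, P^f) with x^f = 10, P^f = 4^2.
prior = norm.pdf(x_grid, loc=10, scale=4)

# Simulate a "truth" and a noisy observation of it through ObsMod.
R = 1.0                                                # observation error variance (assumed)
x_true = 12.0
y = ObsMod(x_true) + np.sqrt(R) * rng.standard_normal() if (rng := np.random.default_rng(42)) else None

# Likelihood p(y|x) = N(y | ObsMod(x), R), evaluated for every x on the grid.
lklhd = norm.pdf(y, loc=ObsMod(x_grid), scale=np.sqrt(R))

posterior = bayes_rule_grid(prior, lklhd, dx)
print("posterior integrates to", np.sum(posterior) * dx)      # = 1 by construction
print("posterior mean approx.", np.sum(x_grid * posterior) * dx)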