diff --git a/notebooks/T2 - Gaussian distribution.ipynb b/notebooks/T2 - Gaussian distribution.ipynb
index 385597e..3dc2b73 100644
--- a/notebooks/T2 - Gaussian distribution.ipynb
+++ b/notebooks/T2 - Gaussian distribution.ipynb
@@ -288,7 +288,7 @@
     "It is important to recognize how similar eqn. (GM) is to the univariate (scalar) case (G1).\n",
     "Moreover, [as above](#Exc-(optional)----Integrals) it can be shown that\n",
     "- $\mathbf{\mu} = \Expect[\x]$,\n",
-    "- $\mathbf{\Sigma} \mathrel{≔} \Expect[(\x-\mu)(\x-\mu)\tr]$.\n",
+    "- $\mathbf{\Sigma} = \Expect[(\x-\mu)(\x-\mu)\tr]$.\n",
     "\n",
     "Note that that the elements of $\mathbf{\Sigma}$ are individual covariances,\n",
     "$\Sigma_{i,j} = \Expect[(x_i-\mu_i)(x_j-\mu_j)] = \mathbb{Cov}(x_i, x_j)$.\n",
@@ -388,10 +388,10 @@
     "**Exc -- Correlation disambiguation:**\n",
     "* What's the difference between correlation and covariance?\n",
     "* What's the difference between non-zero (C) correlation (or covariance) and (D) dependence?\n",
-    "  *Hint: consider this [image](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#/media/File:Correlation_examples2.svg). \n",
-    "  Does $C \Rightarrow D$ or the converse? \n",
-    "  What about the negation, $\neg D \Rightarrow \neg C$, or its converse?* \n",
-    "  What about the the (jointly) Gaussian case?\n",
+    "  *Hint: consider this [image](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#/media/File:Correlation_examples2.svg).* \n",
+    "  - Does $C \Rightarrow D$ or the converse? \n",
+    "  - What about the negation, $\neg D \Rightarrow \neg C$, or its converse? \n",
+    "  - What about the (jointly) Gaussian case?\n",
     "* Does correlation (or dependence) imply causation?\n",
    "* Suppose $x$ and $y$ have non-zero correlation, but neither one causes the other.\n",
     "  Does information about $y$ give you information about $x$?"
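The two moment identities above, and the hint about correlation versus dependence, are easy to sanity-check numerically. Below is a minimal sketch (not part of the patch; it only assumes `numpy`, and all names are ad hoc):

```python
# Monte Carlo check of mu = E[x] and Sigma = E[(x-mu)(x-mu)^T],
# plus a counterexample for "zero correlation => independence".
import numpy as np

rng = np.random.default_rng(42)
mu = np.array([1.0, -2.0])
Sigma = np.array([[2.0, 0.8],
                  [0.8, 1.0]])

# Sample from N(mu, Sigma) and estimate the moments empirically.
X = rng.multivariate_normal(mu, Sigma, size=100_000)   # shape (N, 2)
mu_hat = X.mean(axis=0)                                # ~ E[x]
Sigma_hat = np.cov(X, rowvar=False)                    # ~ E[(x-mu)(x-mu)^T]
print(mu_hat, Sigma_hat, sep="\n")

# Dependence without correlation: y = x**2 is fully determined by x,
# yet for symmetric x their (linear) correlation is ~0.
x = rng.standard_normal(100_000)
y = x**2
print(np.corrcoef(x, y)[0, 1])   # close to 0, despite total dependence
```

In the jointly Gaussian case this counterexample cannot occur, which is the point of the last sub-question of the exercise.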
diff --git a/notebooks/T3 - Bayesian inference.ipynb b/notebooks/T3 - Bayesian inference.ipynb
index 0e2762c..ed52984 100644
--- a/notebooks/T3 - Bayesian inference.ipynb
+++ b/notebooks/T3 - Bayesian inference.ipynb
@@ -183,6 +183,8 @@
     "The normalisation is only necessary because of the *convention* that all densities integrate to $1$.\n",
     "However, for large models, we usually can only afford to evaluate $p(y|x)$ at a few points (of $x$), so that the integral for $p(y)$ can only be roughly approximated. In such settings, estimation of the normalisation factor becomes an important question too.\n",
     "\n",
+    "## Interactive illustration\n",
+    "\n",
     "The code below shows Bayes' rule in action, for prior $p(x) = \NormDist(x|x^f, P^f)$ and likelihood, $p(y|x) = \NormDist(y|x, R)$. The parameters of the prior are fixed at $x^f= 10$, $P^f=4^2$ (this ugly mean & variance notation is a necessary evil for later). The parameters of the likelihood are controlled through the interactive sliders."
    ]
   },
@@ -268,12 +270,12 @@
    "source": [
     "## With forward (observation) models\n",
     "Likelihoods are not generally as simple as the ones we saw above.\n",
-    "That could be because the unknown to be estimated controls some other aspect\n",
-    "of the measurement sampling distribution than merely the location.\n",
-    "However, we are mainly interested in the case when the measurement is generated via some observation model.\n",
+    "That could be because the unknown is not simply the mean parameter,\n",
+    "but rather the (co-)variance, or some other characteristic of the sampling distribution.\n",
+    "Or, as is usually the case for us, the unknown is an input to some \"observation (forward) model\".\n",
     "\n",
     "Suppose the observation, $y$, is related to the true state, $x$,\n",
-    " via some \"observation (forward) model\", $\ObsMod$:\n",
+    " via some observation model, $\ObsMod$:\n",
     " \\begin{align*}\n",
     " y &= \ObsMod(x) + r \,, \;\; \qquad \tag{Obs}\n",
     " \\end{align*}\n",
diff --git a/notebooks/resources/answers.py b/notebooks/resources/answers.py
index 53e8163..677427c 100644
--- a/notebooks/resources/answers.py
+++ b/notebooks/resources/answers.py
@@ -172,12 +172,13 @@ def setup_typeset():
 
 answers['Gauss integrals'] = ['MD', r'''
 (i) $$\begin{align} \Expect[x]
+&= \int x \, p(x) \,d x \tag{by definition} \\\
 &= \int x \, c \, e^{-(x-\mu)^2 / 2 \sigma^2} \,d x \tag{by definition} \\\
 &= \int (u + \mu) \, c \, e^{-u^2 / 2 \sigma^2} \,d u \tag{$u = x-\mu$}\\\
 &= \int u \, c \, e^{-u^2 / 2 \sigma^2} \,d u
-\;+\; \mu \int \, c \, e^{-u^2 / 2 \sigma^2} \,d u \\\
+\;+\; \mu \int \, c \, e^{-u^2 / 2 \sigma^2} \,d u \tag{distribute integral}\\\
 &= \big[-\sigma^2 \, c \, e^{-u^2 / 2 \sigma^2}\big]^{+\infty}_{-\infty}
-\;+\; \mu \, \Expect[1]
+\;+\; \mu \, \Expect[1] \tag{integrate-by-parts + identify}
 \end{align}
 $$
 The first term is zero. The second leaves only $\mu$, since $\Expect[1] = 1$.
@@ -185,8 +186,8 @@ def setup_typeset():
 (ii) $$\begin{align} \Expect[(x - \mu)^2]
 &= \int (x - \mu)^2 \, c \, e^{-(x-\mu)^2 / 2 \sigma^2} \,d x \tag{by definition} \\\
 &= \int u^2 \, c \, e^{-u^2 / 2 \sigma^2} \,d u \tag{$u = x-\mu$}\\\
-&= \int u \, \big[ u \, c \, e^{-u^2 / 2 \sigma^2} \big] \,d u \\\
-&= 0 - \int (1) \big[-\sigma^2 \, c \, e^{-u^2 / 2 \sigma^2}\big] \,d u \,, \tag{Integrate by parts} \\\
+&= \int u \, \big[ u \, c \, e^{-u^2 / 2 \sigma^2} \big] \,d u \tag{$u^2 = u\, u$} \\\
+&= 0 - \int (1) \big[-\sigma^2 \, c \, e^{-u^2 / 2 \sigma^2}\big] \,d u \,, \tag{integrate by parts} \\\
 \end{align}
 $$
 where the first term was zero for the same reason as above,
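The derivation in `answers['Gauss integrals']` can also be cross-checked by quadrature. A small sketch, assuming `scipy` is available (not part of the patch; `mu`, `sigma`, and `pdf` are ad hoc names):

```python
# Numerical verification that (i) E[x] = mu and (ii) E[(x-mu)^2] = sigma^2
# for the univariate Gaussian pdf of eqn (G1).
import numpy as np
from scipy.integrate import quad

mu, sigma = 3.0, 2.0
c = 1 / np.sqrt(2 * np.pi * sigma**2)          # normalisation constant
pdf = lambda x: c * np.exp(-(x - mu)**2 / (2 * sigma**2))

E_x,   _ = quad(lambda x: x * pdf(x),            -np.inf, np.inf)  # (i)  -> mu
Var_x, _ = quad(lambda x: (x - mu)**2 * pdf(x),  -np.inf, np.inf)  # (ii) -> sigma^2
print(E_x, Var_x)   # approx. 3.0 and 4.0
```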
diff --git a/notebooks/scripts/T2 - Gaussian distribution.py b/notebooks/scripts/T2 - Gaussian distribution.py
index 019a21c..2d0be7c 100644
--- a/notebooks/scripts/T2 - Gaussian distribution.py
+++ b/notebooks/scripts/T2 - Gaussian distribution.py
@@ -171,7 +171,7 @@ def pdf_U1(x, mu, sigma2):
 # It is important to recognize how similar eqn. (GM) is to the univariate (scalar) case (G1).
 # Moreover, [as above](#Exc-(optional)----Integrals) it can be shown that
 # - $\mathbf{\mu} = \Expect[\x]$,
-# - $\mathbf{\Sigma} \mathrel{≔} \Expect[(\x-\mu)(\x-\mu)\tr]$.
+# - $\mathbf{\Sigma} = \Expect[(\x-\mu)(\x-\mu)\tr]$.
 #
 # Note that that the elements of $\mathbf{\Sigma}$ are individual covariances,
 # $\Sigma_{i,j} = \Expect[(x_i-\mu_i)(x_j-\mu_j)] = \mathbb{Cov}(x_i, x_j)$.
@@ -237,10 +237,10 @@ def plot_pdf_G2(corr=0.7, std_x=1):
 # **Exc -- Correlation disambiguation:**
 # * What's the difference between correlation and covariance?
 # * What's the difference between non-zero (C) correlation (or covariance) and (D) dependence?
-#   *Hint: consider this [image](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#/media/File:Correlation_examples2.svg). 
-#   Does $C \Rightarrow D$ or the converse? 
-#   What about the negation, $\neg D \Rightarrow \neg C$, or its converse?* 
-#   What about the the (jointly) Gaussian case?
+#   *Hint: consider this [image](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#/media/File:Correlation_examples2.svg).* 
+#   - Does $C \Rightarrow D$ or the converse? 
+#   - What about the negation, $\neg D \Rightarrow \neg C$, or its converse? 
+#   - What about the (jointly) Gaussian case?
 # * Does correlation (or dependence) imply causation?
 # * Suppose $x$ and $y$ have non-zero correlation, but neither one causes the other.
 #   Does information about $y$ give you information about $x$?
diff --git a/notebooks/scripts/T3 - Bayesian inference.py b/notebooks/scripts/T3 - Bayesian inference.py
index ea2bf3e..e8622e3 100644
--- a/notebooks/scripts/T3 - Bayesian inference.py
+++ b/notebooks/scripts/T3 - Bayesian inference.py
@@ -104,6 +104,8 @@ def Bayes_rule(prior_values, lklhd_values, dx):
 # The normalisation is only necessary because of the *convention* that all densities integrate to $1$.
 # However, for large models, we usually can only afford to evaluate $p(y|x)$ at a few points (of $x$), so that the integral for $p(y)$ can only be roughly approximated. In such settings, estimation of the normalisation factor becomes an important question too.
 #
+# ## Interactive illustration
+#
 # The code below shows Bayes' rule in action, for prior $p(x) = \NormDist(x|x^f, P^f)$ and likelihood, $p(y|x) = \NormDist(y|x, R)$. The parameters of the prior are fixed at $x^f= 10$, $P^f=4^2$ (this ugly mean & variance notation is a necessary evil for later). The parameters of the likelihood are controlled through the interactive sliders.
 
 @interact(y=(*bounds, 1), logR=(-3, 5, .5), top=[['y', 'logR']])
@@ -162,12 +164,12 @@ def plot(x, y, c, lbl):
 
 # ## With forward (observation) models
 # Likelihoods are not generally as simple as the ones we saw above.
-# That could be because the unknown to be estimated controls some other aspect
-# of the measurement sampling distribution than merely the location.
-# However, we are mainly interested in the case when the measurement is generated via some observation model.
+# That could be because the unknown is not simply the mean parameter,
+# but rather the (co-)variance, or some other characteristic of the sampling distribution.
+# Or, as is usually the case for us, the unknown is an input to some "observation (forward) model".
 #
 # Suppose the observation, $y$, is related to the true state, $x$,
-# via some "observation (forward) model", $\ObsMod$:
+# via some observation model, $\ObsMod$:
 # \begin{align*}
 # y &= \ObsMod(x) + r \,, \;\; \qquad \tag{Obs}
 # \end{align*}
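To make the "observation (forward) model" wording concrete: with eqn (Obs) and Gaussian noise, the likelihood becomes $p(y|x) = \NormDist(y|\ObsMod(x), R)$, which can still be handled pointwise on a grid in the spirit of `Bayes_rule(prior_values, lklhd_values, dx)` above. A minimal sketch (not part of the patch; `ObsMod` here is a hypothetical quadratic, and the final division is the grid estimate of $p(y)$):

```python
# Grid-based Bayes' rule with a nonlinear observation model.
import numpy as np

def ObsMod(x):
    return x**2 / 10            # hypothetical forward model, for illustration only

grid = np.linspace(-20, 40, 1001)
dx = grid[1] - grid[0]
xf, Pf, R, y = 10.0, 4.0**2, 1.0, 12.0

# Prior N(x | xf, Pf) and likelihood N(y | ObsMod(x), R), evaluated on the grid.
prior = np.exp(-(grid - xf)**2 / (2 * Pf)) / np.sqrt(2 * np.pi * Pf)
lklhd = np.exp(-(y - ObsMod(grid))**2 / (2 * R)) / np.sqrt(2 * np.pi * R)

posterior = prior * lklhd
posterior /= (posterior.sum() * dx)   # normalise by the approximation of p(y)
print(grid[np.argmax(posterior)])     # MAP estimate of x
```

Note that the only place the forward model enters is inside the likelihood evaluation; the normalisation step is unchanged from the linear case discussed in the "Interactive illustration" section.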