From a9199a2a5d0eba386bc78795fbe2e2ed4d0a73c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexandre=20Ren=C3=A9?= Date: Fri, 29 Dec 2023 09:46:10 -0500 Subject: [PATCH] Compile docs from inlined comments. - Add _toc.yml - Add _config.yml - Add references.bib, references.md - Switch to pure pyproject.toml - Switch to a src layout (avoids conflicts with other folders in root, like `figures`) - Add GitHub action to build book (untested, from cookiecutter) --- .github/workflows/deploy.yml | 54 + .gitignore | 2 + README.md | 2 +- _config.yml | 108 ++ _toc.yml | 12 + pyproject.toml | 91 +- references.bib | 39 + references.md | 5 + setup.cfg | 85 - {emd_falsify => src/emd_falsify}/__init__.py | 0 src/emd_falsify/_version.py | 16 + src/emd_falsify/config/__init__.md | 150 ++ .../emd_falsify}/config/__init__.py | 42 +- .../emd_falsify}/config/defaults.cfg | 2 +- {emd_falsify => src/emd_falsify}/digitize.py | 0 src/emd_falsify/emd.md | 892 +++++++++ {emd_falsify => src/emd_falsify}/emd.py | 107 +- {emd_falsify => src/emd_falsify}/memoize.py | 0 src/emd_falsify/path_sampling.md | 1598 +++++++++++++++++ .../emd_falsify}/path_sampling.py | 122 +- src/emd_falsify/tasks.md | 792 ++++++++ {emd_falsify => src/emd_falsify}/tasks.py | 20 +- {emd_falsify => src/emd_falsify}/utils.py | 0 src/emd_falsify/viz.md | 297 +++ {emd_falsify => src/emd_falsify}/viz.py | 61 +- 25 files changed, 4288 insertions(+), 209 deletions(-) create mode 100644 .github/workflows/deploy.yml create mode 100644 _config.yml create mode 100644 _toc.yml create mode 100644 references.bib create mode 100644 references.md delete mode 100644 setup.cfg rename {emd_falsify => src/emd_falsify}/__init__.py (100%) create mode 100644 src/emd_falsify/_version.py create mode 100644 src/emd_falsify/config/__init__.md rename {emd_falsify => src/emd_falsify}/config/__init__.py (82%) rename {emd_falsify => src/emd_falsify}/config/defaults.cfg (98%) rename {emd_falsify => src/emd_falsify}/digitize.py (100%) create mode 100644 
src/emd_falsify/emd.md rename {emd_falsify => src/emd_falsify}/emd.py (92%) rename {emd_falsify => src/emd_falsify}/memoize.py (100%) create mode 100644 src/emd_falsify/path_sampling.md rename {emd_falsify => src/emd_falsify}/path_sampling.py (94%) create mode 100644 src/emd_falsify/tasks.md rename {emd_falsify => src/emd_falsify}/tasks.py (98%) rename {emd_falsify => src/emd_falsify}/utils.py (100%) create mode 100644 src/emd_falsify/viz.md rename {emd_falsify => src/emd_falsify}/viz.py (91%) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000..b815e66 --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,54 @@ +name: deploy-book + +on: + # Trigger the workflow on push to main branch + push: + branches: + - main + # Also allow manual trigger + workflow_dispatch: + +env: + BASE_URL: /${{ github.event.repository.name }} + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + deploy-book: + runs-on: ubuntu-latest + # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages + permissions: + pages: write + id-token: write + steps: + - uses: actions/checkout@v3 + + # Install dependencies + - name: Set up Python 3.11 + uses: actions/setup-python@v4 + with: + python-version: 3.11 + + - name: Install dependencies + run: | + pip install . 
+ + # Build the book + - name: Build the book + run: | + jupyter-book build emd-paper_notebooks + + # Upload the book's HTML as an artifact + - name: Upload artifact + uses: actions/upload-pages-artifact@v2 + with: + path: "_build/html" + + # Deploy the book's HTML to GitHub Pages + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v2 diff --git a/.gitignore b/.gitignore index fd09e6f..22133da 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ build *.joblib-cache *.pytest_cache +*.jupyter +*.jupyter_cache purgatory emd_falsify/*.md diff --git a/README.md b/README.md index a1f053a..ee149b6 100644 --- a/README.md +++ b/README.md @@ -67,4 +67,4 @@ This will print messages to your console reporting how much time each computatio The current implementation of the hierarchical beta process (used for sampling quantile paths) has seen quite a lot of testing for numerical stability, but little optimization effort. In particular it makes a lot of calls to functions in `scipy.optimize`, which makes the whole function quite slow: even with a relatively complicated data model like the [pyloric circuit](https://alcrene.github.io/pyloric-network-simulator/pyloric_simulator/prinz2004.html), drawing quantile paths can still take 10x longer than generating the data. -Substantial performance improvements to the sampling algorithm are almost certainly possible with dedicated computer science effort. Since that is the current bottleneck, that would also proportionately reduce compute time for the whole EMD procedure. \ No newline at end of file +Substantial performance improvements to the sampling algorithm are almost certainly possible with dedicated computer science effort. Since that is the current bottleneck, that would also proportionately reduce compute time for the whole EMD procedure. 
diff --git a/_config.yml b/_config.yml new file mode 100644 index 0000000..f860ced --- /dev/null +++ b/_config.yml @@ -0,0 +1,108 @@ +####################################################################################### +# A default configuration that will be loaded for all jupyter books +# See the documentation for help and more options: +# https://jupyterbook.org/customize/config.html + +####################################################################################### +# Book settings +title : EMD-falsify # The title of the book. Will be placed in the left navbar. +author : Alexandre René # The author of the book +copyright : "2023" # Copyright year to be placed in the footer +logo : "" # A path to the book logo +exclude_patterns : [_build, Thumbs.db, .DS_Store, "**.ipynb_checkpoints", ".*", "**.egg-info", "*.mypy_cache", "**__pycache__", "**/.pytest_cache", + ".jupyter", ".jupyter_cache", + "**purgatory", "**.smt"] + +# Force re-execution of notebooks on each build. +# See https://jupyterbook.org/content/execute.html +execute: + execute_notebooks: cache + cache : ".jupyter_cache" + exclude_patterns : ["**purgatory", + "tasks.md", # Not executable as a notebook (Task is split over multiple cells) + ] + +# Define the name of the latex output file for PDF builds +latex: + latex_documents: + targetname: emd-falsify.tex + +# Parse and render settings +parse: + myst_enable_extensions: # default extensions to enable in the myst parser. 
See https://myst-parser.readthedocs.io/en/latest/using/syntax-optional.html + - amsmath + - colon_fence + - deflist + - dollarmath + # - html_admonition + - html_image + - linkify + - replacements + # - smartquotes + - substitution + - tasklist + myst_url_schemes: [mailto, http, https] # URI schemes that will be recognised as external URLs in Markdown links + myst_dmath_double_inline: true # Allow display math ($$) within an inline context + myst_linkify_fuzzy_links: false + myst_dmath_allow_labels: true + myst_heading_anchors: 2 + myst_substitutions: + prolog: | + ```{role} raw-latex(raw) + :format: latex + ``` + ```{role} raw-html(raw) + :format: html + ``` + +# Add a bibtex file so that we can create citations +bibtex_bibfiles: + - references.bib + +# Information about where the book exists on the web +repository: + url: https://github.com/alcrene/emd-falsify # Online location of your book + path_to_book: docs # Optional path to your book, relative to the repository root + branch: main # Which branch of the repository should be used when creating links (optional) + +# Add GitHub buttons to your book +# See https://jupyterbook.org/customize/config.html#add-a-link-to-your-repository +html: + use_issues_button: true + use_repository_button: true + +# Sphinx options + +sphinx: + config: + mathjax_config: + # The `mtext` font is used for non-math symbols, like the content of `\text` commands. + # By inheriting this font, we ensure that textual elements in equations use the same font + # as the main text of our book. Otherwise text is rendered in the TeX Serif font, which looks out of place on a web page with sans serif. + # NB: Not just the font family, but the actual font is matched. 
+ chtml: + mtextInheritFont: true + svg: + mtextInheritFont: true + tex: + macros: + "nN" : "\\mathcal{N}" + "RR" : "\\mathbb{R}" + "EE" : "{\\mathbb{E}}" + "VV" : "{\\mathbb{V}}" + "Unif" : "\\mathop{\\mathrm{Unif}}" + "Beta" : "{\\mathop{\\mathrm{Beta}}}" + # emd.md + "D" : "\\mathcal{D}" + "l" : "l" + "Me" : "\\mathcal{M}^ε" + "Philt" : ["\\widetilde{Φ}_{|#1}", 1] + "Bemd" : ["B_{#1}^{\\mathrm{EMD}}", 1] + # path_sampling.md + "lnLtt": "{\\tilde{l}}" + "Mvar" : "{\\mathop{\\mathrm{Mvar}}}" + "pathP": "{\\mathop{\\mathcal{P}}}" + "lnLh" : "{\\hat{l}}" + "emdstd": "{\\tilde{σ}}" + "EMD" : "{\\mathrm{EMD}}" + diff --git a/_toc.yml b/_toc.yml new file mode 100644 index 0000000..2f57b00 --- /dev/null +++ b/_toc.yml @@ -0,0 +1,12 @@ +# Table of contents +# Learn more at https://jupyterbook.org/customize/toc.html + +format: jb-book +root: README +chapters: +- file: src/emd_falsify/emd +- file: src/emd_falsify/path_sampling +- file: src/emd_falsify/tasks +- file: src/emd_falsify/viz +- file: src/emd_falsify/config/__init__.md +- file: references diff --git a/pyproject.toml b/pyproject.toml index 834753b..3ce6023 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,89 @@ [build-system] -requires = ["setuptools>=42", "wheel", "Cython", "setuptools_scm[toml]>=6.2"] -build-backend = "setuptools.build_meta:__legacy__" +requires = ["setuptools>=61.0.0", "setuptools-scm>=8.0"] +build-backend = "setuptools.build_meta" -# setuptools_scm allows to automatically include git-tracked files in package -[tool.setuptools_scm] \ No newline at end of file +[tool.setuptools_scm] +write_to = "src/emd_falsify/_version.py" + +[project.urls] +#"Documentation" = +"Bug Tracker" = "https://github.com/alcrene/emd-falsify/issues" +"Source" = "https://github.com/alcrene/emd-falsify" + +[project] +name = "emd-falsify" +authors = [ + {name = "Alexandre René", email="arene010@uottawa.ca"}, +] +description = "Original implementation of the EMD (empirical model discrepancy) model comparison 
criterion" +readme = "README.md" +requires-python = ">=3.7" + +license = {text = "MPL 2.0"} + +classifiers = [ + "Development Status :: 3 - Alpha", + "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)", + "Intended Audience :: Science/Research", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Mathematics", + "Topic :: Scientific/Engineering :: Statistics", +] +keywords = [ + "empirical model discrepancy", + "bayes factor", + "inference", + "model comparison", + "bayesian", +] + +dependencies = [ + "more-itertools", + "numpy", + "scipy>=1.7", # integrate.simpson changed name (used to be 'simps') + "pydantic>=1.8.2", + "tqdm", + "joblib", + # joblib is used for on-disk caching of calculations used in calibration plots + "scityping", + "valconfig>=0.1.2-rc3", +] + +dynamic = ["version"] + +[project.optional-dependencies] + +## Development dependencies +dev = ["pytest"] + +## Dependencies of the `viz` utilities module ## +viz = ["pandas", "matplotlib", "seaborn", "holoviews", "addict"] + # 'addict' used by valconfig.contrib.holoviews + + +## Executing notebooks and building “code browser” pages ## +doc = [ + "jupyter-book>=0.14", + "sphinx-proof", + "jupytext", + "ipykernel", + "holoviews", + "bokeh", + "jupyter_bokeh", + + # For bokeh export to png (also used by holoviews) + "selenium", + "phantomjs", + "pillow", + + ## Needed only for `tasks` notebook (which is not executable as a notebook) + #"smttask>=0.2.0-rc3", + + # For pushing Jupyter book to Github Pages + "ghp-import", +] \ No newline at end of file diff --git a/references.bib b/references.bib new file mode 100644 index 0000000..78af896 --- /dev/null +++ b/references.bib @@ -0,0 +1,39 @@ +@article{gillespieMathematicsBrownianMotion1996, + title = {The 
Mathematics of {{Brownian}} Motion and {{Johnson}} Noise}, + author = {Gillespie, Daniel T.}, + year = {1996}, + month = mar, + journal = {American Journal of Physics}, + volume = {64}, + number = {3}, + pages = {225--240}, + issn = {0002-9505, 1943-2909}, + doi = {10.1119/1.18210}, + urldate = {2014-04-23}, + abstract = {One reason why Brownian motion and Johnson noise are difficult subjects to teach is that their mathematical requirements transcend the capabilities of ordinary differential calculus. Presented here is an exposition of the needed generalization of calculus, namely continuous Markov process theory, in a form that should be accessible to advanced physics undergraduates. It is shown how this mathematical framework enables one to give clear, concise derivations of all the principal results of Brownian motion and Johnson noise, including fluctuation{\textendash}dissipation formulas, auto-covariance transport formulas, spectral density formulas, Nyquist's formula, the notions of white and 1/f 2noise, and an accurate numerical simulation algorithm. An added benefit of this exposition is a clearer view of the mathematical connection between the two very different approaches to Brownian motion taken by Einstein and Langevin in their pioneering papers of 1905 and 1908.}, + keywords = {1/f noise,Brownian motion,Calculus,dynamical system,Fokker-Planck equation,Markov processes,primer,stochastic,Thermal noise}, + file = {/home/rene/var/Zotero/storage/PPKWD7XS/Gillespie - 1996 - The mathematics of Brownian motion and Johnson noi.pdf;/home/rene/var/Zotero/storage/TQB5NDW7/1.html} +} + +@incollection{mateu-figuerasDistributionsSimplexRevisited2021, + title = {Distributions on the {{Simplex Revisited}}}, + booktitle = {Advances in {{Compositional Data Analysis}}: {{Festschrift}} in {{Honour}} of {{Vera Pawlowsky-Glahn}}}, + author = {{Mateu-Figueras}, Gloria and Monti, Gianna S. and Egozcue, J. 
J.}, + editor = {Filzmoser, Peter and Hron, Karel and {Mart{\'i}n-Fern{\'a}ndez}, Josep Antoni and {Palarea-Albaladejo}, Javier}, + year = {2021}, + pages = {61--82}, + publisher = {{Springer International Publishing}}, + address = {{Cham}}, + doi = {10.1007/978-3-030-71175-7_4}, + urldate = {2022-12-26}, + abstract = {A large number of families of distributions are available to model multivariate real vectors. On the contrary, for the simplex sample space, we have only a limited number of families arising through the generalization of the Dirichlet family or the logratio normal family. This chapter tries to summarize those models and some generalizations with a special emphasis on the algebraic-geometric structure of the simplex and on the measure which is considered compatible. In particular, the shifted-scaled Dirichlet distribution is studied and the logratio t distribution is rewritten and studied with respect to the Aitchison measure.}, + isbn = {978-3-030-71175-7}, + langid = {english}, + file = {/home/rene/var/Zotero/storage/A6H585YL/Mateu-Figueras et al_2021_Distributions on the Simplex Revisited.pdf} +} + +@unpublished{reneFalsifyingModels2024, + title = {Falsifying models using empirical loss discrepancy}, + author = {René, Alexandre and Longtin, André}, + note = {In preparation} +} \ No newline at end of file diff --git a/references.md b/references.md new file mode 100644 index 0000000..1796fd0 --- /dev/null +++ b/references.md @@ -0,0 +1,5 @@ +# References + +```{bibliography} +:style: unsrt +``` \ No newline at end of file diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index defb27a..0000000 --- a/setup.cfg +++ /dev/null @@ -1,85 +0,0 @@ -[metadata] -name = emd-falsify -version = 0.1.0-dev.0 -description = Original implementation of the EMD (empirical model discrepancy) model comparison criterion -long_description = file: README.md -long_description_content_type = text/markdown - -url = - -author = Alexandre René -author_email = 
arene010@uottawa.ca -classifiers = - Development Status :: 3 - Alpha - Intended Audience :: Science/Research - Programming Language :: Python :: 3 - Programming Language :: Python :: 3 :: Only - Programming Language :: Python :: 3.9 - Topic :: Scientific/Engineering - Topic :: Scientific/Engineering :: Mathematics - Topic :: Scientific/Engineering :: Statistics -keywords = - empirical model discrepancy - bayes factor - inference - model comparison - bayesian - - -[options] -packages = - emd_falsify - emd_falsify.config -# include_package_data = True -# # Relies on setuptools-scm, which must be included in pyproject.cfg - -python_requires = ~=3.9 - -install_requires = - numpy -# pandas - pydantic>=1.8.2 - tqdm - joblib - # joblib is used for on-disk caching of calculations used in calibration plots - - ## Personal / patched packages ## - scityping - - -[options.package_data] -* = - *.cfg - .*.cfg - -[options.extras_require] -dev = - pytest - -viz = - ## Extra dependencies of the `viz` utilities module ## - pandas - matplotlib - seaborn - holoviews - - -doc = - ## For executing notebooks ## - jupytext - ipykernel - holoviews - bokeh - jupyter_bokeh - # For bokeh export to png (also used by holoviews) - selenium - phantomjs - pillow - - ## For building with Jupyter book ## - # jupyter-book>=0.13.0 - # MyST-Parser~=0.15.2 - # MyST-NB~=0.13.2 - # MyST-NB-Bokeh>=0.5.3 - # jupyter-sphinx>=0.3.2 - jupyter-book>=0.14 diff --git a/emd_falsify/__init__.py b/src/emd_falsify/__init__.py similarity index 100% rename from emd_falsify/__init__.py rename to src/emd_falsify/__init__.py diff --git a/src/emd_falsify/_version.py b/src/emd_falsify/_version.py new file mode 100644 index 0000000..bf69a08 --- /dev/null +++ b/src/emd_falsify/_version.py @@ -0,0 +1,16 @@ +# file generated by setuptools_scm +# don't change, don't track in version control +TYPE_CHECKING = False +if TYPE_CHECKING: + from typing import Tuple, Union + VERSION_TUPLE = Tuple[Union[int, str], ...] 
+else: + VERSION_TUPLE = object + +version: str +__version__: str +__version_tuple__: VERSION_TUPLE +version_tuple: VERSION_TUPLE + +__version__ = version = '0.1.dev62+geed1e58.d20231229' +__version_tuple__ = version_tuple = (0, 1, 'dev62', 'geed1e58.d20231229') diff --git a/src/emd_falsify/config/__init__.md b/src/emd_falsify/config/__init__.md new file mode 100644 index 0000000..16bd2d4 --- /dev/null +++ b/src/emd_falsify/config/__init__.md @@ -0,0 +1,150 @@ +--- +jupytext: + encoding: '# -*- coding: utf-8 -*-' + formats: py:percent,md:myst + notebook_metadata_filter: -jupytext.text_representation.jupytext_version + text_representation: + extension: .md + format_name: myst + format_version: 0.13 +kernelspec: + display_name: Python (emd-falsify-dev) + language: python + name: emd-falsify-dev +--- + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +# Configuration options + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [hide-input] +--- +import uuid +from pathlib import Path +from typing import Optional, ClassVar, Union, Literal, Dict +from configparser import ConfigParser +``` + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [hide-input] +--- +from pydantic import BaseModel, Field, validator, root_validator +# from mackelab_toolbox.config import ValidatingConfig, prepend_rootdir, ensure_dir_exists +# from mackelab_toolbox.config.holoviews import FiguresConfig + # prepend_rootdir is a workaround because assigning automatically doesn’t currently work +from valconfig import ValConfig, ensure_dir_exists +from valconfig.contrib.holoviews import FiguresConfig, HoloMPLConfig, HoloBokehConfig, GenericParam +from scityping import Config as ScitypingConfig +``` + +Possible improvement: If we could have nested config parsers, we might be + able to rely more on the ConfigParser machinery, and less on a custom + Pydantic type, which should be easier for others to follow. 
+ In particular, the `colors` field could remain a config parser, although + we would still want to allow dotted access. + +```{code-cell} +class Config(ValConfig): + __default_config_path__ = "defaults.cfg" + + class paths: + figures : Path + + class mp: + max_cores: int + maxtasksperchild: Union[int,None] + + class caching: + """ + Note that the `joblib` options are ignored when `use_disk_cache` is False. + """ + use_disk_cache: bool=False + + class joblib: + """ + these arguments are passed on to joblib.Memory. + When `use_disk_cache` is True, functools.lru_cache is used instead of + joblib.Memory, and the other config options are ignored. + + .. Notes:: + My reading of joblib's sources suggests that relevant values for + `verbose` are 0, 1 and 2. + """ + location: Path=".joblib-cache" + verbose : int=0 + backend : str="local" + mmap_mode: Optional[str]=None + compress: Union[bool,int]=False + + # _prepend_rootdir = validator("location", allow_reuse=True)(prepend_rootdir) + + # We could comment this out, since although pickled data are not machine portable, since the hash/filename is computed + # from the pickle, if another machine tries to load to load from the same location, it should be OK. + # However this potentially mixes 1000’s of files from different machines + # in the same directory, making it almost impossible to later remove outputs from a specific machine. + # (E.g. remove the laptop runs but keep the ones from the server) + @validator("location") + def make_location_unique(cls, location): + """ + Add a machine-specific unique folder to the cache location, + to avoid collisions with other machines. + (Caches are pickled data, so not machine-portable.) 
+ """ + alphabet = "abcdefghijklmnopqrstuvwxyz" + num = uuid.getnode() + # For legibility, replace the int by an equivalent string + clst = [] + while num > 0: + clst.append(alphabet[num % 26]) + num = num // 26 + hostdir = "host-"+"".join(clst) + return location/hostdir + + class viz(FiguresConfig): + class matplotlib(HoloMPLConfig): + prohibited_area : Dict[str, GenericParam]={} + discouraged_area: Dict[str, GenericParam]={} + calibration_curves: Dict[str, GenericParam]={} + class bokeh(HoloBokehConfig): + prohibited_area : Dict[str, GenericParam]={} + discouraged_area: Dict[str, GenericParam]={} + calibration_curves: Dict[str, GenericParam]={} + + scityping: ScitypingConfig={} + + @validator('scityping') + def add_emd_falsify_safe_packages(scityping): + scityping.safe_packages |= {"emd_falsify.tasks"} + # scityping.safe_packages |= {"emd_falsify.models", "emd_falsify.tasks"} + return scityping +``` + +config = Config(Path(__file__).parent/"defaults.cfg", + config_module_name=__name__) + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [skip-execution] +--- +config = Config() +``` + +# Default options + +These are stored in the text file `defaults.cfg`. 
+ +```{include} defaults.cfg +:literal: true +``` diff --git a/emd_falsify/config/__init__.py b/src/emd_falsify/config/__init__.py similarity index 82% rename from emd_falsify/config/__init__.py rename to src/emd_falsify/config/__init__.py index ce94b21..6aa77cf 100644 --- a/emd_falsify/config/__init__.py +++ b/src/emd_falsify/config/__init__.py @@ -1,9 +1,29 @@ # -*- coding: utf-8 -*- +# --- +# jupyter: +# jupytext: +# formats: py:percent,md:myst +# notebook_metadata_filter: -jupytext.text_representation.jupytext_version +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# kernelspec: +# display_name: Python (emd-falsify-dev) +# language: python +# name: emd-falsify-dev +# --- + +# %% [markdown] editable=true slideshow={"slide_type": ""} +# # Configuration options + +# %% editable=true slideshow={"slide_type": ""} tags=["hide-input"] import uuid from pathlib import Path from typing import Optional, ClassVar, Union, Literal, Dict from configparser import ConfigParser +# %% editable=true slideshow={"slide_type": ""} tags=["hide-input"] from pydantic import BaseModel, Field, validator, root_validator # from mackelab_toolbox.config import ValidatingConfig, prepend_rootdir, ensure_dir_exists # from mackelab_toolbox.config.holoviews import FiguresConfig @@ -12,24 +32,21 @@ from valconfig.contrib.holoviews import FiguresConfig, HoloMPLConfig, HoloBokehConfig, GenericParam from scityping import Config as ScitypingConfig + +# %% [markdown] # Possible improvement: If we could have nested config parsers, we might be # able to rely more on the ConfigParser machinery, and less on a custom # Pydantic type, which should be easier for others to follow. # In particular, the `colors` field could remain a config parser, although # we would still want to allow dotted access. 
+# %% class Config(ValConfig): __default_config_path__ = "defaults.cfg" class paths: figures : Path - # _ensure_dir_exists = validator("figuresdir", allow_reuse=True - # )(ensure_dir_exists) - - # class random: - # entropy: int - class mp: max_cores: int maxtasksperchild: Union[int,None] @@ -99,7 +116,18 @@ def add_emd_falsify_safe_packages(scityping): return scityping +# %% [markdown] # config = Config(Path(__file__).parent/"defaults.cfg", # config_module_name=__name__) -config = Config() \ No newline at end of file +# %% editable=true slideshow={"slide_type": ""} tags=["skip-execution"] +config = Config() + +# %% [markdown] +# # Default options +# +# These are stored in the text file `defaults.cfg`. +# +# ```{include} defaults.cfg +# :literal: true +# ``` diff --git a/emd_falsify/config/defaults.cfg b/src/emd_falsify/config/defaults.cfg similarity index 98% rename from emd_falsify/config/defaults.cfg rename to src/emd_falsify/config/defaults.cfg index 9affb80..a68e8e5 100644 --- a/emd_falsify/config/defaults.cfg +++ b/src/emd_falsify/config/defaults.cfg @@ -5,7 +5,7 @@ [paths] # Paths are prepended with the directory containing the user config file -figures = ../../figures +figures = ../../../figures [random] entropy = 103291791999958856942451524772313066734 diff --git a/emd_falsify/digitize.py b/src/emd_falsify/digitize.py similarity index 100% rename from emd_falsify/digitize.py rename to src/emd_falsify/digitize.py diff --git a/src/emd_falsify/emd.md b/src/emd_falsify/emd.md new file mode 100644 index 0000000..e794770 --- /dev/null +++ b/src/emd_falsify/emd.md @@ -0,0 +1,892 @@ +--- +jupytext: + encoding: '# -*- coding: utf-8 -*-' + formats: py:percent,md:myst + notebook_metadata_filter: -jupytext.text_representation.jupytext_version + text_representation: + extension: .md + format_name: myst + format_version: 0.13 +kernelspec: + display_name: Python (emd-falsify-dev) + language: python + name: emd-falsify-dev +--- + ++++ {"tags": ["remove-cell"], "editable": 
true, "slideshow": {"slide_type": ""}} + +--- +math: + '\RR' : '\mathbb{R}' + '\nN' : '\mathcal{N}' + '\D' : '\mathcal{D}' + '\l' : 'l' + '\Me' : '\mathcal{M}^ε' + '\Unif' : '\mathop{\mathrm{Unif}}' + '\Philt' : '\widetilde{Φ}_{|#1}' + '\Bemd' : 'B_{#1}^{\mathrm{EMD}}' +--- + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +(supp_emd-implementation)= +# EMD implementation + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [hide-input] +--- +import logging +import multiprocessing as mp +from collections.abc import Callable, Mapping +from math import sqrt, ceil, isclose +from itertools import product # Only used in calibration_plot +from functools import partial +from more_itertools import all_equal +import numpy as np +from numpy.random import default_rng +from scipy.integrate import simpson +from scipy.interpolate import interp1d +from scipy.stats import truncnorm +from scipy.special import erf +from tqdm.auto import tqdm + +from typing import Optional, Union, Any, Literal, Tuple, List, Dict, NamedTuple +from scityping.numpy import Array + +from emd_falsify import Config +from emd_falsify.path_sampling import generate_quantile_paths +from emd_falsify.memoize import memoize + +config = Config() +logger = logging.getLogger(__name__) +``` + +```{code-cell} +__all__ = ["interp1d", "make_empirical_risk_ppf", "draw_R_samples", "Bemd"] +``` + ++++ {"tags": ["remove-cell"], "editable": true, "slideshow": {"slide_type": ""}} + +Notebook only imports + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [active-ipynb, remove-cell] +--- +from scipy import stats +import holoviews as hv +hv.extension(config.viz.backend) +logging.basicConfig(level=logging.WARNING) +logger.setLevel(logging.ERROR) +``` + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [active-ipynb, remove-cell] +--- +colors = config.viz.matplotlib.colors["medium-contrast"] +``` + ++++ {"editable": true, "slideshow": {"slide_type": 
""}} + +(supp_emd-implementation_example-sampling-paths)= +## Path sampler test + +$$\begin{aligned} +\tilde{\l} &= \log Φ\,, & Φ &\in [0, 1]\\ +\tilde{σ} &= c \sin π Φ \,, & c &\in \mathbb{R}_+ +\end{aligned}$$ + +The upper part of the yellow region is never sampled, because monotonicity prevents paths from exceeding $\log 1$ at any point. The constant $c$ is determined by a calibration experiment, and controls the variability of paths. Here we use $c=1$. + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [hide-input, active-ipynb] +--- +res = 7 +#Φarr = np.arange(1, 2**res) / 2**res +qstar = np.log +σtilde = lambda Φ: np.sin(Φ * np.pi) +M = 20 + +Φarr = np.arange(1, 2**res) / 2**res +lcurve = hv.Curve(zip(Φarr, qstar(Φarr)), kdims=["Φ"], vdims=["l"], label=r"$\langle\tilde{l}\rangle$") +σarea = hv.Area((Φarr, qstar(Φarr) - σtilde(Φarr), qstar(Φarr) + σtilde(Φarr)), + kdims=["Φ"], vdims=["l-σ", "l+σ"]) +GP_fig = σarea.opts(edgecolor="none", facecolor="#EEEEBB", backend="matplotlib") * lcurve + +qhat_gen = generate_quantile_paths(qstar, σtilde, c=1, M=M, res=res, Phistart=Φarr[0]) +random_colors = default_rng().uniform(0.65, 0.85, M).reshape(-1,1) * np.ones(3) # Random light grey for each curve +qhat_curves = [hv.Curve(zip(Φhat, qhat), kdims=["Φ"], vdims=["l"], label=r"$\hat{l}$") + .opts(color=color) + for (Φhat, qhat), color in zip(qhat_gen, random_colors)] + +Φ_fig = hv.Overlay(qhat_curves) + +GP_fig * Φ_fig +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +Similar test, but we allow variability in the end point. Note that now samples samples can cover all of the yellow region. + +$$\begin{aligned} +\tilde{\l} &= \log Φ\,, & Φ &\in [0, 1]\\ +\tilde{σ} &= c \sin \frac{3 π Φ}{4} \,, & c &\in \mathbb{R}_+ \,. 
+\end{aligned}$$ + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +$$\begin{aligned} +\tilde{\l} &= \log Φ\,, & Φ &\in [0, 1] +\end{aligned}$$ + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [active-ipynb, hide-input] +--- +res = 7 +qstar = np.log +σtilde = lambda Φ: np.sin(Φ * 0.75*np.pi) +M = 20 + +Φarr = np.arange(1, 2**res) / 2**res +lcurve = hv.Curve(zip(Φarr, qstar(Φarr)), kdims=["Φ"], vdims=["l"], label=r"$\langle\tilde{l}\rangle$") +σarea = hv.Area((Φarr, qstar(Φarr) - σtilde(Φarr), qstar(Φarr) + σtilde(Φarr)), + kdims=["Φ"], vdims=["l-σ", "l+σ"]) +GP_fig = σarea.opts(edgecolor="none", facecolor="#EEEEBB", backend="matplotlib") * lcurve + +qhat_gen = generate_quantile_paths(qstar, σtilde, c=1, M=M, res=res, Phistart=Φarr[0]) +random_colors = default_rng().uniform(0.65, 0.85, M).reshape(-1,1) * np.ones(3) # Random light grey for each curve +qhat_curves = [hv.Curve(zip(Φhat, qhat), kdims=["Φ"], vdims=["l"], label=r"$\hat{l}$") + .opts(color=color) + for (Φhat, qhat), color in zip(qhat_gen, random_colors)] + +Φ_fig = hv.Overlay(qhat_curves) + +GP_fig * Φ_fig +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +## Serializable PPF functions + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +### Serializable 1d interpolator + +Our [workflow tasks](./tasks.py) require arguments to be serializable, which include the callable arguments used to compute the center $q^*$ and square root of the metric variance ($c δ^{\EMD}$). These functions are almost always going to be obtained by constructing an interpolator from empirical samples, and the obvious choice for that is *SciPy*’s `interp1d`. However this type is of course not serializable out of the box. + +We could define a custom version of `interp1d` (in fact we do) which adds serializability within our framework. However this would be bad form, requiring users to use special classes for seemingly obscure reasons. 
So instead we make the original class in `scipy.interpolate` serializable, following the format described in [*Defining serializers for preexisting types*](https://scityping.readthedocs.io/en/stable/defining-serializers.html) from the *scityping* documentation. In the process this does define a custom `interp1d` type, but in practice either this one or the standard one from `scipy.interpolate` can be used interchangeably. + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +--- +from typing import Literal +from dataclasses import dataclass +import scipy.interpolate +from scityping import Serializable +from scityping.numpy import Array + +class interp1d(Serializable, scipy.interpolate.interp1d): + """ + Subclass of `scipy.interpolate.interp1d` which is serializable within the + `scitying` framework. Otherwise completely equivalent to the version in *scipy*. + """ + @dataclass + class Data: + x: Array # These are all possible arguments to interp1d + y: Array # as of scipy v1.11.1 + kind: str|int + axis: int|None + copy: bool|None + bounds_error: bool|None + fill_value: Literal["extrapolate"]|Tuple[Array, Array]|Array + + def encode(f): + return (f.x, f.y, f._kind, f.axis, f.copy, f.bounds_error, f.fill_value) +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +## Estimate the risk quantile function + +Also known as the point probability function (PPF), this is the function $q(Φ)$ required by `draw_R_samples` to draw samples of the expected risk. Note that this is the quantile function *of the risk*, not of models predictions, so it is always a 1d function. + +- The abscissa of the quantile function (PPF) is the cumulative probability $Φ \in [0, 1]$. + The ordinate is the *risk* of a data sample. +- The integral of $q(Φ)$ is the expected risk: $\int_0^1 q(Φ) dΦ = R$. + (This is easy to see in 1d, where $dΦ = p(x)dx$.) +- The EMD approximation defines a distribution over quantile functions of the risk. 
+ - It sets the square root of the *metric variance* at $Φ$ to be proportional to the discrepancy between $q^*(Φ)$ and $\tilde{q}(Φ)$. + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +In some cases it may be possible to derive $q$ analytically from a models equations, but in general we need to estimate it from samples. Concretelly all this does is construct a `scipy.interpolate.interp1d` object: if $L$ risk samples are given, they are sorted, and assigned the cumulative probabilities $Φ = \frac{1}{L+1}, \frac{2}{L+1}, \dotsc, \frac{L}{L+1}$. Intermediate values are obtained by linear interpolation. We don’t assign the $Φ=0$ and $Φ=1$, since it is more conservative to assume that the absolute extrema have not been sampled – instead we use linear extrapolation for the small intervals $\bigl[0, \frac{1}{L+1}\bigr)$ and $\bigl(\frac{L}{L+1}, 1\bigr]$. + +If users want to use different assumptions – for example if users know that the highest possible risk is part of the sample set – then they may construct the `scipy.interpolate.interp1d` object directly. + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +--- +def make_empirical_risk_ppf(risk_samples: Array[float,1]): + """Convert a set of samples into a callable and serializable PPF function. + + The PPF describes the distribution from which the samples were drawn. + Sample sizes of 1000-4000 are usually sufficient, although this depends on + the shape of the PPF. As a general rule, the larger the maximum derivative + along the PPF, the more samples are needed to resolve it accurately. + Consequently, the best test to check whether the empirical PPF is a good + approximation is simply to plot it and inspect the result visually. + + Concretely this just a wrapper for `scipy.interpolate.interp1d`, with a + possibly more intuitive function and argument names. + It is completely equivalent to call `interp1d` directly. + + .. 
Important:: This expects to receive samples of the risk (not the model). + Typically this is obtain with something like ``risk(model(n_data_points))``, + where ``model`` is a generative model and ``risk`` the per-data-point risk + function. + + .. Note:: When calling `Bemd` directly, any callable will work for the PPF + argument. However, in order to use the `Calibrate` task under `emd_falsify.tasks`, + it is necessary for the PPF callable to be *serializable*. This package + adds special support to make scipy’s `interp1d` class serializable, but other + interpolators will not work out of the box with `Calibrate`. + The easiest way to make an arbitrary callable serializable is probably to + use a `dataclasses.dataclass`: use class attributes for the parameters, + and define the PPF in the __call__ method. (Scityping has built-in support + for dataclasses with simply data types.) + + """ + risk_samples = np.asarray(risk_samples) + if risk_samples.ndim != 1: + raise ValueError("A risk PPF should always be 1d, but the provided " + f"array `risk_samples` is {risk_samples.ndim}d.") + L = len(risk_samples) + Φarr = np.arange(1, L+1) / (L+1) + return interp1d(Φarr, np.sort(risk_samples), fill_value="extrapolate") +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +## Draw samples of the expected risk $R$ + +**Given** + +- $\D^L$: A data set of $L$ samples. +- $\l_{A;\Me}: X \times Y \to \RR$: Function returning the log likelihood of a sample. +- $c$: Proportionality constant between the EMD and the metric variance when sampling increments. +- $r$: Resolution of the $\Philt{\Me,A}$ paths. Paths will discretized into $2^r$ steps. (And therefore contain $2^r + 1$ points.) +- $M$: The number of paths over which to average. + +**Return** + +- Array of $R$ values, drawn from from the EMD distribution + +See {prf:ref}`alg-estimator-statistics`. 
+ ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +:::{margin} +The rule for computing `new_M` comes from the following ($ε$: `stderr`, $ε_t$: `stderr_tol`, $M'$: `new_M`) +```{math} +ε &= σ/\sqrt{M} \\ +ε' &= σ/\sqrt{M'} \\ +\frac{ε^2}{ε'^2} &= \frac{M'}{M} +\end{aligned} +``` +::: + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [remove-cell] +--- +@memoize(ignore=["path_progbar"]) +def draw_R_samples(mixed_risk_ppf: Callable, + synth_risk_ppf: Callable, + c: float, *, + res: int=8, M: int=128, max_M: int=1024, + relstderr_tol: float=2**-5, # 2⁻⁵ ≈ 0.03 + path_progbar: Union[Literal["auto"],None,tqdm,mp.queues.Queue]=None, + print_relstderr: bool=False + ) -> Array[float, 1]: + """ + Draw samples of the expected risk `R` using the EMD hierarchical beta process + to generate model cumulative distribution functions. This is meant to approximate + the expected risk on unseen data, while accounting for uncertainty in the model + itself. + The more accurate the model (as measured by its statistical correspondence with + the data), the more tightly the R samples are distributed. + + Concretely this is done by comparing two quantile functions (also known as + point probability functions): + + - The *mixed PPF* is obtained by evaluating the risk of the actual observed + data samples. + - The *synthetic PPF* is obtained by evaluating the risk on synthetic samples, + generated using the same theoretical model used to compute the risk. + + Thus if the theory perfectly describes the data, then the two quantiles should + be identical (up to sampling errors). Note that these are the quantile functions + *of the risk*, not of models predictions, so they are always a 1d functions. + + In some sense this function boils down to calling `path_sampling.generate_quantile_paths` + `M` times, integrating each resulting path with Simpson’s rule to get its + expected risk `R`, and returning the resulting `R` values as an array. 
+ However, it also provides the very useful convenience of automatically adjusting + the number `M` of realizations needed to estimate the mean value of `R`. + It does this by first generating `M` realizations and then computing the + standard error on the mean of `R`: if this is greater than `relstderr * mean(R)`, + then new realizations are generated to try to achieve the target accuracy. + (The number of realizations added depends on how far away we are from that target.) + This process is repeated until either the target accuracy is reached, or the + number of realizations reaches `max_M`. In the latter case a warning is printed. + + .. Note:: When using multiprocessing to call this function multiple times, + use either a `multiprocessing.Queue` or `None` for the `progbar` argument. + + Parameters + ---------- + mixed_risk_ppf: Quantile function of the risk using *observed* data samples. + synth_risk_ppf: Quantile function of the risk using *synthetic* data samples. + c: Proportionality constant between EMD and path sampling variance. + res: Controls the resolution of the random quantile paths generated to compute statistics. + Paths have ``2**res`` segments; typical values of `res` are 6, 7 and 8, corresponding + to paths of length 64, 128 and 256. Smaller may be useful to accelerate debugging, + but larger values are unlikely to be useful. + M: The minimum number of paths over which to average. + Actual number may be more, to achieve the specified standard error. + max_M: The maximum number of paths over which to average. + This serves to prevent runaway computation in case the specified + standard error is too low. + relstderr_tol: The maximum relative standard error on the moments we want to allow. + (i.e. ``stderr / |mean(R)|``). If this is exceeded after taking `M` path samples, + the number of path samples is increased until we are under tolerance, or we have + drawn 1000 samples. A warning is displayed if 1000 paths does not achieve tolerance. 
+ path_progbar: Control whether to create progress bar or use an existing one. + - With the default value 'auto', a new tqdm progress is created. + This is convenient, however it can lead to many bars being created & + destroyed if this function is called within a loop. + - To prevent this, a tqdm progress bar can be created externally (e.g. with + ``tqdm(desc="Generating paths")``) and passed as argument. + Its counter will be reset to zero, and its set total to `M` + `previous_M`. + - (Multiprocessing): To support updating progress bars within child processes, + a `multiprocessing.Queue` object can be passed, in which case no + progress bar is created or updated. Instead, each time a quantile path + is sampled, a value is added to the queue with ``put``. This way, the + parent process can update a progress by consuming the queue; e.g. + ``while not q.empty(): progbar.update()``. + The value added to the queue is `M`+`previous_M`, which can be + used to update the total value of the progress bar. + - A value of `None` prevents displaying any progress bar. + print_relstderr: Debug option. Setting to true will cause the number of realizations + and associated standard error to be printed each time `M` is increased. + Useful for checking if the requested resolution is reasonable. 
+ + Returns + ------- + array of floats + """ + + δemd = lambda Φ: abs(mixed_risk_ppf(Φ) - synth_risk_ppf(Φ)) + + # Compute m1 for enough sample paths to reach relstderr_tol + m1 = [] + def generate_paths(M, previous_M=0, qstar=mixed_risk_ppf, δemd=δemd, c=c, res=res, progbar=path_progbar): + for Φhat, qhat in generate_quantile_paths(qstar, δemd, c=c, M=M, res=res, + progbar=progbar, previous_M=previous_M): + m1.append(simpson(qhat, Φhat)) # Generated paths always have an odd number of steps, which is good for Simpson's rule + + generate_paths(M) + μ1 = np.mean(m1) + Σ1 = np.var(m1) + relstderr = sqrt(Σ1) / max(abs(μ1), 1e-8) / sqrt(M) # TODO?: Allow setting abs tol instead of hardcoding 1e-8 ? + if print_relstderr: + print(f"{M=}, {relstderr=}") + while relstderr > relstderr_tol and M < max_M: + # With small M, we don’t want to put too much trust in the + # initial estimate of relstderr. So we cap increases to doubling M. + new_M = min(ceil( (relstderr/relstderr_tol)**2 * M ), 2*M) + logger.debug(f"Increased number of sampled paths (M) to {new_M}. " + f"Previous rel std err: {relstderr}") + if new_M > max_M: + new_M = max_M + logger.warning(f"Capped the number of sample paths to {max_M} " + "to avoid undue computation time.") + if new_M == M: + # Can happen due to rounding or because we set `new_M` to `max_M` + break + generate_paths(new_M - M, M) + M = new_M + μ1 = np.mean(m1) + Σ1 = np.var(m1) + relstderr = sqrt(Σ1) / max(abs(μ1), 1e-8) / sqrt(M) # TODO?: Allow setting abs tol instead of hardcoding 1e-8 ? + if print_relstderr: + print(f"{M=}, {relstderr=}") + + if relstderr > relstderr_tol: + logger.warning("Requested std err tolerance was not achieved. 
" + f"std err: {relstderr}\nRequested max std err: {relstderr_tol}") + return np.array(m1) +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +### Test sampling of expected risk $R$ + +$$\begin{aligned} +x &\sim \Unif(0, 3) \\ +y &\sim e^{-λx} + ξ +\end{aligned}$$ + +:::{list-table} + +* - Theory model $A$: + - $λ=1$ + - $ξ \sim \nN(0, 1)$. +* - Theory model $B$: + - $λ=1.1$ + - $ξ \sim \nN(0, 1)$. +* - True data-generating model: + - $λ=1$ + - $ξ \sim \nN(-0.03, 1)$. + +::: + +In this example, neither model $A$ nor $B$ is a perfect fit to the data, since they both incorrectly assume an unbiased noise. Moreover, both models seem to predict the observations equally well; in other words, we expect the EMD criterion to be *equivocal* between models $A$ and $B$. + +Within the EMD framework, models are compared as usual based on their expected risk $R$. This captures aleatoric uncertainty – i.e. randomness inherent to the model, such as the $ξ$ process above. The EMD criterion then further captures *epistemic* uncertainty by treating $R$ as a random variable, and considering *its* distribution. Roughly speaking, the better a model is at predicting the data distribution, the tighter its $R$ distribution will be. (For example, a model can have a lot of noise, but if we can predict the statistics of that noise accurately, then the distribution on $R$ will be tight and its uncertainty low.) 
+ +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [active-ipynb, hide-input] +--- +λ = 1 +λB = 1.2 +σ = 0.25 +δy = -0.03 +L = 400 +seed = 123 + +def generative_data_model(L, rng=None): + rng = default_rng(rng) + x = rng.uniform(0, 3, L) + y = np.exp(-λ*x) + rng.normal(-0.03, σ, L) + return x, y + +def generative_theory_modelA(L, rng=None): + rng = default_rng(rng) + x = rng.uniform(0, 3, L) + y = np.exp(-λ*x) + rng.normal(0, σ, L) + return x, y +def riskA(xy): + "Negative log likelihood of model A" + x, y = xy + return -stats.norm(0, 1).logpdf(y - np.exp(-λ*x)) # z = exp(-λ*x) + +def generative_theory_modelB(L, rng=None): + rng = default_rng(rng) + x = rng.uniform(0, 3, L) + y = np.exp(-λB*x) + rng.normal(0, σ, L) + return x, y +def riskB(xy): + "Negative log likelihood of model B" + x, y = xy + return -stats.norm(0, 1).logpdf(y - np.exp(-λB*x)) # z = exp(-λ*x) + +observed_data = generative_data_model(L, seed) +synth_dataA = generative_theory_modelA(L, seed*2) # Use different seeds for different models +synth_dataB = generative_theory_modelB(L, seed*3) + +mixed_ppfA = make_empirical_risk_ppf(riskA(observed_data)) +synth_ppfA = make_empirical_risk_ppf(riskA(synth_dataA)) + +mixed_ppfB = make_empirical_risk_ppf(riskB(observed_data)) +synth_ppfB = make_empirical_risk_ppf(riskB(synth_dataB)) +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +In this example, the discrepancy between the theoretical models and the observed data distribution is very small, so the differences between quantile curves is similarly very small. + +- **Synthetic** PPF — Same theoretical model for both the defining the risk and generating the (synthetic) data. +- **Mixed** PPF – Different models for the risk and data: Again a theoretical model is used to define the risk, but now it is evaluated on real observed data. 
+ +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [active-ipynb, hide-input] +--- +# Data panel +xarr = np.linspace(0, 3) +scat_data = hv.Scatter(zip(*observed_data), group="data", label="data") +curve_A = hv.Curve(zip(xarr, np.exp(-λ*xarr)), group="model A", label="model A") +curve_B = hv.Curve(zip(xarr, np.exp(-λB*xarr)), group="model B", label="model B") +panel_data = scat_data * curve_A * curve_B + +panel_data.opts( + hv.opts.Scatter("data", color="grey", alpha=0.5), hv.opts.Scatter("data", s=6, backend="matplotlib"), + hv.opts.Curve("model A", color=colors["dark blue"], alpha=0.7), + hv.opts.Curve("model B", color=colors["dark red"], alpha=0.7) +) +``` + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [hide-input, active-ipynb] +--- +# Quantile function panels +Φarr = np.linspace(0, 1, 100) +curve_synthA = hv.Curve(zip(Φarr, synth_ppfA(Φarr)), kdims=["Φ"], vdims=["q"], group="synth", label="model A") +curve_synthB = hv.Curve(zip(Φarr, synth_ppfB(Φarr)), kdims=["Φ"], vdims=["q"], group="synth", label="model B") +curve_mixedA = hv.Curve(zip(Φarr, mixed_ppfA(Φarr)), kdims=["Φ"], vdims=["q"], group="mixed", label="model A") +curve_mixedB = hv.Curve(zip(Φarr, mixed_ppfB(Φarr)), kdims=["Φ"], vdims=["q"], group="mixed", label="model B") +fig = curve_synthA * curve_synthB * curve_mixedA * curve_mixedB + +zoomrect = hv.Rectangles([(.6, .95, .99, 1.15,)]).opts(facecolor="none", edgecolor="grey") +legendtext = hv.Text(0.61, 1.05, "Dashed lines:\nmixed PPF", halign="left") +layout = panel_data + fig*zoomrect + fig.redim.range(Φ=(0.6, .99), q=(.95, 1.15))*legendtext + +layout.opts( + hv.opts.Curve(alpha=0.8, axiswise=True), + hv.opts.Curve("synth.model A", color=colors["dark blue"]), hv.opts.Curve("mixed.model A", color=colors["dark blue"]), + hv.opts.Curve("synth.model B", color=colors["light red"]), hv.opts.Curve("mixed.model B", color=colors["light red"]), + hv.opts.Curve("synth", linestyle="solid", 
backend="matplotlib"), hv.opts.Curve("synth", line_dash="solid", backend="bokeh"), + hv.opts.Curve("mixed", linestyle="dashed", backend="matplotlib"), hv.opts.Curve("mixed", line_dash="dashed", backend="bokeh"), + hv.opts.Curve(aspect=1.3, backend="matplotlib"), + hv.opts.Layout(fig_inches=4, backend="matplotlib"), + hv.opts.Layout(sublabel_format="") +) +``` + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [active-ipynb, hide-input] +--- +# R dist panels +RA_lst = draw_R_samples(mixed_ppfA, synth_ppfA, c=1, M=100, print_relstderr=True) +RB_lst = draw_R_samples(mixed_ppfB, synth_ppfB, c=1, M=100, print_relstderr=True) + +RAlines = hv.Spikes(RA_lst, kdims=["R"], group="model A").opts(spike_length=50) +RBlines = hv.Spikes(RB_lst, kdims=["R"], group="model B").opts(spike_length=50) + +distA = hv.Distribution(RA_lst, kdims=["$R$"], vdims=["$p(R)$"], group="model A") +distB = hv.Distribution(RB_lst, kdims=["$R$"], vdims=["$p(R)$"], group="model B") + +panel_RA = distA * RAlines +panel_RB = distB * RBlines +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +Each model induces a distribution for its expected risk, so with two models $A$ and $B$ we have distributions for $R_A$ and $R_B$. +The figures below show the sampled values for $R_A$ and $R_B$, along overlayed with a kernel density estimate of their distribution. + +In this case both distributions are very tight, and any difference between them are due as much to finite sampling than to the likelihood picking up which one is the better fit. This translates into distributions with very high overlap, and therefore a probability approximately ½ that model $A$ is better than $B$. In other words, the criterion is *equivocal* between $A$ and $B$, as we expected. 
+ +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [active-ipynb, hide-input] +--- +fig = panel_RA + panel_RB + panel_RA*panel_RB + +panel_RA.opts( + hv.opts.Spikes("model A", color=colors["dark blue"], alpha=0.25), + hv.opts.Distribution("model A", edgecolor=colors["dark blue"], facecolor=colors["light blue"], backend="matplotlib") +) +panel_RB.opts( + hv.opts.Spikes("model B", color=colors["dark red"], alpha=0.25), + hv.opts.Distribution("model B", edgecolor=colors["dark red"], facecolor=colors["light red"], backend="matplotlib") +) + +fig.opts( + hv.opts.Overlay(aspect=1.3), + hv.opts.Layout(fig_inches=3, backend="matplotlib"), + hv.opts.Layout(sublabel_format="") +) +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}, "tags": ["active-ipynb"]} + +(supp_emd-implementation_Bemd)= +## Implementation of $B^{\mathrm{emd}}_{AB;c}$ + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +The EMD criterion is defined as +```{math} +\begin{aligned} +\Bemd{AB;c} &:= P(R_A < R_B \mid c) \\ + &\approx \frac{1}{M_A M_B} \sum_{i=1}^{M_A} \sum_{j=1}^{M_B} \boldsymbol{1}_{R_{A,i} < R_{B,j}}\,, +\end{aligned} +``` +where $c$ is a scale parameter and $M_A$ (resp. $M_B$) is the number of samples of $R_A$ (resp. $R_B$). The expression $\boldsymbol{1}_{R_{A,i} < R_{B,j}}$ is one when $R_{A,i} < R_{B,j}$ and zero otherwise. +We can write the sum as nested Python loops: + +:::{margin} +(`RA_lst` and `RB_lst` are the expected risk samples generated in the example above.) 
+::: + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [active-ipynb] +--- +s = 0 +for rA in RA_lst: + for rB in RB_lst: + s += (rA < rB) +s / len(RA_lst) / len(RB_lst) +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +However it is much faster (about 50x in this case) to use a NumPy ufunc: + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [active-ipynb] +--- +np.less.outer(RA_lst, RB_lst).mean() +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +The `Bemd` function is essentially a convenience function for comparing two models, which calls `draw_R_samples` (once for each models) and then computes the criterion value as above. + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +--- +def mp_wrapper(f: Callable, *args, out: "mp.queues.Queue", **kwargs): + "Wrap a function by putting its return value in a Queue. Used for multiprocessing." + out.put(f(*args, **kwargs)) + +LazyPartial = Union[Callable, Tuple[Callable, Mapping]] +``` + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +--- +@memoize(ignore=["progbarA", "progbarB", "use_multiprocessing"]) +def Bemd(mixed_risk_ppfA: Callable, mixed_risk_ppfB: Callable, + synth_risk_ppfA: Callable, synth_risk_ppfB: Callable, + c: float, *, + res: int=7, M: int=32, max_M: int=1024, + relstderr_tol: float=2**-5, # 2⁻⁵ ≈ 0.03 + progbarA: Union[tqdm,Literal['auto'],None]='auto', + progbarB: Union[tqdm,Literal['auto'],None]='auto', + use_multiprocessing: bool=True + ) -> float: + + """ + Compute the EMD criterion for two models, whose risk distributions are described + by a *mixed risk PPF* (mixing observed samples with theoretical risk function) + and a *synthetic risk PPF* (from theoretical samples and theoretical risk function). + + This is accomplished by calling `draw_R_samples` for each model, and using the + result to compute the probability that model A has lower risk than model B. 
+ + The main benefit of this function is that it can manage a multiprocessing pool + to perform calculations in parallel and keep progress bars updated. + + For more details on the parameters, see `draw_R_samples`. + + Parameters + ---------- + mixed_risk_ppfA, mixed_risk_ppfB: Quantile functions of the risk using + *observed* data samples. + synth_risk_ppfA, synth_risk_ppfB: Quantile function of the risk using + *synthetic* data samples. + c: Proportionality constant between EMD and path sampling variance. + res: Controls the resolution of the random quantile paths generated to compute statistics. + Paths have length ``2**res + 1``; typical values of `res` are 6, 7 and 8, corresponding + to paths of length 64, 128 and 256. Smaller may be useful to accelerate debugging, + but larger values are unlikely to be useful. + M: The minimum number of paths over which to average. + Actual number may be more, to achieve the specified standard error. + max_M: The maximum number of paths over which to average. + This serves to prevent runaway computation in case the specified + standard error is too low. + relstderr_tol: The maximum relative standard error on the moments we want to allow. + (i.e. ``stderr / |μ1|``). If this is exceeded after taking `M` path samples, + the number of path samples is increased until we are under tolerance, or we have + drawn 1000 samples. A warning is displayed if 1000 paths does not achieve tolerance. + progbarA, probgbarB: Control whether to create progress bar or use an existing one. + These progress bars track the number of generated quantile paths. + - With the default value 'auto', a new tqdm progress is created. + This is convenient, however it can lead to many bars being created & + destroyed if this function is called within a loop. + - To prevent this, a tqdm progress bar can be created externally (e.g. with + ``tqdm(desc="Generating paths")``) and passed as argument. 
+ Its counter will be reset to zero, and its set total to `M` + `previous_M`. + - A value of `None` prevents displaying any progress bar. + use_multiprocessing: If `True`, the statistics for models A and B are + computed simultaneously; otherwise they are computed sequentially. + Default is `True`. + One reason not to use multiprocessing is if this call is part of a + higher-level loop with is itself parallelized: child multiprocessing + processes can’t spawn their own child processes. + + TODO + ---- + - Use separate threads to update progress bars. This should minimize their + tendency to lag behind the actual number of sampled paths. + """ + + + # NB: Most of this function is just managing mp processes and progress bars + if isinstance(progbarA, tqdm): + close_progbarA = False # Closing a progbar prevents it from being reused + elif progbarA == 'auto': # NB: This works because we already excluded tqdm (tqdm types raise AttributeError on ==) + progbarA = tqdm(desc="sampling quantile fns (A)") + close_progbarA = True + else: # With `progbarA=None`, we don’t create a progbar, so nothing to close. + close_progbarA = False + if isinstance(progbarB, tqdm): + close_progbarB = False + elif progbarB == 'auto': + progbarB = tqdm(desc="sampling quantile fns (B)") + close_progbarB = True + else: + close_progbarB = False + + if not use_multiprocessing: + RA_lst = draw_R_samples( + mixed_risk_ppfA, synth_risk_ppfA, c=c, + res=res, N=N, M=M, relstderr_tol=relstderr_tol, + path_progbar=progbarA) + RB_lst = draw_R_samples( + mixed_risk_ppfB, synth_risk_ppfB, c=c, + res=res, N=N, M=M, relstderr_tol=relstderr_tol, + path_progbar=progbarB) + + else: + progqA = mp.Queue() if progbarA is not None else None # We manage the progbar ourselves. 
The Queue is used for receiving + progqB = mp.Queue() if progbarA is not None else None # progress updates from the function + outqA = mp.Queue() # Function output values are returned via a Queue + outqB = mp.Queue() + _draw_R_samples_A = partial( + draw_R_samples, + mixed_risk_ppfA, synth_risk_ppfA, c=c, + res=res, M=M, max_M=max_M, relstderr_tol=relstderr_tol, + path_progbar=progqA) + _draw_R_samples_B = partial( + draw_R_samples, + mixed_risk_ppfB, synth_risk_ppfB, c=c, + res=res, M=M, max_M=max_M, relstderr_tol=relstderr_tol, + path_progbar=progqB) + pA = mp.Process(target=mp_wrapper, args=(_draw_R_samples_A,), + kwargs={'path_progbar': progqA, 'out': outqA}) + pB = mp.Process(target=mp_wrapper, args=(_draw_R_samples_B,), + kwargs={'path_progbar': progqB, 'out': outqB}) + pA.start() + pB.start() + progbar_handles = ( ([(progqA, progbarA)] if progbarA is not None else []) + +([(progqB, progbarB)] if progbarB is not None else []) ) + if progbar_handles: + for _, progbar in progbar_handles: + progbar.reset() # We could reset the total here, but already reset it below + while pA.is_alive() or pB.is_alive(): + for (progq, progbar) in progbar_handles: + if not progq.empty(): + n = 0 + while not progq.empty(): # Flush the progress queue, then update the progress bar. + total = progq.get() # Otherwise the progress bar may not keep up + n += 1 + if total != progbar.total: + progq.dynamic_miniters = False # Dynamic miniters doesn’t work well when we mess around with the total + # Reset the max for the progress bar + progbar.total = total + if "notebook" in str(progbar.__class__.mro()): # Specific to tqdm_notebook + progbar.container.children[1].max = total + progbar.container.children[1].layout.width = None # Reset width; c.f. 
progbar.reset() + progbar.update(n) + + pA.join() + pB.join() + pA.close() + pB.close() + # NB: Don't close progress bars unless we created them ourselves + if close_progbarA: progbarA.close() + if close_progbarB: progbarB.close() + RA_lst = outqA.get() + RB_lst = outqB.get() + + return np.less.outer(RA_lst, RB_lst).mean() +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +Calling `Bemd` returns the same value as above, up to sampling error: + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [active-ipynb] +--- +Bemd(mixed_ppfA, mixed_ppfB, synth_ppfA, synth_ppfB, c=1) +``` + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [active-ipynb, remove-input] +--- +from emd_falsify.utils import GitSHA +GitSHA() +``` diff --git a/emd_falsify/emd.py b/src/emd_falsify/emd.py similarity index 92% rename from emd_falsify/emd.py rename to src/emd_falsify/emd.py index e748a05..1d5ff00 100644 --- a/emd_falsify/emd.py +++ b/src/emd_falsify/emd.py @@ -2,19 +2,19 @@ # --- # jupyter: # jupytext: -# formats: py:light,md:myst +# formats: py:percent,md:myst +# notebook_metadata_filter: -jupytext.text_representation.jupytext_version # text_representation: # extension: .py -# format_name: light -# format_version: '1.5' -# jupytext_version: 1.15.0 +# format_name: percent +# format_version: '1.3' # kernelspec: -# display_name: Python (emd-paper) +# display_name: Python (emd-falsify-dev) # language: python -# name: emd-paper +# name: emd-falsify-dev # --- -# + [markdown] editable=true slideshow={"slide_type": ""} +# %% [markdown] tags=["remove-cell"] editable=true slideshow={"slide_type": ""} # --- # math: # '\RR' : '\mathbb{R}' @@ -24,16 +24,14 @@ # '\Me' : '\mathcal{M}^ε' # '\Unif' : '\mathop{\mathrm{Unif}}' # '\Philt' : '\widetilde{Φ}_{|#1}' -# '\Elmu' : 'μ_{{#1}}^{(1)}' -# '\Elsig' : 'Σ_{{#1}}^{(1)}' # '\Bemd' : 'B_{#1}^{\mathrm{EMD}}' # --- -# + [markdown] editable=true slideshow={"slide_type": ""} +# %% [markdown] 
editable=true slideshow={"slide_type": ""} # (supp_emd-implementation)= # # EMD implementation -# + tags=["hide-input"] editable=true slideshow={"slide_type": ""} +# %% editable=true slideshow={"slide_type": ""} tags=["hide-input"] import logging import multiprocessing as mp from collections.abc import Callable, Mapping @@ -58,24 +56,24 @@ config = Config() logger = logging.getLogger(__name__) -# - +# %% __all__ = ["interp1d", "make_empirical_risk_ppf", "draw_R_samples", "Bemd"] -# + [markdown] tags=["remove-cell"] editable=true slideshow={"slide_type": ""} +# %% [markdown] tags=["remove-cell"] editable=true slideshow={"slide_type": ""} # Notebook only imports -# + tags=["active-ipynb", "remove-cell"] editable=true slideshow={"slide_type": ""} +# %% editable=true slideshow={"slide_type": ""} tags=["active-ipynb", "remove-cell"] # from scipy import stats # import holoviews as hv # hv.extension(config.viz.backend) # logging.basicConfig(level=logging.WARNING) # logger.setLevel(logging.ERROR) -# + editable=true slideshow={"slide_type": ""} tags=["active-ipynb", "remove-cell"] +# %% editable=true slideshow={"slide_type": ""} tags=["active-ipynb", "remove-cell"] # colors = config.viz.matplotlib.colors["medium-contrast"] -# + [markdown] editable=true slideshow={"slide_type": ""} +# %% [markdown] editable=true slideshow={"slide_type": ""} # (supp_emd-implementation_example-sampling-paths)= # ## Path sampler test # @@ -86,7 +84,7 @@ # # The upper part of the yellow region is never sampled, because monotonicity prevents paths from exceeding $\log 1$ at any point. The constant $c$ is determined by a calibration experiment, and controls the variability of paths. Here we use $c=1$. 
-# + tags=["hide-input", "active-ipynb"] editable=true slideshow={"slide_type": ""} +# %% editable=true slideshow={"slide_type": ""} tags=["hide-input", "active-ipynb"] # res = 7 # #Φarr = np.arange(1, 2**res) / 2**res # qstar = np.log @@ -109,7 +107,7 @@ # # GP_fig * Φ_fig -# + [markdown] editable=true slideshow={"slide_type": ""} +# %% [markdown] editable=true slideshow={"slide_type": ""} # Similar test, but we allow variability in the end point. Note that now samples samples can cover all of the yellow region. # # $$\begin{aligned} @@ -117,12 +115,12 @@ # \tilde{σ} &= c \sin \frac{3 π Φ}{4} \,, & c &\in \mathbb{R}_+ \,. # \end{aligned}$$ -# + [markdown] editable=true slideshow={"slide_type": ""} +# %% [markdown] editable=true slideshow={"slide_type": ""} # $$\begin{aligned} # \tilde{\l} &= \log Φ\,, & Φ &\in [0, 1] # \end{aligned}$$ -# + editable=true slideshow={"slide_type": ""} tags=["active-ipynb", "hide-input"] +# %% editable=true slideshow={"slide_type": ""} tags=["active-ipynb", "hide-input"] # res = 7 # qstar = np.log # σtilde = lambda Φ: np.sin(Φ * 0.75*np.pi) @@ -144,17 +142,17 @@ # # GP_fig * Φ_fig -# + [markdown] editable=true slideshow={"slide_type": ""} +# %% [markdown] editable=true slideshow={"slide_type": ""} # ## Serializable PPF functions -# + [markdown] editable=true slideshow={"slide_type": ""} +# %% [markdown] editable=true slideshow={"slide_type": ""} # ### Serializable 1d interpolator # # Our [workflow tasks](./tasks.py) require arguments to be serializable, which include the callable arguments used to compute the center $q^*$ and square root of the metric variance ($c δ^{\EMD}$). These functions are almost always going to be obtained by constructing an interpolator from empirical samples, and the obvious choice for that is *SciPy*’s `interp1d`. However this type is of course not serializable out of the box. # # We could define a custom version of `interp1d` (in fact we do) which adds serializability within our framework. 
However this would be bad form, requiring users to use special classes for seemingly obscure reasons. So instead we make the original class in `scipy.interpolate` serializable, following the format described in [*Defining serializers for preexisting types*](https://scityping.readthedocs.io/en/stable/defining-serializers.html) from the *scityping* documentation. In the process this does define a custom `interp1d` type, but in practice either this one or the standard one from `scipy.interpolate` can be used interchangeably. -# + editable=true slideshow={"slide_type": ""} +# %% editable=true slideshow={"slide_type": ""} from typing import Literal from dataclasses import dataclass import scipy.interpolate @@ -180,7 +178,7 @@ def encode(f): return (f.x, f.y, f._kind, f.axis, f.copy, f.bounds_error, f.fill_value) -# + [markdown] editable=true slideshow={"slide_type": ""} +# %% [markdown] editable=true slideshow={"slide_type": ""} # ## Estimate the risk quantile function # # Also known as the point probability function (PPF), this is the function $q(Φ)$ required by `draw_R_samples` to draw samples of the expected risk. Note that this is the quantile function *of the risk*, not of models predictions, so it is always a 1d function. @@ -192,12 +190,12 @@ def encode(f): # - The EMD approximation defines a distribution over quantile functions of the risk. # - It sets the square root of the *metric variance* at $Φ$ to be proportional to the discrepancy between $q^*(Φ)$ and $\tilde{q}(Φ)$. -# + [markdown] editable=true slideshow={"slide_type": ""} +# %% [markdown] editable=true slideshow={"slide_type": ""} # In some cases it may be possible to derive $q$ analytically from a models equations, but in general we need to estimate it from samples. Concretelly all this does is construct a `scipy.interpolate.interp1d` object: if $L$ risk samples are given, they are sorted, and assigned the cumulative probabilities $Φ = \frac{1}{L+1}, \frac{2}{L+1}, \dotsc, \frac{L}{L+1}$. 
Intermediate values are obtained by linear interpolation. We don’t assign the $Φ=0$ and $Φ=1$, since it is more conservative to assume that the absolute extrema have not been sampled – instead we use linear extrapolation for the small intervals $\bigl[0, \frac{1}{L+1}\bigr)$ and $\bigl(\frac{L}{L+1}, 1\bigr]$. # # If users want to use different assumptions – for example if users know that the highest possible risk is part of the sample set – then they may construct the `scipy.interpolate.interp1d` object directly. -# + editable=true slideshow={"slide_type": ""} +# %% editable=true slideshow={"slide_type": ""} def make_empirical_risk_ppf(risk_samples: Array[float,1]): """Convert a set of samples into a callable and serializable PPF function. @@ -237,7 +235,7 @@ def make_empirical_risk_ppf(risk_samples: Array[float,1]): return interp1d(Φarr, np.sort(risk_samples), fill_value="extrapolate") -# + [markdown] editable=true slideshow={"slide_type": ""} +# %% [markdown] editable=true slideshow={"slide_type": ""} # ## Draw samples of the expected risk $R$ # # **Given** @@ -254,17 +252,18 @@ def make_empirical_risk_ppf(risk_samples: Array[float,1]): # # See {prf:ref}`alg-estimator-statistics`. 
-# + [markdown] editable=true slideshow={"slide_type": ""} +# %% [markdown] editable=true slideshow={"slide_type": ""} # :::{margin} # The rule for computing `new_M` comes from the following ($ε$: `stderr`, $ε_t$: `stderr_tol`, $M'$: `new_M`) -# $$\begin{aligned} +# ```{math} # ε &= σ/\sqrt{M} \\ # ε' &= σ/\sqrt{M'} \\ # \frac{ε^2}{ε'^2} &= \frac{M'}{M} -# \end{aligned}$$ +# \end{aligned} +# ``` # ::: -# + editable=true slideshow={"slide_type": ""} tags=["remove-cell"] +# %% editable=true slideshow={"slide_type": ""} tags=["remove-cell"] @memoize(ignore=["path_progbar"]) def draw_R_samples(mixed_risk_ppf: Callable, synth_risk_ppf: Callable, @@ -394,7 +393,7 @@ def generate_paths(M, previous_M=0, qstar=mixed_risk_ppf, δemd=δemd, c=c, res= return np.array(m1) -# + [markdown] editable=true slideshow={"slide_type": ""} +# %% [markdown] editable=true slideshow={"slide_type": ""} # ### Test sampling of expected risk $R$ # # $$\begin{aligned} @@ -420,7 +419,7 @@ def generate_paths(M, previous_M=0, qstar=mixed_risk_ppf, δemd=δemd, c=c, res= # # Within the EMD framework, models are compared as usual based on their expected risk $R$. This captures aleatoric uncertainty – i.e. randomness inherent to the model, such as the $ξ$ process above. The EMD criterion then further captures *epistemic* uncertainty by treating $R$ as a random variable, and considering *its* distribution. Roughly speaking, the better a model is at predicting the data distribution, the tighter its $R$ distribution will be. (For example, a model can have a lot of noise, but if we can predict the statistics of that noise accurately, then the distribution on $R$ will be tight and its uncertainty low.) 
-# + editable=true slideshow={"slide_type": ""} tags=["active-ipynb", "hide-input"] +# %% editable=true slideshow={"slide_type": ""} tags=["active-ipynb", "hide-input"] # λ = 1 # λB = 1.2 # σ = 0.25 @@ -464,13 +463,13 @@ def generate_paths(M, previous_M=0, qstar=mixed_risk_ppf, δemd=δemd, c=c, res= # mixed_ppfB = make_empirical_risk_ppf(riskB(observed_data)) # synth_ppfB = make_empirical_risk_ppf(riskB(synth_dataB)) -# + [markdown] editable=true slideshow={"slide_type": ""} +# %% [markdown] editable=true slideshow={"slide_type": ""} # In this example, the discrepancy between the theoretical models and the observed data distribution is very small, so the differences between quantile curves is similarly very small. # # - **Synthetic** PPF — Same theoretical model for both the defining the risk and generating the (synthetic) data. # - **Mixed** PPF – Different models for the risk and data: Again a theoretical model is used to define the risk, but now it is evaluated on real observed data. -# + editable=true slideshow={"slide_type": ""} tags=["active-ipynb", "hide-input"] +# %% editable=true slideshow={"slide_type": ""} tags=["active-ipynb", "hide-input"] # # Data panel # xarr = np.linspace(0, 3) # scat_data = hv.Scatter(zip(*observed_data), group="data", label="data") @@ -484,7 +483,7 @@ def generate_paths(M, previous_M=0, qstar=mixed_risk_ppf, δemd=δemd, c=c, res= # hv.opts.Curve("model B", color=colors["dark red"], alpha=0.7) # ) -# + editable=true slideshow={"slide_type": ""} tags=["hide-input", "active-ipynb"] +# %% editable=true slideshow={"slide_type": ""} tags=["hide-input", "active-ipynb"] # # Quantile function panels # Φarr = np.linspace(0, 1, 100) # curve_synthA = hv.Curve(zip(Φarr, synth_ppfA(Φarr)), kdims=["Φ"], vdims=["q"], group="synth", label="model A") @@ -508,7 +507,7 @@ def generate_paths(M, previous_M=0, qstar=mixed_risk_ppf, δemd=δemd, c=c, res= # hv.opts.Layout(sublabel_format="") # ) -# + editable=true slideshow={"slide_type": ""} 
tags=["active-ipynb", "hide-input"] +# %% editable=true slideshow={"slide_type": ""} tags=["active-ipynb", "hide-input"] # # R dist panels # RA_lst = draw_R_samples(mixed_ppfA, synth_ppfA, c=1, M=100, print_relstderr=True) # RB_lst = draw_R_samples(mixed_ppfB, synth_ppfB, c=1, M=100, print_relstderr=True) @@ -522,13 +521,13 @@ def generate_paths(M, previous_M=0, qstar=mixed_risk_ppf, δemd=δemd, c=c, res= # panel_RA = distA * RAlines # panel_RB = distB * RBlines -# + [markdown] editable=true slideshow={"slide_type": ""} +# %% [markdown] editable=true slideshow={"slide_type": ""} # Each model induces a distribution for its expected risk, so with two models $A$ and $B$ we have distributions for $R_A$ and $R_B$. # The figures below show the sampled values for $R_A$ and $R_B$, along overlayed with a kernel density estimate of their distribution. # # In this case both distributions are very tight, and any difference between them are due as much to finite sampling than to the likelihood picking up which one is the better fit. This translates into distributions with very high overlap, and therefore a probability approximately ½ that model $A$ is better than $B$. In other words, the criterion is *equivocal* between $A$ and $B$, as we expected. 
-# + editable=true slideshow={"slide_type": ""} tags=["active-ipynb", "hide-input"]
+# %% editable=true slideshow={"slide_type": ""} tags=["active-ipynb", "hide-input"]
 # fig = panel_RA + panel_RB + panel_RA*panel_RB
 #
 # panel_RA.opts(
@@ -546,16 +545,18 @@
 # hv.opts.Layout(sublabel_format="")
 # )

-# + editable=true slideshow={"slide_type": ""} tags=["active-ipynb"]
+# %% editable=true slideshow={"slide_type": ""} tags=["active-ipynb"]
 # (supp_emd-implementation_Bemd)=
-# ## Implementation of $\Bemd{AB;c}$
+# ## Implementation of $B^{\mathrm{EMD}}_{AB;c}$

-# + [markdown] editable=true slideshow={"slide_type": ""}
+# %% [markdown] editable=true slideshow={"slide_type": ""}
 # The EMD criterion is defined as
-# $$\begin{aligned}
+# ```{math}
+# \begin{aligned}
 # \Bemd{AB;c} &:= P(R_A < R_B \mid c) \\
 # &\approx \frac{1}{M_A M_B} \sum_{i=1}^{M_A} \sum_{j=1}^{M_B} \boldsymbol{1}_{R_{A,i} < R_{B,j}}\,,
-# \end{aligned}$$
+# \end{aligned}
+# ```
 # where $c$ is a scale parameter and $M_A$ (resp. $M_B$) is the number of samples of $R_A$ (resp. $R_B$). The expression $\boldsymbol{1}_{R_{A,i} < R_{B,j}}$ is one when $R_{A,i} < R_{B,j}$ and zero otherwise.
 # We can write the sum as nested Python loops:
 #
 # (`RA_lst` and `RB_lst` are the expected risk samples generated in the example above.)
# ::: -# + editable=true slideshow={"slide_type": ""} tags=["active-ipynb"] +# %% editable=true slideshow={"slide_type": ""} tags=["active-ipynb"] # s = 0 # for rA in RA_lst: # for rB in RB_lst: # s += (rA < rB) # s / len(RA_lst) / len(RB_lst) -# + [markdown] editable=true slideshow={"slide_type": ""} +# %% [markdown] editable=true slideshow={"slide_type": ""} # However it is much faster (about 50x in this case) to use a NumPy ufunc: -# + editable=true slideshow={"slide_type": ""} tags=["active-ipynb"] +# %% editable=true slideshow={"slide_type": ""} tags=["active-ipynb"] # np.less.outer(RA_lst, RB_lst).mean() -# + [markdown] editable=true slideshow={"slide_type": ""} +# %% [markdown] editable=true slideshow={"slide_type": ""} # The `Bemd` function is essentially a convenience function for comparing two models, which calls `draw_R_samples` (once for each models) and then computes the criterion value as above. -# + editable=true slideshow={"slide_type": ""} +# %% editable=true slideshow={"slide_type": ""} def mp_wrapper(f: Callable, *args, out: "mp.queues.Queue", **kwargs): "Wrap a function by putting its return value in a Queue. Used for multiprocessing." 
out.put(f(*args, **kwargs)) @@ -587,7 +588,7 @@ def mp_wrapper(f: Callable, *args, out: "mp.queues.Queue", **kwargs): LazyPartial = Union[Callable, Tuple[Callable, Mapping]] -# + editable=true slideshow={"slide_type": ""} +# %% editable=true slideshow={"slide_type": ""} @memoize(ignore=["progbarA", "progbarB", "use_multiprocessing"]) def Bemd(mixed_risk_ppfA: Callable, mixed_risk_ppfB: Callable, synth_risk_ppfA: Callable, synth_risk_ppfB: Callable, @@ -735,12 +736,12 @@ def Bemd(mixed_risk_ppfA: Callable, mixed_risk_ppfB: Callable, return np.less.outer(RA_lst, RB_lst).mean() -# + [markdown] editable=true slideshow={"slide_type": ""} +# %% [markdown] editable=true slideshow={"slide_type": ""} # Calling `Bemd` returns the same value as above, up to sampling error: -# + tags=["active-ipynb"] editable=true slideshow={"slide_type": ""} +# %% editable=true slideshow={"slide_type": ""} tags=["active-ipynb"] # Bemd(mixed_ppfA, mixed_ppfB, synth_ppfA, synth_ppfB, c=1) -# + editable=true slideshow={"slide_type": ""} tags=["active-ipynb", "remove-input"] +# %% editable=true slideshow={"slide_type": ""} tags=["active-ipynb", "remove-input"] # from emd_falsify.utils import GitSHA # GitSHA() diff --git a/emd_falsify/memoize.py b/src/emd_falsify/memoize.py similarity index 100% rename from emd_falsify/memoize.py rename to src/emd_falsify/memoize.py diff --git a/src/emd_falsify/path_sampling.md b/src/emd_falsify/path_sampling.md new file mode 100644 index 0000000..737d7df --- /dev/null +++ b/src/emd_falsify/path_sampling.md @@ -0,0 +1,1598 @@ +--- +jupytext: + encoding: '# -*- coding: utf-8 -*-' + formats: py:percent,md:myst + notebook_metadata_filter: -jupytext.text_representation.jupytext_version + text_representation: + extension: .md + format_name: myst + format_version: 0.13 +kernelspec: + display_name: Python (emd-paper) + language: python + name: emd-paper +--- + ++++ {"tags": ["remove-cell"], "editable": true, "slideshow": {"slide_type": ""}} + +--- +math: + '\lnLtt': 
'{\tilde{l}}' + '\EE' : '{\mathbb{E}}' + '\VV' : '{\mathbb{V}}' + '\nN' : '{\mathcal{N}}' + '\Mvar' : '{\mathop{\mathrm{Mvar}}}' + '\Beta' : '{\mathop{\mathrm{Beta}}}' + '\pathP': '{\mathop{\mathcal{P}}}' + '\lnLh' : '{\hat{l}}' + '\emdstd': '{\tilde{σ}}' + '\EMD' : '{\mathrm{EMD}}' +--- + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +(supp_path-sampling)= +# Sampling quantile paths + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +```{only} html +{{ prolog }} +%{{ startpreamble }} +%{{ endpreamble }} + +$\renewcommand{\lnLh}[1][]{\hat{l}_{#1}} +\renewcommand{\emdstd}[1][]{\tilde{σ}_{{#1}}}$ +``` + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [active-ipynb, remove-cell] +--- +# import jax +# import jax.numpy as jnp +# from functools import partial +# jax.config.update("jax_enable_x64", True) +``` + +```{code-cell} +:tags: [hide-input] + +import logging +import warnings +import math +import time +#import multiprocessing as mp +import numpy as np +import scipy.special +import scipy.optimize +#from scipy.special import digamma, polygamma +#from scipy.optimize import root, brentq +from tqdm.auto import tqdm + +from collections.abc import Callable +from typing import Optional, Union, Literal, Tuple, Generator +from scityping import Real +from scityping.numpy import Array, Generator as RNGenerator + +from emd_falsify.digitize import digitize # Used to improve numerical stability when finding Beta parameters +``` + ++++ {"tags": ["remove-cell"]} + +Notebook-only imports + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [active-ipynb, hide-input] +--- +import itertools +import scipy.stats +import holoviews as hv +from myst_nb import glue + +from emd_falsify.utils import GitSHA +from config import config # Uses config from CWD + +hv.extension(config.viz.backend) +``` + +```{code-cell} +:tags: [remove-cell] + +logger = logging.getLogger(__name__) +``` + +We want to generate paths $\lnLh$ for the 
quantile function $l(Φ)$, with $Φ \in [0, 1]$, from a stochastic process $\pathP$ determined by $\lnLtt(Φ)$ and $\emdstd(Φ)$. This process must satisfy the following requirements: +- It must generate only monotone paths, since quantile functions are monotone. +- The process must be heteroscedastic, with variability at $Φ$ given by $\emdstd(Φ)$. +- Generated paths should track the function $\lnLtt(Φ)$, and track it more tightly when variability is low. +- Paths should be “temporally uncorrelated”: each stop $Φ$ corresponds to a different ensemble of data points, which can be drawn in any order. So we don't expect any special correlation between $\lnLh(Φ)$ and $\lnLh(Φ')$, beyond the requirement of monotonicity. + + In particular, we want to avoid defining a stochastic process which starts at one point and accumulates variance, like the $\sqrt{t}$ envelope characteristic of a Gaussian white noise. + + Concretely, we require the process to be “$Φ$-symmetric”: replacing $\lnLtt(Φ) \to \lnLtt(-Φ)$ and $\emdstd(Φ) \to \emdstd(-Φ)$ should define the same process, just inverted along the $Φ$ axis. + ++++ + +(supp_path-sampling_hierarchical-beta)= +## Hierarchical beta process + ++++ + +Because the joint requirements of monotonicity, non-stationarity and $Φ$-symmetry are uncommon for a stochastic process, some care is required to define an appropriate $\pathP$. The approach we choose here is to first select the end points $\lnLh(0)$ and $\lnLh(1)$, then fill the interval by successive binary partitions: first $\bigl\{\lnLh\bigl(\tfrac{1}{2}\bigl)\bigr\}$, then $\bigl\{\lnLh\bigl(\tfrac{1}{4}\bigr), \lnLh\bigl(\tfrac{3}{4}\bigr)\bigr\}$, $\bigl\{\lnLh\bigl(\tfrac{1}{8}\bigr), \lnLh\bigl(\tfrac{3}{8}\bigr), \lnLh\bigl(\tfrac{5}{8}\bigr), \lnLh\bigl(\tfrac{7}{8}\bigr)\bigr\}$, etc. (Below we will denote these ensembles $\{\lnLh\}^{(1)}$, $\{\lnLh\}^{(2)}$, $\{\lnLh\}^{(3)}$, etc.) Thus integrating over paths becomes akin to a path integral with variable end points. 
+
Moreover, instead of drawing quantile values, we draw increments
+```{math}
+:label: eq_def-quantile-increment
+Δ l_{ΔΦ}(Φ) := \lnLh(Φ+ΔΦ) - \lnLh(Φ) \,.
+```
+Given two initial end points $\lnLh(0)$ and $\lnLh(1)$, we therefore first draw the pair $\bigl\{Δ l_{2^{-1}}(0),\; Δ l_{2^{-1}}\bigl(2^{-1}\bigr)\bigr\}$, which gives us
+```{math}
+\lnLh\bigl(2^{-1}\bigr) = \lnLh(0) + Δ l_{2^{-1}}(0) = \lnLh(1) - Δ l_{2^{-1}}\bigl(2^{-1}\bigr)\,.
+```
+Then $\bigl\{\lnLh(0), \lnLh\bigl(\frac{1}{2}\bigr) \bigr\}$ and $\bigl\{ \lnLh\bigl(\frac{1}{2}\bigr), \lnLh(1) \bigr\}$ serve as end points to draw $\bigl\{Δ l_{2^{-2}}\bigl(0\bigr),\; Δ l_{2^{-2}}\bigl(2^{-2}\bigr) \bigr\}$ and $\bigl\{Δ l_{2^{-2}}\bigl(2^{-1}\bigr),\; Δ l_{2^{-2}}\bigl(2^{-1} + 2^{-2}\bigr) \bigr\}$. We repeat the procedure as needed, sampling smaller and smaller increments, until the path has the desired resolution. As the increments are constrained:
+```{math}
+Δ l_{2^{-n}}(Φ) \in \bigl( 0, \lnLh(Φ+2^{-n+1}) - \lnLh(Φ)\,\bigr)\,,
+```
+the path thus sampled is always monotone. Note also that increments must be drawn in pairs (or more generally as a *combination*) of values constrained by their sum:
+```{math}
+:label: eq_sum-constraint
+Δ l_{2^{-n}}\bigl(Φ\bigr) + Δ l_{2^{-n}}\bigl(Φ + 2^{-n} \bigr) \stackrel{!}{=} \lnLh(Φ+2^{-n+1}) - \lnLh(Φ) \,.
+```
+The possible increments therefore lie on a 1-simplex, for which a natural choice is to use a beta distribution[^1], with the random variable corresponding to the first increment $Δ l_{2^{-n}}(Φ)$. The density function of a beta random variable has the form
+```{math}
+:label: eq_beta-pdf
+p(x_1) \propto x^{α-1} (1-x)^{β-1}\,,
+```
+with $α$ and $β$ parameters to be determined.
+
++++
+
+:::{important}
+An essential property of a stochastic process is *consistency*: it must not matter exactly how we discretize the interval {cite:p}`gillespieMathematicsBrownianMotion1996`. 
Let $\{\lnLh\}^{(n)}$ denote the steps which are added when we refine the discretization from steps of $2^{-n+1}$ to steps of $2^{-n}$:
+```{math}
+:label: eq_added-steps
+\{\lnLh\}^{(n)} := \bigl\{\lnLh(k\cdot 2^{-n}) \,\big|\, k=1,3,\dotsc,2^n \bigr\} \,.
+```
+A necessary condition for consistency is that coarsening the discretization from steps of $2^{-n}$ to steps of $2^{-n+1}$ (i.e. marginalizing over the points at $\{\lnLh\}^{(n)}$) does not substantially change the probability law:
+```{math}
+:label: eq_consistency-condition
+p\bigl(\{\lnLh\}^{(n)}\bigr) \stackrel{!}{=} \int p\bigl(\{\lnLh\}^{(n)} \,\big|\, \{\lnLh\}^{(n+1)}\bigr) \,d\{\lnLh\}^{(n+1)} \;+\; ε\,,
+```
+with $ε$ vanishing as $n$ is increased to infinity.
+
+We have found that failure to satisfy this requirement leads to unsatisfactory sampling of quantile paths. In particular, naive procedures tend to perform worse as $ΔΦ$ is reduced, making accurate integration impossible.
+:::
+
++++
+
+[^1]: One could conceivably draw all increments at once, with a [*shifted scaled Dirichlet distribution*](https://doi.org/10.1007/978-3-030-71175-7_4) instead of a beta, if it can be shown that also in this case coarsening the distribution still results in the same probability law.
+
++++
+
+(supp_path-sampling_conditions-beta-param)=
+### Conditions for choosing the beta parameters
+
+:::{margin}
+Recall that we made the assumption that the variability of the path process $\pathP$ should be determined by $δ^{\EMD}$, up to some constant $c$.{cite:p}`reneFalsifyingModels2024` This constant is determined by a calibration experiment.
+To keep expressions concise, in this section we use $\emdstd(Φ) := c δ^{\EMD}(Φ)$.
+:::
+To draw an increment $Δ l_{2^{-n}}$, we need to convert $\lnLtt(Φ)$ and $\emdstd(Φ) := c δ^{\EMD}(Φ)$ into beta distribution parameters $α$ and $β$. 
If $x_1$ follows a beta distribution, then its first two cumulants are given by +```{math} +\begin{aligned} +x_1 &\sim \Beta(α, β) \,, \\ +\EE[x_1] &= \frac{α}{α+β} \,, \\ +\VV[x_1] &= \frac{αβ}{(α+β)^2(α+β+1)} \,. \\ +\end{aligned} +``` +However, as discussed by Mateu-Figueras et al. (2021, 2011), to properly account for the geometry of a simplex, one should use instead statistics with respect to the Aitchison measure, sometimes referred to as the *center* and *metric variance*. Defining $x_2 = 1-x_1$, these can be written (Mateu-Figueras et al., 2021) +```{math} +:label: eq_Aitchison-moments + +\begin{align} +\EE_a[(x_1, x_2)] &= \frac{1}{e^{ψ(α)} + e^{ψ(β)}} \bigl[e^{ψ(α)}, e^{ψ(β)}\bigr] \,, \\ +\Mvar[(x_1, x_2)] &= \frac{1}{2} \bigl(ψ_1(α) + ψ_1(β)\bigr) \,. \\ +\end{align} +``` +Here $ψ$ and $ψ_1$ are the digamma and trigamma functions respectively. +(In addition to being more appropriate, the center and metric variance are also better suited for defining a consistent stochastic process. For example, since the metric variance is unbounded, we can always scale it with $\emdstd(Φ)$ without exceeding its domain.) + ++++ + +Since we want the sum to be $d := \lnLh(Φ+2^{-n+1}) - \lnLh(Φ)$, we define +```{math} +:label: eq_relation-beta-increment +\bigl[Δ l_{2^{-n}}\bigl(Φ\bigr),\, Δ l_{2^{-n}}\bigl(Φ+2^{-n})\bigr)\bigr] = d \bigl[x_1, x_2\bigr] \,. +``` +Then + ++++ + +$$\begin{aligned} +\EE_a\Bigl[\bigl[Δ l_{2^{-n}}\bigl(Φ\bigr),\, Δ l_{2^{-n}}\bigl(Φ+2^{-n}\bigr)\bigr]\Bigr] &= \frac{d}{e^{ψ(α)} + e^{ψ(β)}} \bigl[e^{ψ(α)}, e^{ψ(β)}\bigr] \,, \\ +\Mvar\Bigl[\bigl[Δ l_{2^{-n}}\bigl(Φ\bigr),\, Δ l_{2^{-n}}\bigl(Φ+2^{-n}\bigr)\bigr]\Bigr] &= \frac{1}{2} \bigl(ψ_1(α) + ψ_1(β)\bigr) \,. 
+
\end{aligned}$$
+
++++
+
+We now choose to define the parameters $α$ and $β$ via the following relations:
+:::{admonition}  
+:class: important
+
+$$\begin{aligned}
+\EE_a\Bigl[\bigl[Δ l_{2^{-n}}\bigl(Φ\bigr),\, Δ l_{2^{-n}}\bigl(Φ+2^{-n}\bigr)\bigr]\Bigr] &\stackrel{!}{=}^* \bigl[\, \lnLtt\bigl(Φ+2^{-n}\bigr) - \lnLtt\bigl(Φ\bigr),\,\lnLtt\bigl(Φ+2^{-n+1}\bigr) - \lnLtt\bigl(Φ+2^{-n}\bigr) \,\bigr]\,, \\
+\Mvar\Bigl[\bigl[Δ l_{2^{-n}}\bigl(Φ\bigr),\, Δ l_{2^{-n}}\bigl(Φ+2^{-n}\bigr)\bigr]\Bigr] &\stackrel{!}{=} \emdstd\bigl(Φ+2^{-n}\bigr)^2 \,.
+\end{aligned}$$ (eq_defining-conditions-a)
+:::
+
++++
+
+These follow from interpreting $\lnLtt$ and $\emdstd$ as estimators for the center and square root of the metric variance.
+We use $=^*$ to indicate equality in spirit rather than true equality, since strictly speaking, these are 3 equations for 2 unknowns. To reduce the $\EE_a$ equations to one, we use instead
+
+:::{admonition}  
+:class: important
+
+$$\frac{\EE_a\bigl[Δ l_{2^{-n}}\bigl(Φ\bigr)\bigr]}{\EE_a \bigl[Δ l_{2^{-n}}\bigl(Φ+2^{-n}\bigr)\bigr]} \stackrel{!}{=} \frac{\lnLtt\bigl(Φ+2^{-n}\bigr) - \lnLtt\bigl(Φ\bigr)}{\lnLtt\bigl(Φ+2^{-n+1}\bigr) - \lnLtt\bigl(Φ+2^{-n}\bigr)} \,.$$ (eq_defining_conditions-b)
+:::
+
++++
+
+**Remarks**
+- We satisfy the necessary condition for consistency by construction:
+  ```{math}
+  p\bigl(\{l\}^{(n)}\bigr) = \int p\bigl(\{l\}^{(n)} \,\big|\, \{l\}^{(n+1)}\bigr) \,d\{l\}^{(n+1)}\,.
+  ```
+- The stochastic process is not Markovian, so successive increments are not independent. The variance of a larger increment therefore need not equal the sum of the variance of constituent smaller ones; in other words,
+  ```{math}
+  Δ l_{2^{-n+1}}\bigl(Φ\bigr) = Δ l_{2^{-n}}\bigl(Φ\bigr) + Δ l_{2^{-n}}\bigl(Φ+2^{-n}\bigr)
+  ```
+  does *not* imply
+  ```{math}
+  \VV\bigl[Δ l_{2^{-n+1}}\bigl(Φ\bigr)\bigr] = \VV\bigl[Δ l_{2^{-n}}\bigl(Φ\bigr)\bigr] + \VV\bigl[Δ l_{2^{-n}}\bigl(Φ+2^{-n}\bigr)\bigr]\,. 
+ ``` +- Our defining equations make equivalent use of the pre ($Δ l_{2^{-n}}(Φ)$) and post ($Δ l_{2^{-n}}(Φ+2^{-n})$) increments, thus preserving symmetry in $Φ$. +- Step sizes of the form $2^{-n}$ have exact representations in binary. Thus even small step sizes should not introduce additional numerical errors. + ++++ + +(supp_path-sampling_beta-param-algorithm)= +### Formulation of the parameter equations as a root-finding problem + ++++ + +Define +:::{admonition}   +:class: important + +$$\begin{align} +r &:= \frac{\lnLtt(Φ+2^{-n}) - \lnLtt(Φ)}{\lnLtt(Φ+2^{-n+1}) - \lnLtt(Φ+2^{-n})} \,; \\ +v &:= 2 \emdstd\bigl(Φ + 2^{-n}\bigr)^2 \,. +\end{align}$$ (eq_def-r-v) +::: +The first value, $r$, is the ratio of two subincrements within $Δ l_{2^{-n+1}}(Φ)$. +Setting $\frac{e^{ψ(α)}}{e^{ψ(β)}} = r$, the two equations we need to solve for $α$ and $β$ can be written +```{math} +:label: eq_root-finding-problem + +\begin{align} +ψ(α) - ψ(β) &= \ln r \,; \\ +\ln\bigl[ ψ_1(α) + ψ_1(β) \bigr] &= \ln v \,. +\end{align} +``` +Note that these equations are symmetric in $Φ$: replacing $Φ$ by $-Φ$ simply changes the sign on both sides of the first. The use of the logarithm in the equation for $v$ helps to stabilize the numerics. 
+ +```{code-cell} +def f(lnαβ, lnr_v, _array=np.array, _exp=np.exp, _log=np.log, + digamma=scipy.special.digamma, polygamma=scipy.special.polygamma): + α, β = _exp(lnαβ).reshape(2, -1) # scipy's `root` always flattens inputs + lnr, v = lnr_v + return _array(( + digamma(α) - digamma(β) - lnr, + _log(polygamma(1, α) + polygamma(1, β)) - _log(v) + )).flat + +def f_mid(lnα, v, _exp=np.exp, _log=np.log, polygamma=scipy.special.polygamma): + "1-d equation, for the special case α=β (equiv to r=1)" + return _log(2*polygamma(1, _exp(lnα))) - _log(v) +``` + +```{code-cell} +:tags: [active-ipynb, remove-cell] + +# #@partial(jax.jit, static_argnames=("_array", "_exp", "_log", "digamma", "polygamma")) +# def f(lnαβ, lnr_v, _array=jnp.array, _exp=jnp.exp, _log=jnp.log, +# digamma=jax.scipy.special.digamma, polygamma=jax.scipy.special.polygamma): +# α, β = _exp(lnαβ).reshape(2, -1) # scipy's `root` always flattens inputs +# lnr, v = lnr_v +# return _array(( +# digamma(α) - digamma(β) - lnr, +# _log(polygamma(1, α) + polygamma(1, β)) - _log(v) +# )).flatten() + +# #@partial(jax.jit, static_argnames=("_exp", "_log", "polygamma")) +# def f_mid(lnα, v, _exp=jnp.exp, _log=jnp.log, polygamma=jax.scipy.special.polygamma): +# "1-d equation, for the special case α=β (equiv to r=1)" +# return _log(2*polygamma(1, _exp(lnα))) - _log(v) +``` + +:::{margin} +This implementation of the Jacobian is tested for both scalar and vector inputs, but fits turned out to be both faster and more numerically stable when they don't use it. +Therefore we keep it only for reference and illustration purposes. 
+::: + +```{code-cell} +:tags: [active-ipynb] + +def jac(lnαβ, lnr_v): + lnα, lnβ = lnαβ.reshape(2, -1) # scipy's `root` always flattens inputs + α, β = np.exp(lnαβ).reshape(2, -1) + j = np.block([[np.diagflat(digamma(α)*lnα), np.diagflat(-digamma(β)*lnβ)], + [np.diagflat(polygamma(1,α)*lnα), np.diagflat(polygamma(1,β)*lnβ)]]) + return j +``` + +The functions $ψ$ and $ψ_1$ diverge at zero, so $α$ and $β$ should remain positive. Therefore it makes sense to fit their logarithm: this enforces the lower bound, and improves the resolution where the derivative is highest. The two objective functions (up to scalar shift) are plotted below: the region for low $\ln α$ and $\ln β$ shows sharp variation around $α=β$, suggesting that this area may be most challenging for a numerical optimizer. In practice this is indeed what we observed. + +We found however that we can make fits much more reliable by first choosing a suitable initialization point along the $\ln α = \ln β$ diagonal. In practice this means setting $α_0 = α = β$ and solving the first equation of Eqs. {eq}`eq_root-finding-problem` for $α_0$. (We use the implementation of Brent’s method in SciPy.) Then we can solve the full 2d problem of Eqs. {eq}`eq_root-finding-problem`, with $(α_0, α_0)$ as initial value. This procedure was successful for all values of $r$ and $v$ we encountered in our experiments. 
+ +```{code-cell} +:tags: [active-ipynb, hide-input] + +α = np.logspace(-2, 1.2) +β = np.logspace(-2, 1.2).reshape(-1, 1) +digamma, polygamma = scipy.special.digamma, scipy.special.polygamma + +EEa = digamma(α) / (digamma(α) + digamma(β)) +Mvar = 0.5*(polygamma(1, α) + polygamma(1, β)) +domλ = np.array([[(ReJ:=np.real(np.linalg.eigvals(jac(np.stack((lnα, lnβ)), 0))))[abs(ReJ).argmax()] + for lnβ in np.log(β.flat)] + for lnα in np.log(α.flat)]) +``` + +```{code-cell} +:tags: [active-ipynb, hide-input] + +dim_lnα = hv.Dimension("lnα", label=r"$\ln α$") +dim_lnβ = hv.Dimension("lnβ", label=r"$\ln β$") +dim_ψα = hv.Dimension("ψα", label=r"$ψ(α)$") +dim_ψ1α = hv.Dimension("ψ1α", label=r"$ψ_1(α)$") +dim_Eobj = hv.Dimension("Eobj", label=r"$ψ(α)-ψ(β)$") +dim_lnMvar = hv.Dimension("Mvar", label=r"$\ln \mathrm{{Mvar}}[x_1, x_2]$") # Doubled {{ because Holoviews applies .format to title +dim_Reλ = hv.Dimension("Reλ", label=r"$\mathrm{{Re}}(λ)$") +fig = hv.Curve(zip(np.log(α.flat), digamma(α.flat)), kdims=[dim_lnα], vdims=[dim_ψα], label=dim_ψα.name).opts(title=dim_ψα.label) \ + + hv.Curve(zip(np.log(α.flat), polygamma(1, α.flat)), kdims=[dim_lnα], vdims=[dim_ψ1α], label=dim_ψα.name).opts(title=dim_ψ1α.label) \ + + hv.QuadMesh((np.log(α.flat), np.log(β.flat), digamma(α)-digamma(β)), + kdims=[dim_lnα, dim_lnβ], + vdims=[dim_Eobj], label=dim_Eobj.name).opts(title=dim_Eobj.label) \ + + hv.QuadMesh((np.log(α.flat), np.log(β.flat), np.log(Mvar)), + kdims=[dim_lnα, dim_lnβ], + vdims=[dim_lnMvar], label=dim_lnMvar.name).opts(title=dim_lnMvar.label) +fig.opts(hv.opts.QuadMesh(colorbar=True, clabel="")) +fig.opts(fig_inches=3, sublabel_format="", vspace=0.4, backend="matplotlib") +fig.cols(2); + +#glue("fig_polygamma", fig, display=None) +``` + +```{code-cell} +:tags: [remove-cell, active-ipynb] + +path = config.paths.figures/f"path-sampling_polygamma" +hv.save(fig, path.with_suffix(".svg"), backend="matplotlib") +hv.save(fig, path.with_suffix(".pdf"), backend="matplotlib") +``` + 
+:::{figure} ../../figures/path-sampling_polygamma.svg +:name: fig_polygamma + +Characterization of the digamma ($ψ$) and trigamma ($ψ_1$) functions, and of the metric variance $\Mvar$. +::: + ++++ + +Plotting the eigenvalues of the Jacobian (specifically, the real part of its dominant eigenvalue) in fact highlights three regions with a center at roughly $(\ln α, \ln β) = (0, 0)$. (The Jacobian does not depend on $r$ or $v$, so this is true for all fit conditions). Empirically we found that initializing fits at $(0, 0)$ resulted in robust fits for a large number of $(r,v)$ tuples, even when $r > 100$. We hypothesize that this is because it is difficult for the fit to move from one region to another; by initializing where the Jacobian is small, fits are able to find the desired values before getting stuck in the wrong region. + +Note that the color scale is clipped, to better resolve values near zero. Eigenvalues quickly increase by multiple orders of magnitude away from $(0,0)$. + +It turns out that the only region where $(0, 0)$ is *not* a good initial vector for the root solver is when $\boldsymbol{r \approx 1}$. This can be resolved by choosing a better initial value along the $(α_0, α_0)$ diagonal, as described above. In practice we found no detriment to always using the 1d problem to select an initial vector, so we use that approach in all cases. + +```{code-cell} +:tags: [active-ipynb, hide-input] + +fig = hv.QuadMesh((np.log(α.flat), np.log(β.flat), domλ), + kdims=[dim_lnα, dim_lnβ], vdims=[dim_Reλ], + label="Real part of dom. 
eig val" + ).opts(clim=(-1, 1), cmap="gwv", + colorbar=True) +#glue("fig_Jac-spectrum", fig, display=False) +``` + +```{code-cell} +:tags: [remove-cell, active-ipynb] + +path = config.paths.figures/f"path-sampling_jac-spectrum" +hv.save(fig, path.with_suffix(".svg"), backend="matplotlib") +hv.save(fig, path.with_suffix(".pdf"), backend="matplotlib") +``` + +:::{figure} ../../figures/path-sampling_jac-spectrum.svg +:name: fig_Jac-spectrum + +**Objective function has a saddle-point around (0,0)** +After rewriting Eqs. {eq}`eq_root-finding-problem` in terms of $\ln α$ and $\ln β$, we compute the Jacobian $J$. Plotted is the real part of the eigenvalue $λ_i$ of $J$ for which $\lvert\mathop{\mathrm{Re}}(λ)\rvert$ is largest; this gives an indication of how quickly the fit moves away from a given point. +In most cases, a root finding algorithm initialized at (0,0) will find a solution. +::: + ++++ + +(supp_path-sampling_beta-param-special-cases)= +### Special cases for extreme values + +For extreme values of $r$ or $v$, the beta distribution becomes degenerate and numerical optimization may break. We identify four cases requiring special treatment. + +:::::{div} full-width +::::{grid} 1 2 3 4 +:gutter: 3 + +:::{grid-item-card} + +$\boldsymbol{r = 0}$ +^^^ + +The corresponds to stating that $Δ l_{2^{-n}}(Φ)$ is infinitely smaller than $Δ l_{2^{-n}}(Φ+2^{-n})$. Thus we set $x_1 = 1$, which is equivalent to setting + +$$\begin{aligned} +Δ l_{2^{-n}}(Φ) &= 0 \,, \\ +Δ l_{2^{-n}}(Φ+2^{-n}) &= \lnLtt(Φ+2^{-n+1}) - \lnLtt(Φ) \,. +\end{aligned}$$ + +::: + +:::{grid-item-card} + +$\boldsymbol{r = 0}$ +^^^ + +The converse of the previous case: $Δ l_{2^{-n}}(Φ)$ is infinitely larger than $Δ l_{2^{-n}}(Φ+2^{-n})$. We set $x_1 = 0$. + +::: + +:::{grid-item-card} + +$\boldsymbol{v \to 0}$ +^^^ + +As $v$ vanishes, the distribution for $x_1$ approaches a Dirac delta centered on $\tfrac{1}{r+1}$. +In our implementation, we replace $x_1$ by a constant when $v < 10^{-8}$. 
def get_beta_rv(r: Real, v: Real) -> Tuple[float]:
    """
    Return a frozen random variable for `x1` given the conditions `r` and `v`:
    a fitted beta distribution in the normal case, or — for extreme values of
    `r` or `v` — a Bernoulli distribution standing in for the degenerate
    (Dirac delta) limits described above.

    This function is not exported: it is used only for illustrative purposes
    within the notebook.
    """
    # Special cases for extreme values of r.
    # As r → 0 the center 1/(1+r) → 1, so the distribution collapses onto
    # x1 = 1 (and onto x1 = 0 as r → ∞). This matches the special-case cards
    # above and `_draw_from_beta_scalar`, which returns the constants 1
    # resp. 0 in these cases. (The previous version had the two Bernoulli
    # arguments swapped, contradicting both.)
    if r < 1e-12:
        return scipy.stats.bernoulli(1)  # Dirac delta at 1
    elif r > 1e12:
        return scipy.stats.bernoulli(0)  # Dirac delta at 0
    # Special cases for extreme values of v
    elif v < 1e-8:
        # Below this threshold the fit is numerically unreliable:
        # approximate with the smallest v we can still fit.
        return get_beta_rv(r, 1e-8)
    elif v > 1e4:
        # (Actual draw function replaces beta by a Bernoulli in this case)
        return scipy.stats.bernoulli(1/(r+1))

    # Normal case: solve Eqs. {eq}`eq_root-finding-problem` for (ln α, ln β).
    # Initializing at (0, 0) is unstable for r ≈ 1, so first find a robust
    # initialization along the α = β diagonal by solving the 1d problem with
    # Brent's method, then run the full 2d root solver from there.
    x0 = scipy.optimize.brentq(f_mid, -5, 20, args=(v,))
    x0 = (x0, x0)
    res = scipy.optimize.root(f, x0, args=[math.log(r), v])
    if not res.success:
        logger.error("Failed to determine α & β parameters for beta distribution. "
                     f"Conditions were:\n {r=}\n {v=}")
    α, β = np.exp(res.x)
    return scipy.stats.beta(α, β)
already a Generator + size = None if n_samples == 1 else (*_shape(r), n_samples) + # Special cases for extreme values of r + if r < 1e-12: + special_val = 1 # EXIT AT END + elif r > 1e12: + special_val = 0 # EXIT AT END + # Special cases for extreme values of v + elif v < 1e-8: + special_val = 1 / (1+r) # EXIT AT END + elif v > 1e4: + # Replace beta by a Bernouilli distribution + return rng.binomial(1, 1/(r+1), size=size) + + # Normal case + else: + + # if v < 1e-6: + # # Some unlucky values, like r=2.2715995006941436, v=6.278153793994013e-08, + # # seem to be particularly pathological for the root solver. + # # At least in the case above, the function is continuous at those values + # # (±ε returns very similar values for a and b). + # # Replacing these by nearby values which are more friendly to binary representation + # # seems to help. + # v = digitize(v, rtol=1e-5, show=False) + + # if 0.25 < r < 4: + # # Special case for r ≈ 1 and 1 < v < 1e5: Initialize on the α=β line + # # In this case the initialization (0,0) is unstable, so we + # First find a better initialization by solving for the 1d case + # where r=1 and therefore α=β. + # (The limits where the normal case fails are around (r=1/3, v=1e4) and (r=3, v=1e4) + # NB: The values -5 and 20 are slightly beyond the special case limits 5e-8 < v < 1e4 set above; + # since also the trigamma function is monotone, this should always find a solution. + x0 = bracketed_solver(f_mid, -5, 20, args=(v,)) + + x0 = (x0, x0) + # else: + # # Normal case: Initialize fit at log(α, β) = (1, 1) + # x0 = (0., 0.) + x, success = mvroot_solver(f, x0, args=[_log(r), v], method="hybr") + if not success: + logger.error("Failed to determine α & β parameters for beta distribution. 
" + f"Conditions were:\n {r=}\n {v=}") + α, β = _exp(x) + return rng.beta(α, β, size=size) + + # Finally, if `size` was passed, ensure result has the right shape + # NB: We only reach this point if we go through one of the 3 first special cases + if size: + return np.array(special_val)[...,None].repeat(n_samples, axis=-1) + else: + return special_val + +def draw_from_beta(r: Union[Real,Array[float,1]], + v: Union[Real,Array[float,1]], + rng: Optional[RNGenerator]=None, + n_samples: int=1 + ) -> Tuple[float]: + """ + Return α, β for a beta distribution with a metric variance `v` and center + biased by `r`. More precisely, `r` is the ratio of the lengths ``c`` and + ``1-c``, where ``c`` is the center. + + `r` and `v` may either be scalars or arrays + """ + rng = np.random.default_rng(rng) # No-op if `rng` is already a Generator + + if hasattr(r, "__iter__"): + return np.array([_draw_from_beta_scalar(_r, _v, rng, n_samples) + for _r, _v in zip(r, v)]) + else: + return _draw_from_beta_scalar(r, v, rng, n_samples) +``` + +```{code-cell} +:tags: [hide-input, active-ipynb, remove-cell] + +# # We can’t jit because ScipyRootFinding.run (in the normal branch) is not jittable +# #@partial(jax.jit, static_argnames=("n_samples", "_log", "_exp", "_shape")) +# def _draw_from_beta_scalar_jax(r: Real, v: Real, rng: Array, n_samples: int=1, +# *, _log=jnp.log, _exp=jnp.exp, _shape=jnp.shape, +# ) -> Tuple[float]: +# # rng = np.random.default_rng(rng) # No-op if `rng` is already a Generator +# size = None if n_samples == 1 else (*_shape(r), n_samples) +# outshape = size or () +# rng, subkey = jax.random.split(rng) +# # Special cases for extreme values of r + +# branch_idx = jnp.select( +# [r == 0, # special val 1 +# r > 1e12, # special val 0 +# v < 1e-8, # special val 1 / (1+r) +# v > 1e4 ], # Replace beta by a Bernoulli distribution +# jnp.arange(4), +# default=4 # Normal branch +# ) +# #x = jax.lax.switch( # What we actually want to use, but requires traceable _normal_branch (it 
automatically calls jit on its args) +# x = (lambda i, flst, *args: flst[i](*args))( # Workaround: does not automatically call jit, but also isn’t traceable (so vmap will break here) +# branch_idx, +# [ lambda r,v,size,subkey: jnp.array(1) [...,None].repeat(n_samples, axis=-1).reshape(outshape), +# lambda r,v,size,subkey: jnp.array(0) [...,None].repeat(n_samples, axis=-1).reshape(outshape), +# lambda r,v,size,subkey: jnp.array(1/(1+r))[...,None].repeat(n_samples, axis=-1).reshape(outshape), +# lambda r,v,size,subkey: jax.random.bernoulli(subkey, 1/(r+1), shape=size).astype(float), +# _normal_branch_beta ], +# r, v, size, subkey # operands +# ) +# return rng, x + +# def _normal_branch_beta(r, v, size, subkey, +# beta=jax.random.beta, _exp=jnp.exp, _log=jnp.log, +# bracketed_solver=jaxopt_bisection_solver, +# #mvroot_solver=jaxopt_mvroot_solver +# mvroot_solver=scipy_mvroot_solver +# ): +# # First find a better initialization by solving for the 1d case +# # where r=1 and therefore α=β. +# # (The limits where the normal case fails are around (r=1/3, v=1e4) and (r=3, v=1e4) +# # NB: The values -5 and 20 are slightly beyond the special case limits 5e-8 < v < 1e4 set above; +# # since also the trigamma function is monotone, this should always find a solution. +# x0 = bracketed_solver(f_mid, -5, 20, args=(v,)) +# x0 = jnp.tile(x0, (2,)) # Convert x0 to (x0, x0) +# x, success = mvroot_solver(f, x0, args=[_log(r), v], method="hybr") +# try: +# assert success +# except AssertionError: +# logger.error("Failed to determine α & β parameters for beta distribution. 
" +# f"Conditions were:\n {r=}\n {v=}") +# α, β = _exp(x) +# return beta(subkey, α, β, shape=size) +``` + +```{code-cell} +:tags: [active-ipynb, remove-cell] + +# # Does not work: _draw_from_beta_scalar_jax needs to be traceable +# _draw_from_beta_scalar_jax_vectorized = jax.vmap(_draw_from_beta_scalar_jax, [0, 0, 0, None], 0) + +# def draw_from_beta_jax(r: Union[Real,Array[float,1]], +# v: Union[Real,Array[float,1]], +# rng: Array[np.uint32], +# n_samples: int=1 +# ) -> Tuple[float]: +# """ +# Return α, β for a beta distribution with a metric variance `v` and center +# biased by `r`. More precisely, `r` is the ratio of the lengths ``c`` and +# ``1-c``, where ``c`` is the center. + +# `r` and `v` may either be scalars or arrays +# """ + + +# if hasattr(r, "__iter__"): +# rngkeys = jax.random.split(rng, len(r) + 1) +# rng = rngkeys[0] +# x = _draw_from_beta_scalar_jax_vectorized(r, v, rngkeys[1:], n_samples) +# else: +# rng, subkey = jax.random.split(rng) +# x = _draw_from_beta_scalar_jax(r, v, subkey, n_samples) +# return rng, x +``` + +```{code-cell} +:tags: [remove-cell, active-ipynb] + +# # Does not work: _draw_from_beta_scalar_jax needs to be traceable +# draw_from_beta_jax(jnp.array([1., 2., 3.]), jnp.array([1., 2., 3.]), key, 4) +``` + +(supp_path-sampling_example-fitted-beta)= +### Examples of different fitted beta distributions + ++++ + +Plotted below are the beta distributions for different values of $r$ and $v$. 
+ +```{code-cell} +:tags: [active-ipynb, hide-input, full-width] + +%%opts Curve [title="Fitted beta distributions", ylim=(None,7)] +%%opts Table [title="Empirical statistics (4000 samples)"] +%%opts Layout [sublabel_format=""] + +curves = {} +stats = {} +xarr = np.linspace(0, 1, 400) +for (r, v), c in zip( + [(0.2, 1e-32), (0.2, 1e-16), (0.2, 1e-8), (0.2, 1e-4), (0.2, 1e-2), (0.2, 0.5), + (0.5, 0.5), (0.5, 0.1), (0.5, 1), + (1, 0.5), (1, 1e1), (1, 30), (1, 50), (1, 70), (1, 1e2), (1, 1e3), (1, 1e5), (1, 1e6), + (5, 0.5), (5, 8), (5, 1e4), (5, 2e4), (5, 4e4), (5, 1e6), (5, 1e8), (5, 1e16), (5, 1e32), + (6.24122778821756, 414.7130462762959), + (2.2715995006941436, 6.278153793994013e-08), + (2.271457193328191, 6.075242708902806e-08), + (2.269182419251242, 6.794061846449025e-08), + (2.691033486949275e-17, 0.02930210834055045) + ], + itertools.cycle(config.viz.colors.bright.cycle)): + rv = get_beta_rv(r, v) + if isinstance(rv.dist, scipy.stats.rv_discrete): + # Dirac delta distribution + p, = rv.args + if p == 0: + α, β = np.inf, 0 + elif p == 1: + α, β = 0, np.inf + else: + α, β = 0, 0 + else: + # rv is a beta random variable + α, β = rv.args + # α, β = get_beta_α_β(r, v) + x = draw_from_beta(r, v, n_samples=4000) + # rv = beta(α, β) + # x = rv.rvs(4000) + pdf = rv.pmf(xarr) if isinstance(rv.dist, scipy.stats.rv_discrete) else rv.pdf(xarr) + curves[(r,v)] = hv.Curve(zip(xarr, pdf), label=f"{r=}, {v=}", + kdims=[hv.Dimension("x1", label="$x_1$")], + vdims=[hv.Dimension("px1", label="p($x_1$)")] # non-TeX brackets to avoid legend issue + ).opts(color=c) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "invalid value encountered", category=RuntimeWarning) + stats[(r,v)] = tuple(f"{y:.3f}" for y in + (α, β, x.mean(), x.std(), + np.exp(digamma(α)) / (np.exp(digamma(α)) + np.exp(digamma(β))), + 0.5 * (polygamma(1, α) + polygamma(1, β)) + )) + +hmap = hv.HoloMap(curves, kdims=["r", "v"]) +dists = hmap.overlay() +dists.opts(legend_position="right") 
+dists.opts(width=500, backend="bokeh") +dists.opts(fig_inches=7, aspect=2.5, legend_cols=2, backend="matplotlib") +``` + ++++ {"tags": ["remove-cell", "skip-execution"]} + +Extra test, for a point that is especially sensitive to numerical issues: despite the very tight distribution, only 10-25% points raise a warning. + +```{code-cell} +:tags: [remove-cell, skip-execution, active-ipynb] + +r=2.2691824192512438; Δr = 0.000000000000001 +v=6.79406184644904e-08; Δv = 1e-22 +s=2 +rng = np.random.default_rng(45) +for r,v in rng.uniform([r-s*Δr, v-s*Δv], [r+s*Δr, v+s*Δv], size=(40,2)): + draw_from_beta(r,v) +``` + +Statistics for the fitted beta distributions. $\mathbb{E}[x_1]$ and $\mathrm{std}[x_1]$ are computed from 4000 samples. $\mathbb{E}_a[x_1]$ and $\mathrm{Mvar}[x_1,x_2]$ are computed using the expressions above. + +```{code-cell} +:tags: [active-ipynb, hide-input, full-width] + +def clean_table_mpl(plot, element): + "TODO: Modify table to remove vertical lines" + table = plot.handles["artist"] + +flat_stats = [k + v for k,v in stats.items()] +dim_Ex1 = hv.Dimension("Ex1", label="$\mathbb{E}[x_1]$") +dim_stdx1 = hv.Dimension("stdx1", label="$\mathrm{std}[x_1]$") +dim_Eax1 = hv.Dimension("Eax1", label="$\mathbb{E}_a[x_1]$") +dim_Mvar = hv.Dimension("Mvar", label="$\mathrm{Mvar}[x_1,x_2]$") +stattable = hv.Table(flat_stats, kdims=["r", "v"], + vdims=["α", "β", dim_Ex1, dim_stdx1, dim_Eax1, dim_Mvar]) +# We need to increase max_value_len because Holoviews uses the unformatted +# length to decide when to truncate +stattable.opts(max_rows=len(stattable)+1) # Ensure all rows are shown +stattable.opts(fig_inches=18, aspect=2.5, max_value_len=30, hooks=[clean_table_mpl], backend="matplotlib") +``` + ++++ {"tags": ["remove-cell"]} + +`draw_from_beta` also supports passing `r` and `v` as vectors. This is mostly a convenience: internally the vectors are unpacked and $(α,β)$ are solved for individually. 
+ +```{code-cell} +:tags: [active-ipynb, skip-execution, remove-cell, full-width] + +r_vals, v_vals = np.array( + [(0.2, 1e-32), (0.2, 1e-16), (0.2, 1e-8), (0.2, 1e-4), (0.2, 1e-2), (0.2, 0.5), + (0.5, 0.5), (0.5, 0.1), (0.5, 1), + (1, 0.5), (1, 1e1), (1, 1e2), (1, 1e3), (1, 1e5), (1, 1e6), + (5, 0.5), (5, 8), (5, 1e4), (5, 2e4), (5, 4e4), (5, 1e6), (5, 1e8), (5, 1e16), (5, 1e32)] +).T + +# α, β = get_α_β(r_vals, v_vals) +# rv = beta(α, β) +# x = rv.rvs((4000, len(r_vals))) +x = draw_from_beta(r_vals, v_vals, n_samples=4000) +flat_stats = np.stack((r_vals, v_vals, x.mean(axis=-1), x.std(axis=-1) + #np.exp(digamma(α)) / (np.exp(digamma(α)) + np.exp(digamma(β))), + #0.5 * (polygamma(1, α) + polygamma(1, β)) + )).T +stattable = hv.Table(flat_stats, kdims=["r", "v"], + vdims=[dim_Ex1, dim_stdx1]) +stattable.opts(max_rows=len(stattable)+1) +stattable.opts(fig_inches=14, aspect=1.8, max_value_len=30, hooks=[clean_table_mpl], backend="matplotlib") +``` + ++++ {"tags": ["remove-cell"]} + +### Comparative timings NumPy vs JAX + +:::{admonition} Takeaway +:class: important + +- JAX random number generation is about 50% slower than NumPy. +- We can JIT `f` and `f_mid`, because JAX provides `digamma` and `polygamma`. + However, this does not affect runtime in a meaningful way. +- We can write `_draw_from_beta_scalar` as a JAX function, but without JITing, it is generally a few fold slower than the NumPy version (this is expected, because JAX functions have higher overhead). +- If we rewrite it in a functional style, as would be required for JITing, it is *many* fold slower. + This is most likely due to two things (further profiling would be required to determine their relative importance): + + the extra function overhead, which would disappear if we could actually JIT; + + the use of the `Bisection` function instead of `brentq` +- We can use `jaxopt.Bisection` to get a jittable root finding function. However, it is possibly slower than `brentq`. 
+- The `jaxopt` library also provides `ScipyRootFinding`, but this just wraps `scipy.optimize.root`. So it allows the arguments to be jitted, and automatically differentiated, but it is not itself jittable. +- Consequently, `_draw_from_beta_scalar` is also not jittable. +- As a further consequence, we also can’t use `vmap` to vectorize `_draw_from_beta`, since it needs to trace through the function. +- **Current verdict** As it stands, JAX adds complexity with no gain. To make it worthwhile, one would need either need to at least implement a jittable multivariate solver, and then see if by jitting the whole function we obtain meaningful improvement. + + `jaxopt` is written using [`lax.custom_root`](https://jax.readthedocs.io/en/latest/_autosummary/jax.lax.custom_root.html#jax.lax.custom_root). Possibly this could also be useful. + + Alternatively, perhaps one can rework the problem so that finding the $α, β$ pair does not require solving a 2-d optimization problem, but some other type of problem for which `jaxopt` has a jittable function. +::: + +```{code-cell} +:tags: [remove-cell, active-ipynb] + +# %timeit jax.random.split(subkey) +# %timeit jax.random.beta(subkey, 1, 1) + +# %timeit rng.beta(1, 1) +``` + +:::{list-table} **Timing `_draw_from_beta_scalar`.** Timings marked *"JAX (imperative)"* use an implementation very near the NumPy one, with just minimal changes (replacement of np with jnp, RNG keys, etc.). In particular, the imperative version still uses `brentq`. Timings marked *"JAX (functional)"* use the more substantial rewrite still present above, which if in theory could be compiled if the multivariate root solver (`jaxopt_mvroot_solver`) was jittable. Using `scipy_mvroot_solver` instead results in very similar times. 
+:header-rows: 2 + +* - + - + - + - No JIT + - + - JIT-ed *gamma + - + - JIT-ed bisec +* - L + - r + - v + - NumPy + - JAX (imperative) + - NumPy + - JAX (imperative) + - JAX (functional) +* - 1 + - 0.2 + - 1e-32 + - 1.14 μs ± 16.2 ns (±2%) + - 508 ns ± 0.614 ns (±2%) + - 1.1 μs ± 0.677 ns (±4%) + - 501 ns ± 0.388 ns (±1%) + - 1.57 ms ± 59.6 µs +* - 1 + - 0.2 + - 1e-16 + - 1.11 μs ± 2.95 ns (±2%) + - 507 ns ± 0.56 ns (±0%) + - 1.1 μs ± 5.78 ns (±1%) + - 501 ns ± 0.651 ns (±0%) + - 1.55 ms ± 13.9 µs +* - 1 + - 0.2 + - 1e-08 + - 550 μs ± 6.47 μs (±1%) + - 2.52 ms ± 12.4 μs (±1%) + - 560 μs ± 782 ns (±3%) + - 2.07 ms ± 16.8 μs (±5%) + - 62.3 ms ± 489 µs +* - 1 + - 0.2 + - 0.0001 + - 717 μs ± 12 μs (±1%) + - 2.84 ms ± 7.44 μs (±3%) + - 698 μs ± 2.97 μs (±1%) + - 2.22 ms ± 17.4 μs (±2%) + - 75.6 ms ± 556 µs +* - 1 + - 0.2 + - 0.01 + - 737 μs ± 8.1 μs (±1%) + - 2.91 ms ± 8.94 μs (±1%) + - 735 μs ± 5.79 μs (±3%) + - 2.23 ms ± 12.4 μs (±3%) + - 76.5 ms ± 1.42 ms +* - 1 + - 0.2 + - 0.5 + - 804 μs ± 6.82 μs (±1%) + - 3.02 ms ± 16.4 μs (±1%) + - 805 μs ± 16.5 μs (±2%) + - 2.31 ms ± 19.4 μs (±1%) + - 83.3 ms ± 1.88 ms +* - 1 + - 0.5 + - 0.5 + - 757 μs ± 8.25 μs (±1%) + - 2.88 ms ± 10.5 μs (±7%) + - 764 μs ± 857 ns (±1%) + - 2.26 ms ± 12 μs (±1%) + - 76.1 ms ± 886 µs +* - 1 + - 0.5 + - 0.1 + - 735 μs ± 10.2 μs (±2%) + - 2.86 ms ± 5.1 μs (±2%) + - 754 μs ± 3.06 μs (±2%) + - 2.28 ms ± 42 μs (±3%) + - 75.7 ms ± 496 µs +* - 1 + - 0.5 + - 1.0 + - 736 μs ± 8.01 μs (±1%) + - 2.86 ms ± 11.1 μs (±3%) + - 727 μs ± 3.3 μs (±4%) + - 2.26 ms ± 21.9 μs (±0%) + - 75.6 ms ± 406 µs +* - 1 + - 1.0 + - 0.5 + - 512 μs ± 6.09 μs (±2%) + - 2.36 ms ± 9 μs (±2%) + - 559 μs ± 2 μs (±2%) + - 2.06 ms ± 13 μs (±2%) + - 49.2 ms ± 517 µs +* - 1 + - 1.0 + - 10.0 + - 516 μs ± 7.56 μs (±2%) + - 2.36 ms ± 7.87 μs (±2%) + - 557 μs ± 2.22 μs (±3%) + - 2.05 ms ± 19.1 μs (±2%) + - 49.1 ms ± 301 µs +* - 1 + - 1.0 + - 100.0 + - 479 μs ± 7.3 μs (±1%) + - 2.36 ms ± 15.4 μs (±1%) + - 498 μs ± 1.66 μs (±2%) + - 1.99 ms ± 
17.1 μs (±1%) + - 48.9 ms ± 252 µs +* - 1 + - 1.0 + - 1000.0 + - 461 μs ± 10.3 μs (±2%) + - 2.31 ms ± 2.92 μs (±2%) + - 483 μs ± 3.23 μs (±1%) + - 1.98 ms ± 8.52 μs (±1%) + - 49 ms ± 312 µs +* - 1 + - 1.0 + - 100000.0 + - 2.11 μs ± 62.1 ns (±8%) + - 810 μs ± 1.51 μs (±1%) + - 2.07 μs ± 33.3 ns (±4%) + - 810 μs ± 2.65 μs (±2%) + - 1.29 ms ± 19.6 µs +* - 1 + - 1.0 + - 1000000.0 + - 2.07 μs ± 18.5 ns (±4%) + - 811 μs ± 2.22 μs (±1%) + - 2.05 μs ± 15.6 ns (±3%) + - 809 μs ± 1.35 μs (±0%) + - 1.28 ms ± 5.65 µs +* - 1 + - 5.0 + - 0.5 + - 805 μs ± 10 μs (±1%) + - 3.02 ms ± 4.95 μs (±2%) + - 790 μs ± 628 ns (±5%) + - 2.27 ms ± 8.23 μs (±2%) + - 85.4 ms ± 2.6 ms +* - 1 + - 5.0 + - 8.0 + - 853 μs ± 11.5 μs (±0%) + - 3.16 ms ± 34.4 μs (±1%) + - 829 μs ± 3.2 μs (±2%) + - 2.31 ms ± 17.3 μs (±1%) + - 90.6 ms ± 1.18 ms +* - 1 + - 5.0 + - 10000.0 + - 587 μs ± 9.9 μs (±3%) + - 2.67 ms ± 28.2 μs (±0%) + - 573 μs ± 990 ns (±1%) + - 2.07 ms ± 18.3 μs (±1%) + - 62 ms ± 490 µs +* - 1 + - 5.0 + - 20000.0 + - 2.06 μs ± 19.8 ns (±2%) + - 813 μs ± 5.46 μs (±2%) + - 2.05 μs ± 16.6 ns (±1%) + - 808 μs ± 1.38 μs (±2%) + - 1.28 ms ± 6 µs +* - 1 + - 5.0 + - 40000.0 + - 2.16 μs ± 10.2 ns (±3%) + - 819 μs ± 7.11 μs (±1%) + - 2.07 μs ± 9.88 ns (±3%) + - 806 μs ± 1.95 μs (±0%) + - 1.28 ms ± 11.1 µs +* - 1 + - 5.0 + - 1000000.0 + - 2.06 μs ± 13.3 ns (±3%) + - 834 μs ± 1.26 μs (±2%) + - 2.06 μs ± 15.5 ns (±2%) + - 814 μs ± 2.21 μs (±1%) + - 1.28 ms ± 13.6 µs +* - 1 + - 5.0 + - 100000000.0 + - 2.08 μs ± 13 ns (±3%) + - 835 μs ± 1.88 μs (±2%) + - 2.05 μs ± 12 ns (±2%) + - 813 μs ± 1.25 μs (±1%) + - 1.27 ms ± 10.6 µs +* - 1 + - 5.0 + - 1e+16 + - 2.06 μs ± 24 ns (±3%) + - 827 μs ± 2.65 μs (±5%) + - 2.05 μs ± 15.4 ns (±4%) + - 812 μs ± 2.74 μs (±0%) + - 1.27 ms ± 5.78 µs +* - 1 + - 5.0 + - 1e+32 + - 2.11 μs ± 38.9 ns (±2%) + - 822 μs ± 9.73 μs (±1%) + - 2.06 μs ± 13.8 ns (±6%) + - 807 μs ± 2.03 μs (±1%) + - 1.28 ms ± 33.4 µs +::: + +```{code-cell} +:tags: [active-ipynb, remove-cell] + +# r_vals, v_vals = 
np.array( +# [(0.2, 1e-32), (0.2, 1e-16), (0.2, 1e-8), (0.2, 1e-4), (0.2, 1e-2), (0.2, 0.5), +# (0.5, 0.5), (0.5, 0.1), (0.5, 1), +# (1, 0.5), (1, 1e1), (1, 1e2), (1, 1e3), (1, 1e5), (1, 1e6), +# (5, 0.5), (5, 8), (5, 1e4), (5, 2e4), (5, 4e4), (5, 1e6), (5, 1e8), (5, 1e16), (5, 1e32)] +# ).T +``` + +```{code-cell} +:tags: [active-ipynb, remove-cell] + +# key = jax.random.PRNGKey(0) +# key, *subkeys = jax.random.split(key, 2) +# rng = np.random.Generator(np.random.PCG64(0)) + +# from collections import namedtuple +# ResData = namedtuple("ResData", ["avg", "std", "diff_percent"]) +# def get_resdata(res_lst): +# avgs = [res.average for res in res_lst] +# stds = [res.stdev for res in res_lst] +# slst = [] +# i = np.argmin(avgs) +# diff_percent = (np.max(avgs) - np.min(avgs)) / avgs[i] if len(res_lst) > 1 \ +# else None +# return ResData(avgs[i], stds[i], diff_percent) +``` + +```{code-cell} +:tags: [remove-cell, active-ipynb] + +# try: +# time_data = np.load("timing_cache_jax-vs-numpy_draw-from-beta.npy") +# except FileNotFoundError: +# time_results = {} +# else: +# time_results = {(L, r, v): (ResData(npa, npb, npc), ResData(jaxa, jaxb, jaxc)) +# for (L, r, v, npa, npb, npc, jaxa, jaxb, jaxc) in time_data} +``` + +```{code-cell} +:tags: [active-ipynb, remove-cell] + +# def timing_run(func, desc, rng_lst, time_results): +# progL = tqdm([1], desc="Sample size") # [1, 7, 49, 343] +# progrv = tqdm(list(zip(r_vals, v_vals)), desc="r, v") +# for L in progL: +# progrv.reset() +# for r, v in progrv: +# if (L, r, v, desc) in time_results: +# continue +# # Warm-up to make sure compilation is not included in profiling time +# _, x = func(r, v, rng_lst[0]); getattr(x, "block_until_ready", lambda:None)() +# res_lst = [] +# for rng in rng_lst: +# res = %timeit -o func(r, v, rng) +# res_lst.append(res) +# time_results[(L, r, v, desc)] = get_resdata(res_lst) +``` + +```{code-cell} +:tags: [active-ipynb, remove-cell, skip-execution] + +# timing_run(_draw_from_beta_scalar_jax, "jax, 
functional", subkeys, time_results=time_results) +``` + +```{code-cell} +:tags: [active-ipynb, remove-cell] + +# progL = tqdm([1], desc="Sample size") # [1, 7, 49, 343] +# progrv = tqdm(list(zip(r_vals, v_vals)), desc="r, v") +# for L in progL: +# progrv.reset() +# for r, v in progrv: +# if (L, r, v) in time_results: +# continue +# res_np = [] +# res_jax = [] +# for subkey in subkeys: +# res = %timeit -o _draw_from_beta_scalar(r, v, rng) +# res_np.append(res) +# res = %timeit -o _draw_from_beta_scalar_jax(r, v, subkey) +# res_jax.append(res) +# time_results[(L, r, v)] = (get_resdata(res_np), get_resdata(res_jax)) +``` + +```{code-cell} +:tags: [remove-cell, active-ipynb] + +# _time_data = np.array([ +# [L, r, v, *res_np, *res_jax] +# for (L, r, v), (res_np, res_jax) in time_results.items() +# ]) +``` + +```{code-cell} +:tags: [active-ipynb, remove-cell] + +# np.save("timing_cache_jax-vs-numpy_draw-from-beta", _time_data) +``` + +```{code-cell} +# for ((L, r, v, npa, npb, npc, jaxa, jaxb, jaxc), +# (_, _, _, _npa, _npb, _npc, _jaxa, _jaxb, _jaxc)) in zip(time_data, _time_data): +# print("* -", int(L)) +# print(" -", r) +# print(" -", v) +# print(" -", time_str(ResData(npa, npb, npc))) +# print(" -", time_str(ResData(jaxa, jaxb, jaxc))) +# print(" -", time_str(ResData(_npa, _npb, _npc))) +# print(" -", time_str(ResData(_jaxa, _jaxb, _jaxc))) +``` + +```{code-cell} +:tags: [remove-cell, active-ipynb] + +# def format_with_unit(val, unit): +# if val >= 1: +# pass +# elif val >= 1e-3: +# val, unit = val*1e3, f"m{unit}" +# elif val >= 1e-6: +# val, unit = val*1e6, f"μ{unit}" +# else: +# val, unit = val*1e9, f"n{unit}" +# return f"{val:.3g} {unit}" +# def time_str(data: ResData): +# return f"{format_with_unit(data.avg, 's')} ± {format_with_unit(data.std, 's')} (±{data.diff_percent*100:.0f}%)" +# time_table = hv.Table([(int(L), r, v, time_str(ResData(npa, npb, npc)), time_str(ResData(jaxa, jaxb, jaxc))) +# for (L, r, v, npa, npb, npc, jaxa, jaxb, jaxc) in time_data], +# 
kdims=["# fits", "r", "v"], vdims=["NumPy", "JAX"]) +# time_table.opts(aspect=20/len(time_table), fig_inches=10, backend="matplotlib") \ +# .opts(fit_columns=True, width=1000, backend="bokeh", ) +``` + ++++ {"tags": ["remove-cell"]} + +### Timings for the root solver + ++++ {"tags": ["remove-cell"]} + +Reported below are timings for different numbers of fits, comparing a “loop” approach where `get_α_β` is called for each pair `(r, v)`, and a “vectorized” approach where `r` and `v` are passed as vectors. + +At the moment there is no clear benefit to using the vectorized form; this is likely because it performs the fit in a much higher dimensional space, and it must continue calculations with this large vector until all equations are solved. + +NB: Timings were done for an older version, where the function returned the $α, β$ parameters rather than a beta random variable. This function also performed vectorized operations by enlarging the fit dimension, rather than the current approach of looping over $(r, v)$ pairs. The observation that this approach was in general no faster than looping partly motivated the change, so we keep these timings results as documentation. 
+ ++++ {"tags": ["remove-cell"]} + +```python +time_results = [] +for L in tqdm([1, 7, 49, 343]): + r_vals = np.random.uniform(low=0, high=1, size=L) + v_vals = np.random.exponential(3, size=L) + [get_α_β(r, v) for r, v in zip(r_vals, v_vals)] + res_loop = %timeit -q -o [get_α_β(r, v) for r, v in zip(r_vals, v_vals)] + res_vec = %timeit -q -o get_α_β(r_vals, v_vals) + time_results.append((L, res_loop, res_vec)) + +def time_str(time_res): s = str(time_res); return s[:s.index(" per loop")] +time_table = hv.Table([(L, time_str(res_loop), time_str(res_vec)) + for L, res_loop, res_vec in time_results], + kdims=["# fits"], vdims=["loop", "vectorized"]) +time_table.opts(aspect=4, fig_inches=7) +``` + ++++ {"tags": ["remove-cell"]} + +| # fits | loop | vectorized | +|-------:|-----------------:|------------------:| +| 1 | 555 μs ± 14.2 μs | 515 μs ± 20.2 μs | +| 7 | 4.06 ms ± 108 μs | 1.8 ms ± 20 μs | +| 49 | 28.5 ms ± 589 μs | 17.1 ms ± 146 μs | +| 343 | 187 ms ± 2.15 ms | 3.95 ms ± 33.1 ms | + ++++ {"tags": ["remove-cell"]} + +The test above samples $r$ from the entire interval $[0, 1]$, but we get similar results when restricting values to the “easy” region $[0.4, 0.6]$. Reducing the values of $v$ (by sampling from a distribution with lighter tail) does bring down the execution time of the vectorized approach. This is consistent with the hypothesis that a few especially difficult $(r,v)$ combinations are slowing down the computations. 
+
++++ {"tags": ["remove-cell"]}
+
+```python
+time_results2 = []
+for L in tqdm([1, 7, 49, 343]):
+    r_vals = np.random.uniform(low=0.4, high=.6, size=L)
+    v_vals = np.random.exponential(1, size=L)
+    [get_α_β(r, v) for r, v in zip(r_vals, v_vals)]
+    res_loop = %timeit -q -o [get_α_β(r, v) for r, v in zip(r_vals, v_vals)]
+    res_vec = %timeit -q -o get_α_β(r_vals, v_vals)
+    time_results2.append((L, res_loop, res_vec))
+
+def time_str(time_res): s = str(time_res); return s[:s.index(" per loop")]
+time_table = hv.Table([(L, time_str(res_loop), time_str(res_vec))
+                       for L, res_loop, res_vec in time_results2],
+                      kdims=["# fits"], vdims=["loop", "vectorized"])
+time_table.opts(aspect=4, fig_inches=7)
+```
+
++++ {"tags": ["remove-cell"], "editable": true, "slideshow": {"slide_type": ""}}
+
+| # fits | loop | vectorized |
+|-------:|------------------:|------------------:|
+| 1 | 474 μs ± 12.3 μs | 452 μs ± 2.62 μs |
+| 7 | 3.92 ms ± 43.7 μs | 1.65 ms ± 22.9 μs |
+| 49 | 29.2 ms ± 167 μs | 16.6 ms ± 84.8 μs |
+| 343 | 214 ms ± 4.27 ms | 1.37 s ± 6.24 ms |
+
++++
+
+(supp_path-sampling_implementation)=
+## Implementation
+
++++
+
+### Generate a single path
+
+Now that we know how to construct a sampling distribution for the increments, sampling an entire path is just a matter of repeating the process recursively until we reach the desired resolution.
+
+```{code-cell}
+def generate_path_hierarchical_beta(
+        qstar: Callable, deltaEMD: Callable, c: float,
+        qstart: float, qend: float, res: int=8, rng=None,
+        *, Phistart: float=0., Phiend: float=1.
+    ) -> Tuple[Array[float,1], Array[float,1]]:
+    """
+    Returned path has length ``2**res + 1``.
+    If `qstar` and `Mvar` have a different length, they are linearly-
+    interpolated to align with the returned array `Φhat`.
+
+    Parameters
+    ----------
+    qstar: Empirical (mixed) quantile function. The generated path will
+        be centered on this trace.
Although any callable mapping [0, 1] to R
+        is accepted, in all practical use cases this will be the output of
+        :func:`emd.make_empirical_risk_ppf`.
+    deltaEMD: Callable returning discrepancy values (i.e. {math}`δ^\mathrm{EMD}`).
+        Like `qstar`, this must map [0, 1] to R+.
+    c: Proportionality constant relating δEMD to
+        the square root of the metric variance.
+    qstart, qend: The end point ordinates. The hierarchical beta process "fills in"
+        the space between a start and an end point, but it needs the value of _q_
+        to be given at those points. These end points are by default Φ=0 and Φ=1,
+        which correspond to the bounds of a quantile function.
+    res: Returned paths have length ``2**res + 1``.
+        Typical values of `res` are 6, 7 and 8, corresponding to paths of length
+        64, 128 and 256. Smaller values may be useful to accelerate debugging. Larger values
+        increase the computation cost with typically negligible improvements in accuracy.
+    rng: Any argument accepted by `numpy.random.default_rng` to initialize an RNG.
+    Phistart, Phiend: (Keyword only) The abscissa corresponding to the ordinates
+        `qstart` and `qend`. The default values are 0 and 1, appropriate for generating
+        a quantile function.
+
+    Returns
+    -------
+    The pair Φhat, qhat.
+    Φhat: Array of equally spaced values between 0 and 1, with step size ``2**-res``.
+    qhat: The generated path, evaluated at the values listed in `Φhat`.
+    """
+    # Validation
+    res = int(res)
+    if Phistart >= Phiend:
+        raise ValueError("`Phistart` must be strictly smaller than `Phiend`. "
+                         f"Received:\n  {Phistart=}\n  {Phiend=}")
+    # if not (len(Phi) == len(qstar) == len(Mvar)):
+    #     raise ValueError("`Phi`, `qstar` and `Mvar` must all have "
+    #                      "the same shape. 
Values received have the respective shapes " + # f"{np.shape(Phi)}, {np.shape(qstar)}, {np.shape(Mvar)}") + if res <= 1: + raise ValueError("`res` must be greater than 1.") + rng = np.random.default_rng(rng) # No-op if `rng` is already a Generator + # Interpolation + N = 2**res + 1 + Φarr = np.linspace(Phistart, Phiend, N) + qsarr = qstar(Φarr) + # Pre-computations + Mvar = c * deltaEMD(Φarr)**2 + # Algorithm + qhat = np.empty(N) + qhat[0] = qstart + qhat[-1] = qend + for n in range(1, res+1): + Δi = 2**(res-n) + i = np.arange(Δi, N, 2*Δi) # Indices for the new values to insert + d = qhat[i+Δi] - qhat[i-Δi] # Each pair of increments must sum to `d` + r = (qsarr[i] - qsarr[i-Δi]) / (qsarr[i+Δi]-qsarr[i]) # Ratio of first/second increments + v = 2*Mvar[i] + # Prevent failure in pathological cases where `q` is flat in some places + if ((qsarr[i+Δi] - qsarr[i-Δi]) == 0).any(): # If both increments are zero, then the calculation for `r` gives 0/0 => NaN + logger.warning("The quantile function is not strictly increasing. Non-increasing intervals were skipped. The sampling of EMD paths may be less accurate.") + keep = ((qsarr[i+Δi] - qsarr[i-Δi]) > 0) # `draw_from_beta` can deal with `r=0` and `r=inf`, but not `r=NaN`. 
+ x1 = np.zeros_like(r) + x1[keep] = draw_from_beta(r[keep], v[keep], rng=rng) + else: + x1 = draw_from_beta(r, v, rng=rng) + qhat[i] = qhat[i-Δi] + d * x1 + return Φarr, qhat +``` + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [active-ipynb, hide-input] +--- +qstar = np.log +σtilde = lambda Φ: 1.5*np.ones_like(Φ) +Φhat, qhat = generate_path_hierarchical_beta( + qstar, σtilde, c=1, qstart=np.log(0.01), qend=np.log(1), + res=8, rng=None, Phistart=0.01) # Can’t start at zero precisely because log(0) is undefined + +Φarr = np.linspace(0.01, 1, 20) +curve_qstar = hv.Curve(zip(Φarr, qstar(Φarr)), label=r"$\tilde{l}$", kdims=["Φ"]) +curve_qhat = hv.Curve(zip(Φhat, qhat), label=r"$\hat{l}$", kdims=["Φ"]) +fig.opts(ylabel="") +``` + +### Generate ensemble of sample paths + +:::{Note} This is the only public function exposed by this module +::: + +To generate $R$ paths, we repeat the following $R$ times: +1. Select start and end points by sampling $\nN(\tilde{Φ}[0], \lnLtt{}[0])$ and $\nN(\tilde{Φ}[-1], \lnLtt{}[-1])$. +2. Call `generate_path_hierarchical_beta`. + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [remove-cell] +--- +def generate_quantile_paths(qstar: Callable, deltaEMD: Callable, c: float, + M: int, res: int=8, rng=None, + *, Phistart: float=0., Phiend: float=1, + progbar: Union[Literal["auto"],None,tqdm,"mp.queues.Queue"]="auto", + previous_M: int=0 + ) -> Generator[Tuple[Array[float,1], Array[float,1]], None, None]: + r""" + Generate `M` distinct quantile paths, with trajectory and variability determined + by `qstar` and `deltaEMD`. + Paths are generated using the hierarchical beta algorithm, with normal distributions + for the end points and beta distributions for the increments. + + Returned paths have length ``2**res + 1``. + Typical values of `res` are 6, 7 and 8, corresponding to paths of length + 64, 128 and 256. Smaller values may be useful to accelerate debugging. 
Larger values
+    increase the computation cost with (typically) negligible improvements in accuracy.
+
+    .. Note:: When using multiprocessing to call this function multiple times,
+       use either a `multiprocessing.Queue` or `None` for the `progbar` argument.
+
+    Parameters
+    ----------
+    qstar: Empirical (mixed) quantile function. The generated paths will
+        be centered on this trace. Although any callable mapping [0, 1] to R
+        is accepted, in all practical use cases this will be the output of
+        :func:`emd.make_empirical_risk_ppf`.
+    deltaEMD: Callable returning scaled discrepancy values
+        (i.e. {math}`δ^\mathrm{EMD}`). Like `qstar`, this must map [0, 1] to R+.
+    c: Proportionality constant relating δEMD to
+        the square root of the metric variance.
+    M: Number of paths to generate.
+    res: Returned paths have length ``2**res + 1``.
+        Typical values of `res` are 6, 7 and 8, corresponding to paths of length
+        64, 128 and 256. Smaller values may be useful to accelerate debugging, but larger
+        values are unlikely to be useful.
+    rng: Any argument accepted by `numpy.random.default_rng` to initialize an RNG.
+    progbar: Control whether to create a progress bar or use an existing one.
+        - With the default value 'auto', a new tqdm progress bar is created.
+          This is convenient, however it can lead to many bars being created &
+          destroyed if this function is called within a loop.
+        - To prevent this, a tqdm progress bar can be created externally (e.g. with
+          ``tqdm(desc="Generating paths")``) and passed as argument.
+          Its counter will be reset to zero, and its total set to `M` + `previous_M`.
+        - (Multiprocessing): To support updating progress bars within child processes,
+          a `multiprocessing.Queue` object can be passed, in which case no
+          progress bar is created or updated. Instead, each time a quantile path
+          is sampled, a value is added to the queue with ``put``. This way, the
+          parent process can update a progress bar by consuming the queue; e.g.
+          ``while not q.empty(): progbar.update()``.
+ The value added to the queue is `M`+`previous_M`, which can be + used to update the total value of the progress bar. + - A value of `None` prevents displaying any progress bar. + + previous_M: Used only to improve the display of the progress bar: + the indicated total on the progress bar will be `M` + `previous_M`. + Use this when adding paths to a preexisting ensemble. + + Yields + ------ + Tuples of two 1-d arrays: (Φhat, qhat). + + Notes + ----- + Returned paths always have an odd number of steps, which as a side benefit is + beneficial for integration with Simpson's rule. + """ + rng = np.random.default_rng(rng) + total = M + previous_M + progbar_is_queue = ("multiprocessing.queues.Queue" in str(type(progbar).mro())) # Not using `isinstance` avoids having to import multiprocessing & multiprocessing.queues + if isinstance(progbar, str) and progbar == "auto": + progbar = tqdm(desc="Sampling quantile paths", leave=False, + total=total) + elif progbar is not None and not progbar_is_queue: + progbar.reset(total) + if previous_M: + # Dynamic miniters don’t work well with a restarted prog bar: use whatever miniter was determined on the first run (or 1) + progbar.miniters = max(progbar.miniters, 1) + progbar.dynamic_miniters = False + progbar.n = previous_M + progbar.refresh() + for r in range(M): + for _ in range(100): # In practice, this should almost always work on the first try; 100 failures would mean a really pathological probability + qstart = rng.normal(qstar(Phistart) , c*deltaEMD(Phistart)) + qend = rng.normal(qstar(Phiend), c*deltaEMD(Phiend)) + if qstart < qend: + break + else: + raise RuntimeError("Unable to generate start and end points such that " + "start < end. 
Are you sure `qstar` is compatible " + "with monotone paths ?") + Φhat, qhat = generate_path_hierarchical_beta( + qstar, deltaEMD, c, qstart=qstart, qend=qend, + res=res, rng=rng, Phistart=Phistart, Phiend=Phiend) + + yield Φhat, qhat + + if progbar_is_queue: + progbar.put(total) + elif progbar is not None: + progbar.update() + time.sleep(0.05) # Without a small wait, the progbar might not update +``` + +### Usage example + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [active-ipynb, hide-input] +--- +#Φtilde = np.linspace(0.01, 1, 20) +qstar = np.log +σtilde = lambda Φ: 1.5*np.ones_like(Φ) + +colors = cycle = config.viz.colors.bright + +curves_qhat = [] +for Φhat, qhat in generate_quantile_paths( + qstar, σtilde, c=1, M=10, res=8, rng=None, Phistart=0.01): + curves_qhat.append(hv.Curve(zip(Φhat, qhat), label=r"$\hat{l}$", kdims=["Φ"]) + .opts(color=colors.grey)) +Φtilde = np.linspace(0.01, 1, 20) +curve_qstar = hv.Curve(zip(Φtilde, qstar(Φtilde)), label=r"$\tilde{l}$", kdims=["Φ"]) \ + .opts(color=colors.blue) + +hv.Overlay((*curves_qhat, curve_qstar)).opts(ylabel="") +``` + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [remove-input, active-ipynb] +--- +GitSHA() +``` diff --git a/emd_falsify/path_sampling.py b/src/emd_falsify/path_sampling.py similarity index 94% rename from emd_falsify/path_sampling.py rename to src/emd_falsify/path_sampling.py index 46f4536..1735e8c 100644 --- a/emd_falsify/path_sampling.py +++ b/src/emd_falsify/path_sampling.py @@ -33,17 +33,17 @@ # (supp_path-sampling)= # # Sampling quantile paths -# %% [markdown] +# %% [markdown] editable=true slideshow={"slide_type": ""} # ```{only} html # {{ prolog }} # %{{ startpreamble }} # %{{ endpreamble }} # # $\renewcommand{\lnLh}[1][]{\hat{l}_{#1}} -# \renewcommand{\emdstd}[1][]{\tilde{σ}_{{#1}}} +# \renewcommand{\emdstd}[1][]{\tilde{σ}_{{#1}}}$ # ``` -# %% tags=["active-ipynb", "remove-cell"] +# %% editable=true slideshow={"slide_type": ""} 
tags=["active-ipynb", "remove-cell"] # # import jax # # import jax.numpy as jnp # # from functools import partial @@ -72,7 +72,7 @@ # %% [markdown] tags=["remove-cell"] # Notebook-only imports -# %% tags=["active-ipynb", "hide-input"] editable=true slideshow={"slide_type": ""} +# %% editable=true slideshow={"slide_type": ""} tags=["active-ipynb", "hide-input"] # import itertools # import scipy.stats # import holoviews as hv @@ -103,23 +103,42 @@ # %% [markdown] # Because the joint requirements of monotonicity, non-stationarity and $Φ$-symmetry are uncommon for a stochastic process, some care is required to define an appropriate $\pathP$. The approach we choose here is to first select the end points $\lnLh(0)$ and $\lnLh(1)$, then fill the interval by successive binary partitions: first $\bigl\{\lnLh\bigl(\tfrac{1}{2}\bigl)\bigr\}$, then $\bigl\{\lnLh\bigl(\tfrac{1}{4}\bigr), \lnLh\bigl(\tfrac{3}{4}\bigr)\bigr\}$, $\bigl\{\lnLh\bigl(\tfrac{1}{8}\bigr), \lnLh\bigl(\tfrac{3}{8}\bigr), \lnLh\bigl(\tfrac{5}{8}\bigr), \lnLh\bigl(\tfrac{7}{8}\bigr)\bigr\}$, etc. (Below we will denote these ensembles $\{\lnLh\}^{(1)}$, $\{\lnLh\}^{(2)}$, $\{\lnLh\}^{(3)}$, etc.) Thus integrating over paths becomes akin to a path integral with variable end points. # Moreover, instead of drawing quantile values, we draw increments -# $$Δ l_{ΔΦ}(Φ) := \lnLh(Φ+ΔΦ) - \lnLh(Φ) \,.$$ (eq_def-quantile-increment) +# ```{math} +# :label: eq_def-quantile-increment +# Δ l_{ΔΦ}(Φ) := \lnLh(Φ+ΔΦ) - \lnLh(Φ) \,. +# ``` # Given two initial end points $\lnLh(0)$ and $\lnLh(1)$, we therefore first we draw the pair $\bigl\{Δ l_{2^{-1}}(0),\; Δ l_{2^{-1}}\bigl(2^{-1}\bigr)\}$, which gives us -# $$\lnLh\bigl(2^{-1}\bigr) = \lnLh(0) + Δ l_{2^{-1}}(0) = \lnLh(1) - Δ l_{2^{-1}}\bigl(2^{-1}\bigr)\,.$$ +# ```{math} +# \lnLh\bigl(2^{-1}\bigr) = \lnLh(0) + Δ l_{2^{-1}}(0) = \lnLh(1) - Δ l_{2^{-1}}\bigl(2^{-1}\bigr)\,. 
+# ``` # Then $\bigl\{\lnLh(0), \lnLh\bigl(\frac{1}{2}\bigr) \bigr\}$ and $\bigl\{ \lnLh\bigl(\frac{1}{2}\bigr), \lnLh(1) \bigr\}$ serve as end points to draw $\bigl\{Δ l_{2^{-2}}\bigl(0\bigr),\; Δ l_{2^{-2}}\bigl(2^{-2}\bigr) \bigr\}$ and $\bigl\{Δ l_{2^{-2}}\bigl(2^{-1}\bigr),\; Δ l_{2^{-2}}\bigl(2^{-1} + 2^{-2}\bigr) \bigr\}$. We repeat the procedure as needed, sampling smaller and smaller incremenents, until the path has the desired resolution. As the increments are constrained: -# $$Δ l_{2^{-n}}(Φ) \in \bigl( 0, \lnLh(Φ+2^{-n+1}) - \lnLh(Φ)\,\bigr)\,, $$ +# ```{math} +# Δ l_{2^{-n}}(Φ) \in \bigl( 0, \lnLh(Φ+2^{-n+1}) - \lnLh(Φ)\,\bigr)\,, +# ``` # the path thus sampled is always monotone. Note also that increments must be drawn in pairs (or more generally as a *combination*) of values constrained by their sum: -# $$Δ l_{2^{-n}}\bigl(Φ\bigr) + Δ l_{2^{-n}}\bigl(Φ + 2^{-n} \bigr) \stackrel{!}{=} \lnLh(Φ+2^{-n+1}) - \lnLh(Φ) \,.$$ (eq_sum-constraint) +# ```{math} +# :label: eq_sum-constraint +# Δ l_{2^{-n}}\bigl(Φ\bigr) + Δ l_{2^{-n}}\bigl(Φ + 2^{-n} \bigr) \stackrel{!}{=} \lnLh(Φ+2^{-n+1}) - \lnLh(Φ) \,. +# ``` # The possible increments therefore lie on a 1-simplex, for which a natural choice is to use a beta distribution[^1], with the random variable corresponding to the first increment $Δ l_{2^{-n}}(Φ)$. The density function of a beta random variable has the form -# $$p(x_1) \propto x^{α-1} (1-x)^{β-1}\,,$$ (eq_beta-pdf) +# ```{math} +# :label: eq_beta-pdf +# p(x_1) \propto x^{α-1} (1-x)^{β-1}\,, +# ``` # with $α$ and $β$ parameters to be determined. # %% [markdown] # :::{important} # An essential property of a stochastic process is *consistency*: it must not matter exactly how we discretize the interval {cite:p}`gillespieMathematicsBrownianMotion1996`. 
Let $\{\lnLh\}^{(n)}$ denote the steps which are added when we refine the discretization from steps of $2^{-n+1}$ to steps of $2^{-n}$:
-# $$\{\lnLh\}^{(n)} := \bigl\{\lnLh(k\cdot 2^{-n}) \,\big|\, k=1,3,\dotsc,2^n \bigr\} \,.$$ (eq_added-steps)
+# ```{math}
+# :label: eq_added-steps
+# \{\lnLh\}^{(n)} := \bigl\{\lnLh(k\cdot 2^{-n}) \,\big|\, k=1,3,\dotsc,2^n \bigr\} \,.
+# ```
# A necessary condition for consistency is that coarsening the discretization from steps of $2^{-n}$ to steps of $2^{-n+1}$ (i.e. marginalizing over the points at $\{\lnLh\}^{(n)}$) does not substantially change the probability law:
-# $$p\bigl(\{\lnLh\}^{(n)}\bigr)\bigr) \stackrel{!}{=} \int p\bigl(\{\lnLh\}^{(n)} \,\big|\, \{\lnLh\}^{(n+1)}\bigr) \,d\{\lnLh\}^{(n+1)} \;+\; ε\,,$$ (eq_consistency-condition)
+# ```{math}
+# :label: eq_consistency-condition
+# p\bigl(\{\lnLh\}^{(n)}\bigr) \stackrel{!}{=} \int p\bigl(\{\lnLh\}^{(n)} \,\big|\, \{\lnLh\}^{(n+1)}\bigr) \,d\{\lnLh\}^{(n+1)} \;+\; ε\,,
+# ```
# with $ε$ vanishing as $n$ is increased to infinity.
#
# We have found that failure to satisfy this requirement leads to unsatisfactory sampling of quantile paths. In particular, naive procedures tend to perform worse as $ΔΦ$ is reduced, making accurate integration impossible.
@@ -133,26 +152,35 @@
# ### Conditions for choosing the beta parameters
#
# :::{margin}
-# Recall that in {numref}`sec_integrating-Me`, we made the assumption that the variability of the path process $\pathP$ should determined by $δ^{\EMD}$, up to some constant $c$. This constant is determined by a calibration experiment.
+# Recall that we made the assumption that the variability of the path process $\pathP$ should be determined by $δ^{\EMD}$, up to some constant $c$.{cite:p}`reneFalsifyingModels2024` This constant is determined by a calibration experiment.
# To keep expressions concise, in this section we use $\emdstd(Φ) := c δ^{\EMD}(Φ)$.
# ::: # To draw an increment $Δ l_{2^{-n}}$, we need to convert $\lnLtt(Φ)$ and $\emdstd(Φ) := c δ^{\EMD}(Φ)$ into beta distribution parameters $α$ and $β$. If $x_1$ follows a beta distribution, then its first two cumulants are given by -# $$\begin{aligned} +# ```{math} +# \begin{aligned} # x_1 &\sim \Beta(α, β) \,, \\ # \EE[x_1] &= \frac{α}{α+β} \,, \\ # \VV[x_1] &= \frac{αβ}{(α+β)^2(α+β+1)} \,. \\ -# \end{aligned}$$ +# \end{aligned} +# ``` # However, as discussed by Mateu-Figueras et al. (2021, 2011), to properly account for the geometry of a simplex, one should use instead statistics with respect to the Aitchison measure, sometimes referred to as the *center* and *metric variance*. Defining $x_2 = 1-x_1$, these can be written (Mateu-Figueras et al., 2021) -# $$\begin{align} -# \EE_a[(x_1, x_2)] &= \frac{1}{e^{ψ(α)} + e^{ψ(β)}} \bigl[e^{ψ(α)}, e^{ψ(β)}\bigr] \,, \label{eq_Aitchison-moments__EE} \\ -# \Mvar[(x_1, x_2)] &= \frac{1}{2} \bigl(ψ_1(α) + ψ_1(β)\bigr) \,. \label{eq_Aitchison-moments__Mvar} -# \end{align}$$ (eq_Aitchison-moments) +# ```{math} +# :label: eq_Aitchison-moments +# +# \begin{align} +# \EE_a[(x_1, x_2)] &= \frac{1}{e^{ψ(α)} + e^{ψ(β)}} \bigl[e^{ψ(α)}, e^{ψ(β)}\bigr] \,, \\ +# \Mvar[(x_1, x_2)] &= \frac{1}{2} \bigl(ψ_1(α) + ψ_1(β)\bigr) \,. \\ +# \end{align} +# ``` # Here $ψ$ and $ψ_1$ are the digamma and trigamma functions respectively. # (In addition to being more appropriate, the center and metric variance are also better suited for defining a consistent stochastic process. For example, since the metric variance is unbounded, we can always scale it with $\emdstd(Φ)$ without exceeding its domain.) 
# %% [markdown] # Since we want the sum to be $d := \lnLh(Φ+2^{-n+1}) - \lnLh(Φ)$, we define -# $$\bigl[Δ l_{2^{-n}}\bigl(Φ\bigr),\, Δ l_{2^{-n}}\bigl(Φ+2^{-n})\bigr)\bigr] = d \bigl[x_1, x_2\bigr] \,.$$ (eq_relation-beta-increment) +# ```{math} +# :label: eq_relation-beta-increment +# \bigl[Δ l_{2^{-n}}\bigl(Φ\bigr),\, Δ l_{2^{-n}}\bigl(Φ+2^{-n})\bigr)\bigr] = d \bigl[x_1, x_2\bigr] \,. +# ``` # Then # %% [markdown] @@ -185,11 +213,17 @@ # %% [markdown] # **Remarks** # - We satisfy the necessary condition for consistency by construction: -# $$p\bigl(\{l\}^{(n)}\bigr)\bigr) = \int p\bigl(\{l\}^{(n)} \,\big|\, \{l\}^{(n+1)}\bigr) \,d\{l\}^{(n+1)}\,.$$ +# ```{math} +# p\bigl(\{l\}^{(n)}\bigr)\bigr) = \int p\bigl(\{l\}^{(n)} \,\big|\, \{l\}^{(n+1)}\bigr) \,d\{l\}^{(n+1)}\,. +# ``` # - The stochastic process is not Markovian, so successive increments are not independent. The variance of a larger increment therefore need not equal the sum of the variance of constituent smaller ones; in other words, -# $$Δ l_{2^{-n+1}}\bigl(Φ\bigr) = Δ l_{2^{-n}}\bigl(Φ\bigr) + Δ l_{2^{-n}}\bigl(Φ+2^{-n}\bigr)$$ +# ```{math} +# Δ l_{2^{-n+1}}\bigl(Φ\bigr) = Δ l_{2^{-n}}\bigl(Φ\bigr) + Δ l_{2^{-n}}\bigl(Φ+2^{-n}\bigr) +# ``` # does *not* imply -# $$\VV\bigl[Δ l_{2^{-n+1}}\bigl(Φ\bigr)\bigr] = \VV\bigl[Δ l_{2^{-n}}\bigl(Φ\bigr)\bigr] + \VV\bigl[Δ l_{2^{-n}}\bigl(Φ+2^{-n}\bigr)\bigr]\,.$$ +# ```{math} +# \VV\bigl[Δ l_{2^{-n+1}}\bigl(Φ\bigr)\bigr] = \VV\bigl[Δ l_{2^{-n}}\bigl(Φ\bigr)\bigr] + \VV\bigl[Δ l_{2^{-n}}\bigl(Φ+2^{-n}\bigr)\bigr]\,. +# ``` # - Our defining equations make equivalent use of the pre ($Δ l_{2^{-n}}(Φ)$) and post ($Δ l_{2^{-n}}(Φ+2^{-n})$) increments, thus preserving symmetry in $Φ$. # - Step sizes of the form $2^{-n}$ have exact representations in binary. Thus even small step sizes should not introduce additional numerical errors. 
@@ -205,15 +239,19 @@ # $$\begin{align} # r &:= \frac{\lnLtt(Φ+2^{-n}) - \lnLtt(Φ)}{\lnLtt(Φ+2^{-n+1}) - \lnLtt(Φ+2^{-n})} \,; \\ # v &:= 2 \emdstd\bigl(Φ + 2^{-n}\bigr)^2 \,. -# \end{align}$$ (eq_def-r-v_supp) +# \end{align}$$ (eq_def-r-v) # ::: -# (Definitions repeated from Eqs. \labelcref{eq_def-r-v__r,eq_def-r-v__v}.) The first value, $r$, is the ratio of two subincrements within $Δ l_{2^{-n+1}}(Φ)$. +# The first value, $r$, is the ratio of two subincrements within $Δ l_{2^{-n+1}}(Φ)$. # Setting $\frac{e^{ψ(α)}}{e^{ψ(β)}} = r$, the two equations we need to solve for $α$ and $β$ can be written -# $$\begin{align} +# ```{math} +# :label: eq_root-finding-problem +# +# \begin{align} # ψ(α) - ψ(β) &= \ln r \,; \\ # \ln\bigl[ ψ_1(α) + ψ_1(β) \bigr] &= \ln v \,. -# \end{align}$$ (eq_root-finding-problem_supp) -# (Definitions repeated from Eqs. \labelcref{eq_root-finding-problem__r,eq_root-finding-problem__v}.) Note that these equations are symmetric in $Φ$: replacing $Φ$ by $-Φ$ simply changes the sign on both sides of the first. The use of the logarithm in the equation for $v$ helps to stabilize the numerics. +# \end{align} +# ``` +# Note that these equations are symmetric in $Φ$: replacing $Φ$ by $-Φ$ simply changes the sign on both sides of the first. The use of the logarithm in the equation for $v$ helps to stabilize the numerics. # %% def f(lnαβ, lnr_v, _array=np.array, _exp=np.exp, _log=np.log, @@ -263,7 +301,7 @@ def f_mid(lnα, v, _exp=np.exp, _log=np.log, polygamma=scipy.special.polygamma): # %% [markdown] # The functions $ψ$ and $ψ_1$ diverge at zero, so $α$ and $β$ should remain positive. Therefore it makes sense to fit their logarithm: this enforces the lower bound, and improves the resolution where the derivative is highest. The two objective functions (up to scalar shift) are plotted below: the region for low $\ln α$ and $\ln β$ shows sharp variation around $α=β$, suggesting that this area may be most challenging for a numerical optimizer. 
In practice this is indeed what we observed. # -# We found however that we can make fits much more reliable by first choosing a suitable initialization point along the $\ln α = \ln β$ diagonal. In practice this means setting $α_0 = α = β$ and solving the 1d problem of Eq. \labelcref{eq_root-finding-problem__v} for $α_0$. (We use the implementation of Brent’s method in SciPy.) Then we can solve the full 2d problem of Eqs. \labelcref{eq_root-finding-problem__r,eq_root-finding-problem__v}, with $(α_0, α_0)$ as initial value. This procedure was successful for all values of $r$ and $v$ we encountered in our experiments. +# We found however that we can make fits much more reliable by first choosing a suitable initialization point along the $\ln α = \ln β$ diagonal. In practice this means setting $α_0 = α = β$ and solving the first equation of Eqs. {eq}`eq_root-finding-problem` for $α_0$. (We use the implementation of Brent’s method in SciPy.) Then we can solve the full 2d problem of Eqs. {eq}`eq_root-finding-problem`, with $(α_0, α_0)$ as initial value. This procedure was successful for all values of $r$ and $v$ we encountered in our experiments. # %% tags=["active-ipynb", "hide-input"] # α = np.logspace(-2, 1.2) @@ -304,7 +342,7 @@ def f_mid(lnα, v, _exp=np.exp, _log=np.log, polygamma=scipy.special.polygamma): # hv.save(fig, path.with_suffix(".pdf"), backend="matplotlib") # %% [markdown] -# :::{figure} ../figures/path-sampling_polygamma.svg +# :::{figure} ../../figures/path-sampling_polygamma.svg # :name: fig_polygamma # # Characterization of the digamma ($ψ$) and trigamma ($ψ_1$) functions, and of the metric variance $\Mvar$. 
@@ -326,12 +364,12 @@ def f_mid(lnα, v, _exp=np.exp, _log=np.log, polygamma=scipy.special.polygamma): # #glue("fig_Jac-spectrum", fig, display=False) # %% tags=["remove-cell", "active-ipynb"] -# path = config.paths.figuresdir/f"path-sampling_jac-spectrum" +# path = config.paths.figures/f"path-sampling_jac-spectrum" # hv.save(fig, path.with_suffix(".svg"), backend="matplotlib") # hv.save(fig, path.with_suffix(".pdf"), backend="matplotlib") # %% [markdown] -# :::{figure} ../figures/path-sampling_jac-spectrum.svg +# :::{figure} ../../figures/path-sampling_jac-spectrum.svg # :name: fig_Jac-spectrum # # **Objective function has a saddle-point around (0,0)** @@ -388,14 +426,20 @@ def f_mid(lnα, v, _exp=np.exp, _log=np.log, polygamma=scipy.special.polygamma): # ^^^ # # Having $v$ go to infinity requires that $α$ and/or $β$ go to $0$ (see Eq. {eq}`eq_root-finding-problem` and {numref}`fig_polygamma`). The probability density of $x_1$ is then a Dirac delta: placed at $x_1=0$ if $α \to 0$, or placed at $x_1 = 1$ if $β \to 0$. If both $α$ and $β$ go to $0$, the PDF must be the sum of two weighted deltas: -# $$p(x_1) = w_0 δ(x_1 - 0) + w_1 δ(x_1 - 1) \,.$$ +# ```{math} +# p(x_1) = w_0 δ(x_1 - 0) + w_1 δ(x_1 - 1) \,. +# ``` # The weights $w_i$ can be determined by requiring that -# $$\EE[x_1] = r \,,$$ +# ```{math} +# \EE[x_1] = r \,, +# ``` # which yields -# $$\begin{aligned} +# ```{math} +# \begin{aligned} # w_1 &= \frac{r}{r+1}\,, & w_2 &= \frac{1}{r+1} \,. -# \end{aligned}$$ -# (For this special case, we revert to writing the condition in terms of a standard (Lebesgue) expectation, since the center (Eq. \labelcref{eq_Aitchison-moments__EE}) is undefined when $α, β \to 0$.) +# \end{aligned} +# ``` +# (For this special case, we revert to writing the condition in terms of a standard (Lebesgue) expectation, since the center (Eq. {eq}`eq_Aitchison-moments`) is undefined when $α, β \to 0$.) 
# # Since we have already considered the special cases $r = 0$ and $r \to \infty$, we can assume $0 < r < \infty$. Then both $α$ and $β$ are zero, and $x_1$ should be a Bernoulli random variable with success probability $p = w_2 = \frac{1}{r+1}$. # @@ -1256,7 +1300,7 @@ def generate_path_hierarchical_beta( return Φarr, qhat -# %% tags=["active-ipynb", "hide-input"] editable=true slideshow={"slide_type": ""} +# %% editable=true slideshow={"slide_type": ""} tags=["active-ipynb", "hide-input"] # qstar = np.log # σtilde = lambda Φ: 1.5*np.ones_like(Φ) # Φhat, qhat = generate_path_hierarchical_beta( @@ -1384,7 +1428,7 @@ def generate_quantile_paths(qstar: Callable, deltaEMD: Callable, c: float, # %% [markdown] # ### Usage example -# %% tags=["active-ipynb", "hide-input"] editable=true slideshow={"slide_type": ""} +# %% editable=true slideshow={"slide_type": ""} tags=["active-ipynb", "hide-input"] # #Φtilde = np.linspace(0.01, 1, 20) # qstar = np.log # σtilde = lambda Φ: 1.5*np.ones_like(Φ) @@ -1402,5 +1446,5 @@ def generate_quantile_paths(qstar: Callable, deltaEMD: Callable, c: float, # # hv.Overlay((*curves_qhat, curve_qstar)).opts(ylabel="") -# %% tags=["remove-input", "active-ipynb"] editable=true slideshow={"slide_type": ""} +# %% editable=true slideshow={"slide_type": ""} tags=["remove-input", "active-ipynb"] # GitSHA() diff --git a/src/emd_falsify/tasks.md b/src/emd_falsify/tasks.md new file mode 100644 index 0000000..0cd3882 --- /dev/null +++ b/src/emd_falsify/tasks.md @@ -0,0 +1,792 @@ +--- +jupytext: + encoding: '# -*- coding: utf-8 -*-' + formats: py:percent,md:myst + notebook_metadata_filter: -jupytext.text_representation.jupytext_version + text_representation: + extension: .md + format_name: myst + format_version: 0.13 +kernelspec: + display_name: Python (emd-falsify-dev) + language: python + name: emd-falsify-dev +--- + ++++ {"tags": ["remove-cell"], "editable": true, "slideshow": {"slide_type": ""}} + +--- +math: + '\Bconf' : 'B^{\mathrm{conf}}_{#1}' + 
'\Bemd' : 'B_{#1}^{\mathrm{EMD}}' +--- + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [hide-input] +--- +from __future__ import annotations +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +# Tasks + +Running experiments via [SumatraTasks](https://sumatratask.readthedocs.io/en/latest/basics.html) has two purposes: +- Maintaining an electronic lab book: recording all input/code/output triplets, along with a bunch of metadata to ensure reproducibility (execution date, code versions, etc.) +- Avoid re-running calculations, with hashes that are both portable and long-term stable. + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [active-ipynb] +--- +from config import config # Notebook +``` + +```{raw-cell} +--- +editable: true +raw_mimetype: '' +slideshow: + slide_type: '' +tags: [active-py] +--- +from .config import config # Python script +``` + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +--- +import abc +import psutil +import logging +import time +import multiprocessing as mp +import numpy as np +from functools import partial +from itertools import repeat +from typing import ( + TypeVar, Optional, Union, Any, Callable, + Dict, Tuple, List, Iterable, NamedTuple, Literal) +from dataclasses import dataclass, is_dataclass, replace +from scityping import Serializable, Dataclass, Type +from tqdm.auto import tqdm +from scityping.functions import PureFunction +# Make sure Array (numpy) and RV (scipy) serializers are loaded +import scityping.numpy +import scityping.scipy + +from smttask import RecordedTask, TaskOutput +from smttask.workflows import ParamColl, SeedGenerator + +import emd_falsify as emd +``` + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +--- +logger = logging.getLogger(__name__) +``` + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +--- +__all__ = ["Calibrate", "EpistemicDist", "CalibrateOutput"] +``` + ++++ {"editable": true, 
"slideshow": {"slide_type": ""}} + +(code_calibration-distribution)= +## Epistemic distribution + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [skip-execution] +--- +@dataclass +class Experiment: + """Reference implementation for an experiment container; + represents one sample ω from the epistemic distribution. + + Users may use this dataclass, or define their own: Experiment containers + need not subclass this class, but must provide the same attributes. + + One reason to use a custom experiment class is to ensure that heavy + computations (like fitting the candidate models) happen on the child + MP processes. + """ + data_model: DataModel + candidateA: CandidateModel + candidateB: CandidateModel + QA: RiskFunction + QB: RiskFunction +``` + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +--- +@dataclass(frozen=True) +class EpistemicDist(abc.ABC): + """Generic template for a calibration distribution: + + in effect a calibration distribution with no calibration parameters. + For actual use you need to subclass `EpistemicDist` and extend it with + parameters relevant to your models. + + Using this class is not actually required for the `Calibrate` task: any + frozen dataclass will do. The only requirements for the dataclass are: + + - That iterating over it yields data models. + - That it defines `__len__`. + - That all its parameters are serializable. + - That it be created with ``frozen=True``. + + Users can choose to subclass this class, or just use it as a template. + + .. Note:: If subclassing, the first argument will always be `N` since + subclasses append their parameters to the base class. + """ + N: int|Literal[np.inf] # Number of data models, i.e. 
length of iterator + + @abc.abstractmethod + def __iter__(self) -> Experiment: + raise NotImplementedError + # rng = + # for n in range(self.N): + # + # yield , , , , + + def __len__(self): + return self.N + + def generate(self, N: int): + """Return a copy of EpistemicDist which will yield `N` models. + + :param:N: Number of models to return. + """ + return replace(self, N=N) +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +## Calibration task + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +### Task parameters + +| Parameter | Description | +|-----------|-------------| +| `c_list` | The values of $c$ we want to test. | +| `models` | Sequence of $N$ quintuplets of models (data generation, candidate A, candidate B, loss A, loss B) drawn from a calibration distribution. Typically, but not necessarily, a subclass of `EpistemicDist`: any dataclass satisfying the requirements listed in `EpistemicDist` is accepted. | +| `Ldata` | Data set size used to construct the empirical PPF for models $A$ and $B$. Ideally commensurate with the actual data set used to assess models. | +| `Linf` | Data set size considered equivalent to "infinite". Used to compute $\B^{\mathrm{conf}}$ | + +The value of $N$ is determined from `len(models)`, so the `models` iterable should define its length. + +#### Config values: + +| Parameter | Description | +|-----------|-------------| +| `ncores` | Number of CPU cores to use. | + +##### Effects on compute time + +The total number of experiments will be +$$N \times \lvert\mathtt{c\_list}\rvert \times \text{(\# parameter set distributions)} \,.$$ +In the best scenario, one can expect compute times to be 2.5 minutes / experiment. So expect this to take a few hours. + +Results are cached on-disk with [joblib.Memory](https://joblib.readthedocs.io/en/latest/memory.html), so this notebook can be reexecuted without re-running the experiments. Loading from disk takes about 1 minute for 6000 experiments. 
+
+##### Effects on caching
+
+Like any [RecordedTask](https://sumatratask.readthedocs.io/en/latest/basics.html), `Calibrate` will record its output to disk. If executed again with exactly the same parameters, instead of evaluating the task again, the result is simply loaded from disk.
+
+In addition, `Calibrate` (or rather `Bemd`, which it calls internally) also uses a faster `joblib.Memory` cache to store intermediate results for each value of $c$ in `c_list`. Because `joblib.Memory` computes its hashes by first pickling its inputs, this cache is neither portable nor suitable for long-term storage: the output of `pickle.dump` may change depending on the machine, OS version, Python version, etc. Therefore this cache should be considered *local* and *short-term*. Nevertheless it is quite useful, because it means that `c_list` can be modified and only the new $c$ values will be computed.
+
+Changing any argument other than `c_list` will invalidate all caches and force all recomputations.
+
++++ {"editable": true, "slideshow": {"slide_type": ""}}
+
+**Current limitations**
+- `ncores` depends on `config.mp.max_cores`, but is determined automatically.
+  No way to control via parameter.
+
++++ {"editable": true, "slideshow": {"slide_type": ""}}
+
+### Types
+
++++ {"editable": true, "slideshow": {"slide_type": ""}}
+
+#### Input types
+
+To be able to retrieve past results, [Tasks](https://sumatratask.readthedocs.io/en/latest/basics.html) rely on their inputs being serializable (i.e. convertible to plain text). Both [*Pydantic*](https://docs.pydantic.dev/latest/) and [*SciTyping*](https://scityping.readthedocs.io) types are supported; *SciTyping* in particular can serialize arbitrary [dataclasses](https://docs.python.org/3/library/dataclasses.html), as long as each of their fields is serializable.
+
+A weaker requirement for an object is to be pickleable. All serializable objects should be pickleable, but many pickleable objects are not serializable.
In general, objects need to be pickleable if they are sent to a multiprocessing (MP) subprocess, and serializable if they are written to the disk. + +| Requirement | Reason | Applies to | +|-------------|--------|----------| +| Pickleable | Sent to subprocess | `compute_Bemd` arguments | +| Serializable | Saved to disk | `Calibrate` arguments
`CalibrateResult` | +| Hashable | Used as dict key | items of `models`
items of `c_list` | + +To satisfy these requirements, the sequence `models` needs to be specified as a frozen dataclass:[^more-formats] Dataclasses for serializability, frozen for hashability. Of course they should also define `__iter__` and `__len__` – see [`EpistemicDist`](code_calibration-distribution) for an example. + +[CHECK: I think loss functions are unrestricted now ? Maybe impact on caching ?] The loss functions `QA` and `QB` functions can be specified as either dataclasses (with a suitable `__call__` method) or [`PureFunction`s](https://scityping.readthedocs.io/en/latest/api/functions.html#scityping.functions.PureFunction). In practice we found dataclasses easier to use. + +[^more-formats]: We use dataclasses because they are the easiest to support, but support for other formats could be added in the future. + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +--- +SynthPPF = Callable[[np.ndarray[float]], np.ndarray[float]] +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +The items of the `models` sequence must be functions which take a single argument – the data size $L$ – and return a data set of size $L$: +$$\begin{aligned} +\texttt{data\_model}&:& L &\mapsto + \bigl[(x_1, y_1), (x_2, y_2), \dotsc, (x_L, y_L)\bigr] +\end{aligned} \,. $$ +Exactly how the dataset is structured (single array, list of tuples, etc.) is up to the user. + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +--- +Dataset = TypeVar("Dataset", + bound=Union[np.ndarray, + List[np.ndarray], + List[Tuple[np.ndarray, np.ndarray]]] + ) +DataModel = Callable[[int], Dataset] +CandidateModel = Callable[[Dataset], Dataset] +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +The `riskA` and `riskB` functions take a dataset returned by `data_model` and evaluate the risk $q$ of each sample. 
They return a vector of length $L$, and their signature depends on the output format of `data_model`: +$$\begin{aligned} +\texttt{risk function}&:& \{(x_i,y_i)\}_{i=1}^L &\mapsto \{q_i\}_{i=1}^L \,. +\end{aligned}$$ + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +--- +RiskFunction = Callable[[Dataset], np.ndarray] +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +#### Result type + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +Calibration results are returned as a [record array](https://numpy.org/doc/stable/user/basics.rec.html#record-arrays) with fields `Bemd` and `Bconf`. Each row in the array corresponds to one data model, and there is one array per $c$ value. So a `CalibrateResult` object is a dictionary which looks something like the following: + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +$$ +\begin{alignedat}{4} % Would be nicer with nested {array}, but KaTeX doesn’t support vertical alignment +&\texttt{CalibrateResult}:\qquad & \{ c_1: &\qquad& \texttt{Bemd} &\quad& \texttt{Bconf} \\ + &&&& 0.24 && 0 \\ + &&&& 0.35 && 1 \\ + &&&& 0.37 && 0 \\ + &&&& 0.51 && 1 \\ + && c_2: &\qquad& \texttt{Bemd} &\quad& \texttt{Bconf} \\ + &&&& 0.11 && 0 \\ + &&&& 0.14 && 0 \\ + &&&& 0.22 && 0 \\ + &&&& 0.30 && 1 \\ + &&\vdots \\ + &&\} +\end{alignedat}$$ + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +--- +calib_point_dtype = np.dtype([("Bemd", float), ("Bconf", bool)]) +CalibrateResult = dict[float, np.ndarray[calib_point_dtype]] +``` + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +--- +class CalibrateOutput(TaskOutput): + """Compact format used to store task results to disk. + Use `task.unpack_result` to convert to a `CalibrateResult` object. 
+ """ + Bemd : List[float] + Bconf: List[float] +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +### Functions for the calibration experiment + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +Below we define the two functions to compute $\Bemd{}$ and $\Bconf{}$; these will be the abscissa and ordinate in the calibration plot. +Both functions take an arguments a data generation model, risk functions for candidate models $A$ and $B$, and a number of data points to generate. + +- $\Bemd{}$ needs to be recomputed for each value of $c$, so we also pass $c$ as a parameter. $\Bemd{}$ computations are relatively expensive, and there are a lot of them to do during calibration, so we want to dispatch `compute_Bemd` to different multiprocessing (MP) processes. This has two consequences: + + - The `multiprocessing.Pool.imap` function we use to dispatch function calls can only iterate over one argument. To accomodate this, we combine the data model and $c$ value into a tuple `datamodel_c`, which is unpacked within the `compute_Bemd` function. + - All arguments should be pickleable, as pickle is used to send data to subprocesses. + +- $\Bconf{}$ only needs to be computed once per data model. $\Bconf{}$ is also typically cheap (unless the data generation model is very complicated), so it is not worth dispatching to an MP subprocess. + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +--- +def compute_Bemd(i_ω_c: Tuple[int, Experiment, float], + #datamodel_risk_c: Tuple[int,DataModel,CandidateModel,CandidateModel,RiskFunction,RiskFunction,float], + #riskA: RiskFunction, riskB: RiskFunction, + #synth_ppfA: SynthPPF, synth_ppfB: SynthPPF, + #candidate_model_A: CandidateModel, candidate_model_B: CandidateModel, + Ldata): + """ + Wrapper for `emd_falsify.Bemd`: + - Unpack `datamodel_c` into `data_mode + - Instantiates models using parameters in `Θtup_c`. + - Constructs log-probability functions for `MtheoA` and `MtheoB`. 
+ - Generates synthetic observed data using `Mtrue`. + - Calls `emd_falsify.Bemd` + + """ + ## Unpack arg 1 ## (pool.imap requires iterating over one argument only) + # i, data_model, candidate_model_A, candidate_model_B, QA, QB, c = datamodel_risk_c + i, ω, c = i_ω_c + + ## Generate observed data ## + logger.debug(f"Compute Bemd - Generating {Ldata} data points."); t1 = time.perf_counter() + data = ω.data_model(Ldata) ; t2 = time.perf_counter() + logger.debug(f"Compute Bemd - Done generating {Ldata} data points. Took {t2-t1:.2f} s") + + ## Construct synthetic quantile functions ## + synth_ppfA = emd.make_empirical_risk_ppf(ω.QA(ω.candidateA(data))) + synth_ppfB = emd.make_empirical_risk_ppf(ω.QB(ω.candidateB(data))) + + ## Construct mixed quantile functions ## + mixed_ppfA = emd.make_empirical_risk_ppf(ω.QA(data)) + mixed_ppfB = emd.make_empirical_risk_ppf(ω.QB(data)) + + ## Draw sets of expected risk values (R) for each model ## + + # Silence sampling warnings: Calibration involves evaluating Bemd for models far from the data distribution, which require more + # than 1000 path samples to evaluate the path integral within the default margin. + # The further paths are from the most likely one, the more likely they are to trigger numerical warnings. + # This is expected, so we turn off warnings to avoid spamming the console. + + logger.debug("Compute Bemd - Generating R samples"); t1 = time.perf_counter() + + emdlogger = logging.getLogger("emd_falsify.emd") + emdlogginglevel = emdlogger.level + emdlogger.setLevel(logging.ERROR) + + # NB: Calibration explores some less well-fitted regions, so keeping `res` and `M` high is worthwhile + # (Otherwise we get poor Bemd estimates and need more data.) 
+ RA_lst = emd.draw_R_samples(mixed_ppfA, synth_ppfA, c=c) + RB_lst = emd.draw_R_samples(mixed_ppfB, synth_ppfB, c=c) + + # Reset logging level as it was before + emdlogger.setLevel(emdlogginglevel) + + t2 = time.perf_counter() + logger.debug(f"Compute Bemd - Done generating R samples. Took {t2-t1:.2f} s") + + ## Compute the EMD criterion ## + Bemd = np.less.outer(RA_lst, RB_lst).mean() + + ## Return alongside the experiment key + return (i, ω, c), Bemd + # return (i, data_model, QA, QB, c), Bemd +``` + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +--- +def compute_Bconf(data_model, QA, QB, Linf): + """Compute the true Bconf (using a quasi infinite number of samples)""" + + # Generate samples + logger.debug(f"Compute Bconf – Generating 'infinite' dataset with {Linf} data points"); t1 = time.perf_counter() + data = data_model(Linf) + t2 = time.perf_counter() + logger.debug(f"Compute Bconf – Done generating 'infinite' dataset. Took {t2-t1:.2f} s") + + # Compute Bconf + logger.debug("Compute Bconf – Evaluating expected risk on 'infinite' dataset"); t1 = time.perf_counter() + RA = QA(data).mean() + RB = QB(data).mean() + t2 = time.perf_counter() + logger.debug(f"Compute Bconf – Done evaluating risk. Took {t2-t1:.2f} s") + return RA < RB +``` + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +--- +def compute_Bemd_and_maybe_Bconf(i_ω_c, Ldata, Linf, c_conf): + """Wrapper which calls both `compute_Bemd` and `compute_Bconf`. + The latter is only called if `c` matches `c_conf`. + + The reason for this wrapper is to better utilize multiprocessing threads, + by executing Bconf with the same MP threads as Bemd while still ensuring + that Bconf is not executed more often than needed. + Since Bemd is executed once for every `c` value in `c_list`, we choose + one value in `c_list` and make that special: whenever we compute Bemd for + that `c`, we also compute Bconf. 
+ + This has three related benefits: + - If there is caching that might reuse computations between compute_Bemd + and compute_Bconf (e.g. a data generation function decorated with @cache), + this increases the chances + - It avoids having to execute Bconf in the main process, which can have + adverse effects if Bconf involves significant computation. If we have + n cores and n MP processes, then having computation occuring in the main + process is like having n+1 MP processes, which will lead to inefficient + switching. + - If the Bconf computations are so significant (less than n times faster + than Bemd), then they become a bottleneck and cause `imap` to accumulate + a queue of results. Since this is effectively an unbounded cache, if + those results require substantial memory, this can cause the entire + computation to crash because it exceeds available memory. + """ + # NB: Since `data_model` is consumed within this function, even if there + # were a bottleneck with imap, it should not exceed memory: + # i, c, Bemd and Bconf are all small scalars. + (i, ω, c), Bemd = compute_Bemd(i_ω_c, Ldata) + if c == c_conf: + Bconf = compute_Bconf(ω.data_model, ω.QA, ω.QB, Linf) + else: + Bconf = None + return i, c, Bemd, Bconf +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +### Task definition + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [skip-execution] +--- +@RecordedTask +class Calibrate: + + def __call__( + self, + c_list : List[float], + #data_models: Sequence[DataModel], + experiments: Dataclass, # Iterable of Experiment elements + #riskA : RiskFunction + # riskA : Union[Dataclass,PureFunction], + # riskB : Union[Dataclass,PureFunction], + #synth_risk_ppf: SynthPPF + #synth_risk_ppfA : Union[emd.interp1d, PureFunction], + #synth_risk_ppfB : Union[emd.interp1d, PureFunction], + Ldata : int, + Linf : int, + ) -> CalibrateOutput: + """ + Run a calibration experiment using the models listed in `models`. 
+ Data models must be functions taking a single argument – an integer – and + returning a dataset with that many samples. They should be “ready to use”; + in particular, their random number generator should already be properly seeded + to avoid correlations between different models in the list. + + Parameters + ---------- + c_list: + + experiments: Dataclass following the pattern of `EpistemicDist`. + Therefore also an iterable of data models to use for calibration; + when iterating, each element should be compatible with `Experiment` type. + See `EpistemicDist` for more details. + Each experiment will result in one (Bconf, Bemd) pair in the output results. + If this iterable is sized, progress bars will estimate the remaining compute time. + + Ldata: Number of data points from the true model to generate when computing Bemd. + This should be chosen commensurate with the size of the dataset that will be analyzed, + in order to accurately mimic data variability. + Linf: Number of data points from the true model to generate when computing `Bconf`. + This is to emulate an infinitely large data set, and so should be large + enough that numerical variability is completely suppressed. + Choosing a too small value for `Linf` will add noise to the Bconf estimate, + which would need to compensated by more calibration experiments. + Since generating more samples is generally cheaper than performing more + experiments, it is also generally preferable to choose rather large `Linf` + values. + + .. Important:: An appropriate value of `Linf` will depend on the models and + how difficult they are to differentiate; it needs to be determined empirically. + """ + pass +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +Bind arguments to the `Bemd` function, so it only take one argument (`datamodel_c`) as required by `imap`. 
+ +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [skip-execution] +--- + # compute_Bemd_partial = partial(compute_Bemd, Ldata=Ldata) + compute_partial = partial(compute_Bemd_and_maybe_Bconf, + Ldata=Ldata, Linf=Linf, c_conf=c_list[0]) +``` + +Define dictionaries into which we will accumulate the results of the $B^{\mathrm{EMD}}$ and $B_{\mathrm{conf}}$ calculations. + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [skip-execution] +--- + Bemd_results = {} + Bconf_results = {} +``` + +- Set the iterator over parameter combinations (we need two identical ones) +- Set up progress bar. +- Determine the number of multiprocessing cores we will use. + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [skip-execution] +--- + try: + N = len(experiments) + except (TypeError, AttributeError): # Typically TypeError, but AttributeError seems also plausible + logger.info("Data model iterable has no length: it will not be possible to estimate the remaining computation time.") + total = None + else: + total = N*len(c_list) + progbar = tqdm(desc="Calib. experiments", total=total) + ncores = psutil.cpu_count(logical=False) + ncores = min(ncores, total, config.mp.max_cores) +``` + +Run the experiments. Since there are a lot of them, and they each take a few minutes, we use multiprocessing to run them in parallel. + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [skip-execution] +--- + ω_c_gen = ((i, ω, c) # i is used as an id for each different model/Qs set + for i, ω in enumerate(experiments) + for c in c_list) + + if ncores > 1: + with mp.Pool(ncores, maxtasksperchild=config.mp.maxtasksperchild) as pool: + # Chunk size calculated following mp.Pool's algorithm (See https://stackoverflow.com/questions/53751050/multiprocessing-understanding-logic-behind-chunksize/54813527#54813527) + # (Naive approach would be total/ncores. This is most efficient if all taskels take the same time. 
Smaller chunks == more flexible job allocation, but more overhead) + chunksize, extra = divmod(N, ncores*6) + if extra: + chunksize += 1 + Bemd_Bconf_it = pool.imap_unordered(compute_partial, ω_c_gen, + chunksize=chunksize) + for (i, c, Bemd, Bconf) in Bemd_Bconf_it: + progbar.update(1) # Updating first more reliable w/ ssh + Bemd_results[i, c] = Bemd + if Bconf is not None: + Bconf_results[i] = Bconf + # Bemd_it = pool.imap(compute_Bemd_partial, ω_c_gen, + # chunksize=chunksize) + # for (i, data_model, QA, QB, c), Bemd_res in Bemd_it: + # progbar.update(1) # Updating first more reliable w/ ssh + # Bemd_results[i, c] = Bemd_res + # if i not in Bconf_results: + # Bconf_results[i] = compute_Bconf(data_model, QA, QB, Linf) +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +Variant without multiprocessing: + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +--- + else: + Bemd_Bconf_it = (compute_partial(arg) for arg in ω_c_gen) + for (i, c, Bemd, Bconf) in Bemd_Bconf_it: + progbar.update(1) + Bemd_results[i, c] = Bemd + if Bconf is not None: + Bconf_results[i] = Bconf + # Bemd_it = (compute_Bemd_partial(arg) for arg in ω_c_gen) + # for (i, data_model, QA, QB, c), Bemd_res in Bemd_it: + # progbar.update(1) # Updating first more reliable w/ ssh + # Bemd_results[i, c] = Bemd_res + # if i not in Bconf_results: + # Bconf_results[i] = compute_Bconf(data_model, QA, QB, Linf) +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +Close progress bar: + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [skip-execution] +--- + progbar.close() +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +#### Result format + +If we serialize the whole dict, most of the space is taken up by serializing the models in the keys. Not only is this wasteful – we can easily recreate them with `model_c_gen` – but it also makes deserializing the results quite slow. 
+So instead we return just the values as a list, and provide an `unpack_result` method which reconstructs the result dictionary. +:::{caution} +This assumes that we get the same models when we rebuild them within `unpack_result`. +Two ways this assumption can be violated: +- The user’s code for `experiments` has changed. (Unless the user used a type + which serialises to completely self-contained data.) +- The model generation is non-reproducible. (E.g. it uses an unseeded RNG). +::: + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [skip-execution] +--- + # NB: Don’t just use list(Bemd_results.values()): order of dictionary is not guaranteed + return dict(Bemd =[Bemd_results [i,c] for i in range(len(experiments)) for c in c_list], + Bconf=[Bconf_results[i] for i in range(len(experiments))]) +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +> **END OF `Calibrate.__call__`** + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [skip-execution] +--- + def unpack_results(self, result: Calibrate.Outputs.result_type + ) -> CalibrateResult: + assert len(result.Bemd) == len(self.c_list) * len(result.Bconf), \ + "`result` argument seems not to have been created with this task." + # Reconstruct the dictionary as it was at the end of task execution + Bemd_dict = {}; Bemd_it = iter(result.Bemd) + Bconf_dict = {}; Bconf_it = iter(result.Bconf) + for i in range(len(result.Bconf)): # We don’t actually need the models + Bconf_dict[i] = next(Bconf_it) # => we just use integer ids + for c in self.taskinputs.c_list: # This avoids unnecessarily + Bemd_dict[i, c] = next(Bemd_it) # instantiating models. 
+ # for data_model in self.taskinputs.models: + # Bconf_dict[data_model] = next(Bconf_it) + # for c in self.taskinputs.c_list: + # Bemd_dict[(data_model, c)] = next(Bemd_it) + # Package results into a record arrays – easier to sort and plot + calib_curve_data = {c: [] for c in self.taskinputs.c_list} + for i, c in Bemd_dict: + calib_curve_data[c].append( + (Bemd_dict[i, c], Bconf_dict[i]) ) + + #return UnpackedCalibrateResult(Bemd=Bemd_dict, Bconf=Bconf_dict) + return {c: np.array(calib_curve_data[c], dtype=calib_point_dtype) + for c in self.taskinputs.c_list} +``` + ++++ {"editable": true, "slideshow": {"slide_type": ""}} + +#### (Model, $c$) generator + +`task.model_c_gen` yields combined `(data_model, c)` tuples by iterating over both `models` and `c_list`. The reason we implement this in its own method is so we can recreate the sequence of models and $c$ values in `unpack_results`: this way we only need to store the sequence of $\Bemd{}$ and $\Bconf{}$ values for each (model, $c$) pair, but not the models themselves. + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [skip-execution] +--- + # def model_c_gen(self, Bemd_results: "dict|set", Bconf_results: "dict|set"): + # """Return an iterator over data models and c values. + # The two additional arguments `Bemd_results` and `Bconf_results` should be sets of + # *already computed* results. At the moment this is mostly a leftover from a previous + # implementation, before this function was made a *Task* — in the current + # implementation, the task always behaves as though empty dictionaries were passed. + # """ + # for data_model, modelA, modelB in self.taskinputs.models: # Using taskinputs allows us to call `model_c_gen` + # for c in self.taskinputs.c_list: # after running the task to recreate the keys. 
+ # if (data_model, c) not in Bemd_results: + # yield (data_model, modelA, modelB, c) + # else: # Skip results which are already loaded + # assert data_model in Bconf_results +``` diff --git a/emd_falsify/tasks.py b/src/emd_falsify/tasks.py similarity index 98% rename from emd_falsify/tasks.py rename to src/emd_falsify/tasks.py index d7eab90..28a2a2b 100644 --- a/emd_falsify/tasks.py +++ b/src/emd_falsify/tasks.py @@ -2,19 +2,19 @@ # --- # jupyter: # jupytext: -# formats: py:percent +# formats: py:percent,md:myst +# notebook_metadata_filter: -jupytext.text_representation.jupytext_version # text_representation: # extension: .py # format_name: percent # format_version: '1.3' -# jupytext_version: 1.15.0 # kernelspec: -# display_name: Python (emd-paper) +# display_name: Python (emd-falsify-dev) # language: python -# name: emd-paper +# name: emd-falsify-dev # --- -# %% [markdown] editable=true slideshow={"slide_type": ""} +# %% [markdown] tags=["remove-cell"] editable=true slideshow={"slide_type": ""} # --- # math: # '\Bconf' : 'B^{\mathrm{conf}}_{#1}' @@ -31,10 +31,10 @@ # - Maintaining an electronic lab book: recording all input/code/output triplets, along with a bunch of metadata to ensure reproducibility (execution date, code versions, etc.) # - Avoid re-running calculations, with hashes that are both portable and long-term stable. 
-# %% tags=["active-ipynb"] editable=true slideshow={"slide_type": ""} +# %% editable=true slideshow={"slide_type": ""} tags=["active-ipynb"] # from config import config # Notebook -# %% tags=["active-py"] editable=true slideshow={"slide_type": ""} raw_mimetype="" +# %% editable=true raw_mimetype="" slideshow={"slide_type": ""} tags=["active-py"] from .config import config # Python script # %% editable=true slideshow={"slide_type": ""} @@ -92,6 +92,7 @@ class Experiment: QA: RiskFunction QB: RiskFunction + # %% editable=true slideshow={"slide_type": ""} @dataclass(frozen=True) class EpistemicDist(abc.ABC): @@ -277,7 +278,7 @@ class CalibrateOutput(TaskOutput): # - The `multiprocessing.Pool.imap` function we use to dispatch function calls can only iterate over one argument. To accomodate this, we combine the data model and $c$ value into a tuple `datamodel_c`, which is unpacked within the `compute_Bemd` function. # - All arguments should be pickleable, as pickle is used to send data to subprocesses. # -# - $\Bconf{}$ only needs to be computed once per data model. $\Bconf{}$ is also typically cheap (unless the data generation model is very complicated), so it is not worth dispatching to an MP subprocess. +# - $\Bconf{}$ only needs to be computed once per data model. $\Bconf{}$ is also typically cheap (unless the data generation model is very complicated), so it is not worth dispatching to an MP subprocess. # %% editable=true slideshow={"slide_type": ""} def compute_Bemd(i_ω_c: Tuple[int, Experiment, float], @@ -343,6 +344,7 @@ def compute_Bemd(i_ω_c: Tuple[int, Experiment, float], return (i, ω, c), Bemd # return (i, data_model, QA, QB, c), Bemd + # %% editable=true slideshow={"slide_type": ""} def compute_Bconf(data_model, QA, QB, Linf): """Compute the true Bconf (using a quasi infinite number of samples)""" @@ -361,6 +363,7 @@ def compute_Bconf(data_model, QA, QB, Linf): logger.debug(f"Compute Bconf – Done evaluating risk. 
Took {t2-t1:.2f} s") return RA < RB + # %% editable=true slideshow={"slide_type": ""} def compute_Bemd_and_maybe_Bconf(i_ω_c, Ldata, Linf, c_conf): """Wrapper which calls both `compute_Bemd` and `compute_Bconf`. @@ -608,4 +611,3 @@ def unpack_results(self, result: Calibrate.Outputs.result_type # yield (data_model, modelA, modelB, c) # else: # Skip results which are already loaded # assert data_model in Bconf_results - diff --git a/emd_falsify/utils.py b/src/emd_falsify/utils.py similarity index 100% rename from emd_falsify/utils.py rename to src/emd_falsify/utils.py diff --git a/src/emd_falsify/viz.md b/src/emd_falsify/viz.md new file mode 100644 index 0000000..b398349 --- /dev/null +++ b/src/emd_falsify/viz.md @@ -0,0 +1,297 @@ +--- +jupytext: + encoding: '# -*- coding: utf-8 -*-' + formats: py:percent,md:myst + notebook_metadata_filter: -jupytext.text_representation.jupytext_version + text_representation: + extension: .md + format_name: myst + format_version: 0.13 +kernelspec: + display_name: Python (emd-falsify-dev) + language: python + name: emd-falsify-dev +--- + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +--- +from __future__ import annotations +``` + +```{code-cell} +import numpy as np +import pandas as pd +import matplotlib as mpl +import seaborn as sns +import holoviews as hv +``` + +```{code-cell} +from dataclasses import dataclass +from typing import Optional, Dict, List +``` + +```{code-cell} +--- +editable: true +raw_mimetype: '' +slideshow: + slide_type: '' +tags: [skip-execution] +--- +from .config import config +from . 
import utils +``` + +```{code-cell} +--- +editable: true +slideshow: + slide_type: '' +tags: [active-ipynb, remove-cell] +--- +from config import config +import utils +``` + +```{code-cell} +dash_patterns = ["dotted", "dashed", "solid"] +``` + +# Plotting functions + +```{code-cell} +@dataclass +class CalibrationPlotElements: + """ + `bin_idcs`: Dictionary indicating which experiment index were assigned to + each bin. Use in conjunction with the EpistemicDistribution iterator + to reconstruct specific experiments. + """ + calibration_curves: hv.Overlay + prohibited_areas : hv.Area + discouraged_areas : hv.Overlay + bin_idcs: Dict[float,List[np.ndarray[int]]] + + def __iter__(self): + yield self.calibration_curves + yield self.prohibited_areas + yield self.discouraged_areas + + def _repr_mimebundle_(self, *args, **kwds): + return (self.prohibited_areas * self.discouraged_areas * self.calibration_curves)._repr_mimebundle_(*args, **kwds) +``` + +```{code-cell} +def calibration_bins(calib_results: CalibrateResult, + target_bin_size: Optional[int]=None): + """Return the bin edges for the histograms produced by `calibration_plot`. + + .. Note:: These are generally *not* good bin edges for plotting a histogram + of calibration results: by design, they will produce an almost + flat histogram. + """ + bin_edges = {} + for c, data in calib_results.items(): + i = 0 + Bemd = np.sort(data["Bemd"]) + edges = [Bemd[0]] + for w in utils.get_bin_sizes(len(Bemd), target_bin_size)[:-1]: + i += w + edges.append(Bemd[i:i+2].mean()) + edges.append(Bemd[-1]) + bin_edges[c] = edges + return bin_edges +``` + +```{code-cell} +def calibration_plot(calib_results: CalibrateResult, + target_bin_size: Optional[int]=None + ) -> CalibrationPlotElements: + """Create a calibration plot from the results of calibration experiments. + + Parameters + ---------- + calib_results: The calibration results to plot. 
The typical way to obtain + these is to create and run a `Calibrate` task: + >>> task = emd_falsify.tasks.Calibrate(...) + >>> calib_results = task.unpack_results(task.run()) + target_bin_size: Each point on the calibration curve is an average over + some number of calibration experiments; this parameter sets that number. + (The actual number may vary a bit, if `target_bin_size` does not exactly + divide the total number of samples.) + Larger bin sizes result in fewer but more accurate curve points. + The default is to aim for the largest bin size possible which results + in 16 curve points, with some limits in case the number of results is + very small or very large. + """ + + ## Calibration curves ## + calib_curves = {} + calib_bins = {} + for c, data in calib_results.items(): + # # We don’t do the following because it uses the Bconf data to break ties. + # # If there are a lot of equal values (typically happens with a too small c), + # # then those will get sorted and we get an artificial jump from 0 to 1 + # data.sort(order="Bemd") + σ = np.argsort(data["Bemd"]) # This will only use Bemd data; order within ties remains random + Bemd = data["Bemd"][σ] # NB: Don’t modify original data order: + Bconf = data["Bconf"][σ] # we may want to inspect it later. 
+ + curve_data = [] + bin_idcs = [] + i = 0 + for w in utils.get_bin_sizes(len(data), target_bin_size): + curve_data.append((Bemd[i:i+w].mean(), + Bconf[i:i+w].mean())) + bin_idcs.append(σ[i:i+w]) + i += w + + curve = hv.Curve(curve_data, kdims="Bemd", vdims="Bconf", label=f"{c=}") + curve = curve.redim.range(Bemd=(0,1), Bconf=(0,1)) + curve.opts(hv.opts.Curve(**config.viz.calibration_curves)) + calib_curves[c] = curve + calib_bins[c] = bin_idcs + + calib_hmap = hv.HoloMap(calib_curves, kdims=["c"]) + + ## Prohibited & discouraged areas ## + # Prohibited area + prohibited_areas = hv.Area([(x, x, 1-x) for x in np.linspace(0, 1, 32)], + kdims=["Bemd"], vdims=["Bconf", "Bconf2"], + group="overconfident area") + + # Discouraged areas + discouraged_area_1 = hv.Area([(x, 1-x, 1) for x in np.linspace(0, 0.5, 16)], + kdims=["Bemd"], vdims=["Bconf", "Bconf2"], + group="undershoot area") + discouraged_area_2 = hv.Area([(x, 0, 1-x) for x in np.linspace(0.5, 1, 16)], + kdims=["Bemd"], vdims=["Bconf", "Bconf2"], + group="undershoot area") + + prohibited_areas = prohibited_areas.redim.range(Bemd=(0,1), Bconf=(0,1)) + discouraged_area_1 = discouraged_area_1.redim.range(Bemd=(0,1), Bconf=(0,1)) + discouraged_area_2 = discouraged_area_2.redim.range(Bemd=(0,1), Bconf=(0,1)) + + prohibited_areas.opts(hv.opts.Area(**config.viz.prohibited_area)) + discouraged_area_1.opts(hv.opts.Area(**config.viz.discouraged_area)) + discouraged_area_2.opts(hv.opts.Area(**config.viz.discouraged_area)) + + ## Combine & return ## + return CalibrationPlotElements( + calib_hmap, prohibited_areas, discouraged_area_1*discouraged_area_2, + calib_bins) +``` + +sanitize = hv.core.util.sanitize_identifier_fn + ++++ + +def plot_R_bars(df_emd_cond: pd.DataFrame, data_label: str, + colors: list[str], size_dim: str, xformatter=None + ) -> hv.Overlay: + """ + Create a plot of vertical marks, with each mark corresponding to a value + in `df_emd_cond`. 
+ The span of marks corresponding to the same dataset size is shown by a + horizontal bar above them. The values of dataset sizes should be given in one + of the columns of the DataFrame `df_emd_cond`; `size_dim` indicates the + name of this column. + ++++ + + `colors` must be a list at least as long as the number of rows in `df_emd_cond`. + `size_dim` must match the name of the index level in the DataFrame + used to indicate the dataset size. + """ + size_labels = pd.Series(df_emd_cond.index.get_level_values(size_dim), index=df_emd_cond.index) + size_marker_heights = pd.Series((1.3 + np.arange(len(size_labels))*0.7)[::-1], # [::-1] places markers for smaller data sets higher + index=df_emd_cond.index) + logL_dim = hv.Dimension("logL", label=data_label) + y_dim = hv.Dimension("y", label=" ", range=(-0.5, max(size_marker_heights))) + size_dim = hv.Dimension("data_size", label=size_dim) + ++++ + + ## Construct the actual lines marking the log likelihood ## + vlines = [hv.Path([[(logL, -0.5), (logL, 1)]], kdims=[logL_dim, y_dim], + group=size, label=model_lbl) + .opts(color=c) + .opts(line_dash=dash_pattern, backend="bokeh") + .opts(linestyle=dash_pattern, backend="matplotlib") + for (size, dash_pattern) in zip(df_emd_cond.index, dash_patterns) + for (model_lbl, logL), c in zip(df_emd_cond.loc[size].items(), colors)] + ++++ + + # Legend proxies (Required because Path elements are not included in the legend) + legend_proxies = [hv.Curve([(0,0)], label=f"model: {model_lbl}") + .opts(color=c) + .opts(linewidth=3, backend="matplotlib") + for model_lbl, c in zip(df_emd_cond.columns, colors)] + ++++ + + ## Construct the markers indicating data set sizes ## + # These are composed of a horizontal segment above the log L markers, and a label describing the data set size + logp_centers = (df_emd_cond.max(axis="columns") + df_emd_cond.min(axis="columns"))/2 + df_size_labels = pd.DataFrame( + (logp_centers, size_marker_heights, size_labels), + index=["x", "y", size_dim.name] + ).T + 
++++ + + size_labels_labels = hv.Labels(df_size_labels, kdims=["x", "y"], vdims=size_dim) + ++++ + + size_markers = hv.Segments(dict(logL=df_emd_cond.min(axis="columns"), + logL_right=df_emd_cond.max(axis="columns"), + y0=size_marker_heights.to_numpy(), + y1=size_marker_heights.to_numpy()), + [logL_dim, "y0", "logL_right", "y1"]) + ++++ + + ## Assemble and configure options ## + ov = hv.Overlay([*vlines, *legend_proxies, size_markers, size_labels_labels]) + ++++ + + # For some reason, applying these opts separately is more reliable with matplotlib backend + size_markers.opts( + hv.opts.Segments(color="black"), + hv.opts.Segments(line_width=1, backend="bokeh"), + hv.opts.Segments(linewidth=1, backend="matplotlib") + ) + size_labels_labels.opts( + hv.opts.Labels(yoffset=0.2), + hv.opts.Labels(yoffset=0.2, text_font_size="8pt", text_align="center", backend="bokeh"), + hv.opts.Labels(yoffset=0.4, size=10, verticalalignment="top", backend="matplotlib") + ) + ov.opts( + hv.opts.Path(yaxis="bare", show_frame=False), + # *(hv.opts.Path(f"{sanitize(size)}.{model_lbl}", color=c) + # for model_lbl, c in zip(df_emd_cond.columns, colors.curve) for size in size_labels), + # *(hv.opts.Path(sanitize(size), line_dash=dashpattern, backend="bokeh") + # for size, dashpattern in zip( + # size_labels, dash_patterns)), + hv.opts.Path(line_width=3, backend="bokeh"), + hv.opts.Path(linewidth=3, backend="matplotlib"), + hv.opts.Overlay(yaxis="bare", show_frame=False, padding=0.175, + show_legend=True, legend_position="bottom"), + hv.opts.Overlay(width=600, height=200, + legend_spacing=30, backend="bokeh") + ) + if xformatter: + ov.opts(xformatter=xformatter, backend="bokeh") + ov.opts(hooks=[no_spine_hook("left")], backend="matplotlib") + ++++ + + return ov diff --git a/emd_falsify/viz.py b/src/emd_falsify/viz.py similarity index 91% rename from emd_falsify/viz.py rename to src/emd_falsify/viz.py index f042895..a6e1ecc 100644 --- a/emd_falsify/viz.py +++ b/src/emd_falsify/viz.py @@ -1,21 
+1,49 @@ +# -*- coding: utf-8 -*- +# --- +# jupyter: +# jupytext: +# formats: py:percent,md:myst +# notebook_metadata_filter: -jupytext.text_representation.jupytext_version +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# kernelspec: +# display_name: Python (emd-falsify-dev) +# language: python +# name: emd-falsify-dev +# --- + +# %% editable=true slideshow={"slide_type": ""} from __future__ import annotations +# %% import numpy as np import pandas as pd import matplotlib as mpl import seaborn as sns import holoviews as hv +# %% from dataclasses import dataclass from typing import Optional, Dict, List +# %% editable=true raw_mimetype="" slideshow={"slide_type": ""} tags=["skip-execution"] from .config import config from . import utils +# %% editable=true slideshow={"slide_type": ""} tags=["active-ipynb", "remove-cell"] +# from config import config +# import utils + +# %% dash_patterns = ["dotted", "dashed", "solid"] -## Plotting functions ## +# %% [markdown] +# # Plotting functions + +# %% @dataclass class CalibrationPlotElements: """ @@ -36,6 +64,8 @@ def __iter__(self): def _repr_mimebundle_(self, *args, **kwds): return (self.prohibited_areas * self.discouraged_areas * self.calibration_curves)._repr_mimebundle_(*args, **kwds) + +# %% def calibration_bins(calib_results: CalibrateResult, target_bin_size: Optional[int]=None): """Return the bin edges for the histograms produced by `calibration_plot`. 
@@ -56,6 +86,8 @@ def calibration_bins(calib_results: CalibrateResult, bin_edges[c] = edges return bin_edges + +# %% def calibration_plot(calib_results: CalibrateResult, target_bin_size: Optional[int]=None ) -> CalibrationPlotElements: @@ -133,9 +165,10 @@ def calibration_plot(calib_results: CalibrateResult, calib_hmap, prohibited_areas, discouraged_area_1*discouraged_area_2, calib_bins) - +# %% [markdown] # sanitize = hv.core.util.sanitize_identifier_fn +# %% [markdown] # def plot_R_bars(df_emd_cond: pd.DataFrame, data_label: str, # colors: list[str], size_dim: str, xformatter=None # ) -> hv.Overlay: @@ -147,6 +180,7 @@ def calibration_plot(calib_results: CalibrateResult, # of the columns of the DataFrame `df_emd_cond`; `size_dim` indicates the # name of this column. +# %% [markdown] # `colors` must be a list at least as long as the number of rows in `df_emd_cond`. # `size_dim` must match the name of the index level in the DataFrame # used to indicate the dataset size. @@ -157,7 +191,8 @@ def calibration_plot(calib_results: CalibrateResult, # logL_dim = hv.Dimension("logL", label=data_label) # y_dim = hv.Dimension("y", label=" ", range=(-0.5, max(size_marker_heights))) # size_dim = hv.Dimension("data_size", label=size_dim) - + +# %% [markdown] # ## Construct the actual lines marking the log likelihood ## # vlines = [hv.Path([[(logL, -0.5), (logL, 1)]], kdims=[logL_dim, y_dim], # group=size, label=model_lbl) @@ -166,13 +201,15 @@ def calibration_plot(calib_results: CalibrateResult, # .opts(linestyle=dash_pattern, backend="matplotlib") # for (size, dash_pattern) in zip(df_emd_cond.index, dash_patterns) # for (model_lbl, logL), c in zip(df_emd_cond.loc[size].items(), colors)] - + +# %% [markdown] # # Legend proxies (Required because Path elements are not included in the legend) # legend_proxies = [hv.Curve([(0,0)], label=f"model: {model_lbl}") # .opts(color=c) # .opts(linewidth=3, backend="matplotlib") # for model_lbl, c in zip(df_emd_cond.columns, colors)] +# %% 
[markdown] # ## Construct the markers indicating data set sizes ## # # These are composed of a horizontal segment above the log L markers, and a label describing the data set size # logp_centers = (df_emd_cond.max(axis="columns") + df_emd_cond.min(axis="columns"))/2 @@ -180,18 +217,22 @@ def calibration_plot(calib_results: CalibrateResult, # (logp_centers, size_marker_heights, size_labels), # index=["x", "y", size_dim.name] # ).T - + +# %% [markdown] # size_labels_labels = hv.Labels(df_size_labels, kdims=["x", "y"], vdims=size_dim) - + +# %% [markdown] # size_markers = hv.Segments(dict(logL=df_emd_cond.min(axis="columns"), # logL_right=df_emd_cond.max(axis="columns"), # y0=size_marker_heights.to_numpy(), # y1=size_marker_heights.to_numpy()), -# [logL_dim, "y0", "logL_right", "y1"]) +# [logL_dim, "y0", "logL_right", "y1"]) +# %% [markdown] # ## Assemble and configure options ## # ov = hv.Overlay([*vlines, *legend_proxies, size_markers, size_labels_labels]) - + +# %% [markdown] # # For some reason, applying these opts separately is more reliable with matplotlib backend # size_markers.opts( # hv.opts.Segments(color="black"), @@ -220,6 +261,6 @@ def calibration_plot(calib_results: CalibrateResult, # if xformatter: # ov.opts(xformatter=xformatter, backend="bokeh") # ov.opts(hooks=[no_spine_hook("left")], backend="matplotlib") - -# return ov +# %% [markdown] +# return ov