
Write paper body
gdalle committed Sep 12, 2023
1 parent 99ab683 commit fc6b292
Showing 2 changed files with 82 additions and 14 deletions.
50 changes: 50 additions & 0 deletions HMM.bib
@@ -1,3 +1,33 @@
@software{antonelloHMMGradientsJl2022,
title = {{{HMMGradients}}.Jl},
author = {Antonello, Niccolò},
date = {2022-08-06T09:40:18Z},
origdate = {2020-12-04T14:36:27Z},
url = {https://github.com/idiap/HMMGradients.jl},
urldate = {2022-08-06},
abstract = {Enables computing the gradient of the parameters of Hidden Markov Models (HMMs)},
organization = {{Idiap Research Institute}},
keywords = {\#nosource,hmm}
}

@article{besanconDistributionsJlDefinition2021,
title = {Distributions{{.jl}}: {{Definition}} and {{Modeling}} of {{Probability Distributions}} in the {{JuliaStats Ecosystem}}},
shorttitle = {Distributions.Jl},
author = {Besançon, Mathieu and Papamarkou, Theodore and Anthoff, David and Arslan, Alex and Byrne, Simon and Lin, Dahua and Pearson, John},
date = {2021-07-25},
journaltitle = {Journal of Statistical Software},
volume = {98},
pages = {1--30},
issn = {1548-7660},
doi = {10.18637/jss.v098.i16},
url = {https://doi.org/10.18637/jss.v098.i16},
urldate = {2022-09-19},
abstract = {Random variables and their distributions are a central part in many areas of statistical methods. The Distributions.jl package provides Julia users and developers tools for working with probability distributions, leveraging Julia features for their intuitive and flexible manipulation, while remaining highly efficient through zero-cost abstractions.},
langid = {english},
keywords = {hmm,thesis},
file = {/home/gdalle/Zotero/storage/FZ5V2QNZ/Besancon et al_2021_Distributions.pdf}
}

@article{bezansonJuliaFreshApproach2017,
title = {Julia: {{A Fresh Approach}} to {{Numerical Computing}}},
shorttitle = {Julia},
@@ -35,6 +65,26 @@ @book{cappeInferenceHiddenMarkov2005
file = {/home/gdalle/Zotero/storage/2HYZE7ZD/Cappé et al_2005_Inference in Hidden Markov Models.pdf;/home/gdalle/Zotero/storage/QRNV9CL8/Cappé et al. - 2006 - Inference in Hidden Markov Models.pdf}
}

@thesis{dalleMachineLearningCombinatorial2022,
type = {phdthesis},
title = {Machine Learning and Combinatorial Optimization Algorithms, with Applications to Railway Planning},
author = {Dalle, Guillaume},
editora = {Meunier, Frédéric and De Castro, Yohann and Parmentier, Axel},
editoratype = {collaborator},
date = {2022-12-16},
institution = {{École des Ponts ParisTech}},
url = {https://www.theses.fr/2022ENPC0047},
urldate = {2023-03-31},
abbr = {Dissertation},
abstract = {This thesis investigates the frontier between machine learning and combinatorial optimization, two active areas of applied mathematics research. We combine theoretical insights with efficient algorithms, and develop several open source Julia libraries. Inspired by a collaboration with the Société nationale des chemins de fer français (SNCF), we study high-impact use cases from the railway world: train failure prediction, delay propagation, and track allocation.In Part I, we provide mathematical background and describe software implementations for various tools that will be needed later on: implicit differentiation, temporal point processes, Hidden Markov Models and Multi-Agent Path Finding. Our publicly-available code fills a void in the Julia package ecosystem, aiming at ease of use without compromising on performance.In Part II, we highlight theoretical contributions related to both statistics and decision-making. We consider a Vector AutoRegressive process with partial observations, and prove matching upper and lower bounds on the estimation error. We unify and extend the state of the art for combinatorial optimization layers in deep learning, gathering various approaches in a Julia library called InferOpt.jl. We also seek to differentiate through multi-objective optimization layers, which leads to a novel theory of lexicographic convex analysis.In Part III, these mathematical and algorithmic foundations come together to tackle railway problems. We design a hierarchical model of train failures, propose a graph-based framework for delay propagation, and suggest new avenues for track allocation, with the Flatland challenge as a testing ground.},
bibtex_show = {true},
hal = {https://pastel.archives-ouvertes.fr/tel-04053322},
langid = {english},
selected = {true},
keywords = {hmm,paper,website},
file = {/home/gdalle/Zotero/storage/CEVJMUP4/Dalle - Machine learning and combinatorial optimization al.pdf}
}

@software{HmmlearnHmmlearn2023,
title = {Hmmlearn/Hmmlearn},
date = {2023-09-10T19:29:19Z},
46 changes: 32 additions & 14 deletions paper.md
@@ -1,5 +1,5 @@
---
-title: 'HiddenMarkovModels.jl: latent-variable modeling at the speed of Julia'
+title: 'HiddenMarkovModels.jl: generic, fast and reliable latent variable modeling'
tags:
- Julia
- statistics
@@ -9,36 +9,54 @@ tags:
authors:
- name: Guillaume Dalle
orcid: 0000-0003-4866-1687
-   equal-contrib: true
-   affiliation: "1, 2"
+   affiliation: 1
affiliations:
- - name: EPFL, IdePHICS Lab, Switzerland
+ - name: EPFL, IdePHICS and INDY labs, Switzerland
    index: 1
- - name: EPFL, INDY Lab, Switzerland
-   index: 2
date: September 2023
bibliography: HMM.bib

---

# Summary

-Hidden Markov Models (or HMMs) are a very popular statistical framework, with applications ranging from speech recognition to bioinformatics and reliability analysis.
-They model a sequence of _observations_ $\mathbf{Y} = (Y_1, \dots, Y_T)$ by assuming the existence of a hidden sequence of _states_ $\mathbf{X} = (X_1, \dots, X_T)$.
+Hidden Markov Models (or HMMs) are a very popular statistical framework, with applications ranging from speech recognition to bioinformatics.
+They model a sequence of _observations_ $Y_1, \dots, Y_T$ by assuming the existence of a hidden sequence of _states_ $X_1, \dots, X_T$.
The distribution of a state $X_t$ can only depend on the previous state $X_{t-1}$, and the distribution of an observation $Y_t$ can only depend on the current state $X_t$.
-See @rabinerTutorialHiddenMarkov1989 for an introduction and @cappeInferenceHiddenMarkov2005 for a book-length treatment.
+This is a very versatile and practical set of assumptions: see @rabinerTutorialHiddenMarkov1989 for an introduction and @cappeInferenceHiddenMarkov2005 for a book-length treatment.
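
In other words, the joint distribution factorizes along a chain:

$$\mathbb{P}(X_1, \dots, X_T, Y_1, \dots, Y_T) = \mathbb{P}(X_1) \prod_{t=2}^{T} \mathbb{P}(X_t \mid X_{t-1}) \prod_{t=1}^{T} \mathbb{P}(Y_t \mid X_t).$$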

-Given a sequence of observations and a parametric family of HMMs, there are several problems one can face: calculating the likelihood, decoding the most likely state sequence, or even estimating the best parameter.
+Given a sequence of observations and a parametric family of HMMs, there are several problems one can face: computing the likelihood, decoding the underlying state sequence, or estimating the most likely parameter.
For generic probabilistic graphical models, these problems are often intractable, but HMMs have a tree-like structure that yields exact solution procedures with polynomial complexity.
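
As an illustration, here is a standalone sketch of the forward recursion, which computes the loglikelihood of a sequence in $\mathcal{O}(TN^2)$ time for $N$ states. This is textbook material, not the package's internal code, and the example numbers are made up:

```julia
# Forward recursion: loglikelihood of an observation sequence in O(T*N^2).
# init[i] = P(X_1 = i); trans[i, j] = P(X_t = j | X_{t-1} = i);
# loglik[i, t] = log-density of observation t under state i.
function forward_logdensity(init, trans, loglik)
    T = size(loglik, 2)
    α = init .* exp.(loglik[:, 1])
    logL = log(sum(α))
    α ./= sum(α)  # rescale at each step to prevent numerical underflow
    for t in 2:T
        α = (trans' * α) .* exp.(loglik[:, t])
        logL += log(sum(α))
        α ./= sum(α)
    end
    return logL
end

init = [0.6, 0.4]
trans = [0.9 0.1; 0.2 0.8]
loglik = log.([0.5 0.1 0.3; 0.2 0.7 0.4])  # 2 states, 3 time steps
forward_logdensity(init, trans, loglik)
```

The Viterbi decoding and Baum-Welch estimation algorithms rely on the same kind of dynamic programming over the chain.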

-Our library [HiddenMarkovModels.jl](https://github.com/gdalle/HiddenMarkovModels.jl) leverages the Julia language @bezansonJuliaFreshApproach2017 to implement HMM inference algorithms in an _efficient_ and _generic_ way.
+The package [`HiddenMarkovModels.jl`](https://github.com/gdalle/HiddenMarkovModels.jl) leverages the Julia language [@bezansonJuliaFreshApproach2017] to implement those algorithms in a _generic_, _fast_ and _reliable_ way.

# Statement of need

The initial motivation for HiddenMarkovModels.jl was an application of HMMs to reliability analysis for the French railway company SNCF [@dalleMachineLearningCombinatorial2022].
In this industrial use case, the observations were marked temporal point processes (sequences of timed events with structured metadata) generated by condition monitoring systems.
Such objects are hard to vectorize, which is why developing in Julia was a natural choice to combine performance and readability.

For years, the reference Julia implementation of HMMs has been [`HMMBase.jl`](https://github.com/maxmouchet/HMMBase.jl) [@mouchetMaxmouchetHMMBaseJl2023].
Its major flaw is that it can only handle observation distributions from [`Distributions.jl`](https://github.com/JuliaStats/Distributions.jl) [@besanconDistributionsJlDefinition2021].
This precludes observations that are not scalar- or array-valued, and makes it harder for users to experiment with custom distributions.
Note that a similar restriction is shared by the major competitors from the Python world, namely [`hmmlearn`](https://github.com/hmmlearn/hmmlearn) [@HmmlearnHmmlearn2023] and [`pomegranate`](https://github.com/jmschrei/pomegranate) [@schreiberPomegranateFastFlexible2018a].

Other downsides of `HMMBase.jl` include the lack of support for multiple observation sequences and the systematic use of 64-bit floating-point numbers.
Two other packages provide functionalities that `HMMBase.jl` lacks, but all three have mutually incompatible APIs: [`HMMGradients.jl`](https://github.com/idiap/HMMGradients.jl) [@antonelloHMMGradientsJl2022] contains a differentiable loglikelihood function, while [`MarkovModels.jl`](https://github.com/FAST-ASR/MarkovModels.jl) [@ondelGPUAcceleratedForwardBackwardAlgorithm2021] focuses on GPU acceleration.

## Package design

`HiddenMarkovModels.jl` tries to offer the best of all worlds by being...

* _Generic_. Observations can be arbitrary objects, and the associated distributions only need to implement a loglikelihood and a sampler. Number types are not restricted, and users can extend the abstract HMM interface to incorporate features such as priors or structured transition matrices. In addition, automatic differentiation of the loglikelihood [@qinDirectOptimizationApproach2000] is supported both in forward and reverse mode. A brief usage sketch follows this list.
* _Fast_. Julia's blend of multiple dispatch and just-in-time compilation delivers satisfactory speed even when working with arbitrary objects. The combination of linear algebra subroutines and multithreading enables scaling to several sequences and moderate-sized state spaces. This is demonstrated in the benchmarks below.
* _Reliable_. The package is thoroughly tested and documented, with an extensive API reference and tutorials. Special care was given to code quality, type stability and compatibility checks with various downstream packages (e.g. exotic array or number formats).
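
As a brief usage sketch (the exact function names and return formats here are assumptions based on the package's documented interface, and may differ between versions):

```julia
using Distributions, HiddenMarkovModels

# An HMM is specified by an initial distribution, a transition matrix,
# and one observation distribution per state (here from Distributions.jl,
# but any object with a log-density and a sampler would do).
init = [0.6, 0.4]
trans = [0.7 0.3; 0.2 0.8]
dists = [Normal(-1.0), Normal(1.0)]
hmm = HMM(init, trans, dists)

state_seq, obs_seq = rand(hmm, 100)  # simulate states and observations
logdensityof(hmm, obs_seq)           # loglikelihood of a sequence
viterbi(hmm, obs_seq)                # most likely state sequence
baum_welch(hmm, obs_seq)             # parameter estimation (EM algorithm)
```

Since the observation distributions only need a log-density and a sampler, replacing `Normal` with a custom type is straightforward.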

# Benchmarks


# Acknowledgements

-Maxime Mouchet provided a lot of inspiration through his package [HMMBase.jl](https://github.com/maxmouchet/HMMBase.jl).
-Jacob Schreiber gave advice on how best to benchmark his package [pomegranate](https://github.com/jmschrei/pomegranate).
+A special thanks goes to Maxime Mouchet and Jacob Schreiber, the developers of `HMMBase.jl` and `pomegranate` respectively, for their help and advice.
+In particular, Maxime agreed to declare `HiddenMarkovModels.jl` as the official successor to `HMMBase.jl`, which I am grateful for.

# References
