From 3a0daace8761b045b1ca47de1b699a971d692408 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Thu, 29 Feb 2024 15:07:41 +0100 Subject: [PATCH] Update paper --- paper/HMM.bib | 156 +-- paper/images/benchmark.svg | 1968 ++++++++++++++++++++++++++++++++++++ paper/paper.md | 82 +- 3 files changed, 2094 insertions(+), 112 deletions(-) create mode 100644 paper/images/benchmark.svg diff --git a/paper/HMM.bib b/paper/HMM.bib index e3bb7639..6032e6ba 100644 --- a/paper/HMM.bib +++ b/paper/HMM.bib @@ -1,12 +1,12 @@ -@software{antonelloHMMGradientsJlEnables2021, +@misc{antonelloHMMGradientsJlEnables2021, title = {{{HMMGradients}}.Jl: {{Enables}} Computing the Gradient of the Parameters of {{Hidden Markov Models}} ({{HMMs}})}, shorttitle = {Idiap/{{HMMGradients}}.Jl}, - author = {Antonello, Niccolò}, - date = {2021-06-07}, + author = {Antonello, Niccol{\`o}}, + year = {2021}, + month = jun, doi = {10.5281/zenodo.4454565}, - url = {https://doi.org/10.5281/zenodo.4454565}, urldate = {2023-09-12}, - organization = {{Zenodo}}, + howpublished = {Zenodo}, keywords = {hmm}, file = {/home/gdalle/snap/zotero-snap/common/Zotero/storage/PEFYSLF7/4906644.html} } @@ -15,10 +15,9 @@ @inproceedings{bengioInputOutputHMM1994 title = {An {{Input Output HMM Architecture}}}, booktitle = {Advances in {{Neural Information Processing Systems}}}, author = {Bengio, Yoshua and Frasconi, Paolo}, - date = {1994}, + year = {1994}, volume = {7}, publisher = {{MIT Press}}, - url = {https://proceedings.neurips.cc/paper/1994/hash/8065d07da4a77621450aa84fee5656d9-Abstract.html}, urldate = {2023-03-12}, abstract = {We introduce a recurrent architecture having a modular structure and we formulate a training procedure based on the EM algorithm. 
The resulting model has similarities to hidden Markov models, but supports recurrent networks processing style and allows to exploit the supervised learning paradigm while using maximum likelihood estimation.}, keywords = {hmm,thesis}, @@ -28,16 +27,17 @@ @inproceedings{bengioInputOutputHMM1994 @article{besanconDistributionsJlDefinition2021, title = {Distributions{{.jl}}: {{Definition}} and {{Modeling}} of {{Probability Distributions}} in the {{JuliaStats Ecosystem}}}, shorttitle = {Distributions.Jl}, - author = {Besançon, Mathieu and Papamarkou, Theodore and Anthoff, David and Arslan, Alex and Byrne, Simon and Lin, Dahua and Pearson, John}, - date = {2021-07-25}, - journaltitle = {Journal of Statistical Software}, + author = {Besan{\c c}on, Mathieu and Papamarkou, Theodore and Anthoff, David and Arslan, Alex and Byrne, Simon and Lin, Dahua and Pearson, John}, + year = {2021}, + month = jul, + journal = {Journal of Statistical Software}, volume = {98}, pages = {1--30}, issn = {1548-7660}, doi = {10.18637/jss.v098.i16}, - url = {https://doi.org/10.18637/jss.v098.i16}, urldate = {2022-09-19}, abstract = {Random variables and their distributions are a central part in many areas of statistical methods. 
The Distributions.jl package provides Julia users and developers tools for working with probability distributions, leveraging Julia features for their intuitive and flexible manipulation, while remaining highly efficient through zero-cost abstractions.}, + copyright = {Copyright (c) 2021 Mathieu Besan{\c c}on, Theodore Papamarkou, David Anthoff, Alex Arslan, Simon Byrne, Dahua Lin, John Pearson}, langid = {english}, keywords = {hmm,thesis}, file = {/home/gdalle/snap/zotero-snap/common/Zotero/storage/FZ5V2QNZ/Besancon et al_2021_Distributions.pdf} @@ -47,15 +47,14 @@ @article{bezansonJuliaFreshApproach2017 title = {Julia: {{A Fresh Approach}} to {{Numerical Computing}}}, shorttitle = {Julia}, author = {Bezanson, Jeff and Edelman, Alan and Karpinski, Stefan and Shah, Viral B.}, - date = {2017-01}, - journaltitle = {SIAM Review}, - shortjournal = {SIAM Rev.}, + year = {2017}, + month = jan, + journal = {SIAM Review}, volume = {59}, number = {1}, pages = {65--98}, issn = {0036-1445, 1095-7200}, doi = {10.1137/141000671}, - url = {https://epubs.siam.org/doi/10.1137/141000671}, urldate = {2022-12-03}, langid = {english}, keywords = {bootstrap,hmm,inferopt,povar,thesis,viva}, @@ -64,87 +63,102 @@ @article{bezansonJuliaFreshApproach2017 @book{cappeInferenceHiddenMarkov2005, title = {Inference in {{Hidden Markov Models}}}, - author = {Cappé, Olivier and Moulines, Eric and Rydén, Tobias}, - date = {2005}, + author = {Capp{\'e}, Olivier and Moulines, Eric and Ryd{\'e}n, Tobias}, + year = {2005}, series = {Springer {{Series}} in {{Statistics}}}, - eprint = {4d_oEYn8Fl0C}, - eprinttype = {googlebooks}, publisher = {{Springer New York}}, - location = {{New York, NY}}, + address = {{New York, NY}}, doi = {10.1007/0-387-28982-8}, - url = {http://link.springer.com/10.1007/0-387-28982-8}, urldate = {2022-12-03}, + googlebooks = {4d\_oEYn8Fl0C}, isbn = {978-0-387-40264-2 978-0-387-28982-3}, langid = {english}, keywords = {hmm,povar,thesis}, file = 
{/home/gdalle/snap/zotero-snap/common/Zotero/storage/2HYZE7ZD/Cappé et al_2005_Inference in Hidden Markov Models.pdf;/home/gdalle/snap/zotero-snap/common/Zotero/storage/QRNV9CL8/Cappé et al. - 2006 - Inference in Hidden Markov Models.pdf} } -@software{changDynamaxStateSpace2024, +@misc{changDynamaxStateSpace2024, title = {Dynamax: {{State Space Models}} Library in {{JAX}}}, - author = {Chang, Peter and Harper-Donnelly, Giles and Kara, Aleyna and Li, Xinglong and Linderman, Scott and Murphy, Kevin}, - date = {2024-02-22T04:10:59Z}, - origdate = {2022-04-11T23:42:29Z}, - url = {https://github.com/probml/dynamax}, + author = {Chang, Peter and {Harper-Donnelly}, Giles and Kara, Aleyna and Li, Xinglong and Linderman, Scott and Murphy, Kevin}, + year = {2024}, + month = feb, urldate = {2024-02-22}, abstract = {State Space Models library in JAX}, - organization = {{Probabilistic machine learning}}, + copyright = {MIT}, + howpublished = {Probabilistic machine learning}, keywords = {hmm} } -@thesis{dalleMachineLearningCombinatorial2022, - type = {phdthesis}, +@phdthesis{dalleMachineLearningCombinatorial2022, title = {Machine Learning and Combinatorial Optimization Algorithms, with Applications to Railway Planning}, author = {Dalle, Guillaume}, - editora = {Meunier, Frédéric and De Castro, Yohann and Parmentier, Axel}, - editoratype = {collaborator}, - date = {2022-12-16}, - institution = {{École des Ponts ParisTech}}, - url = {https://www.theses.fr/2022ENPC0047}, + year = {2022}, + month = dec, abbr = {Dissertation}, - abstract = {This thesis investigates the frontier between machine learning and combinatorial optimization, two active areas of applied mathematics research. We combine theoretical insights with efficient algorithms, and develop several open source Julia libraries. 
Inspired by a collaboration with the Société nationale des chemins de fer français (SNCF), we study high-impact use cases from the railway world: train failure prediction, delay propagation, and track allocation.In Part I, we provide mathematical background and describe software implementations for various tools that will be needed later on: implicit differentiation, temporal point processes, Hidden Markov Models and Multi-Agent Path Finding. Our publicly-available code fills a void in the Julia package ecosystem, aiming at ease of use without compromising on performance.In Part II, we highlight theoretical contributions related to both statistics and decision-making. We consider a Vector AutoRegressive process with partial observations, and prove matching upper and lower bounds on the estimation error. We unify and extend the state of the art for combinatorial optimization layers in deep learning, gathering various approaches in a Julia library called InferOpt.jl. We also seek to differentiate through multi-objective optimization layers, which leads to a novel theory of lexicographic convex analysis.In Part III, these mathematical and algorithmic foundations come together to tackle railway problems. We design a hierarchical model of train failures, propose a graph-based framework for delay propagation, and suggest new avenues for track allocation, with the Flatland challenge as a testing ground.}, + abstract = {This thesis investigates the frontier between machine learning and combinatorial optimization, two active areas of applied mathematics research. We combine theoretical insights with efficient algorithms, and develop several open source Julia libraries. 
Inspired by a collaboration with the Soci{\'e}t{\'e} nationale des chemins de fer fran{\c c}ais (SNCF), we study high-impact use cases from the railway world: train failure prediction, delay propagation, and track allocation.In Part I, we provide mathematical background and describe software implementations for various tools that will be needed later on: implicit differentiation, temporal point processes, Hidden Markov Models and Multi-Agent Path Finding. Our publicly-available code fills a void in the Julia package ecosystem, aiming at ease of use without compromising on performance.In Part II, we highlight theoretical contributions related to both statistics and decision-making. We consider a Vector AutoRegressive process with partial observations, and prove matching upper and lower bounds on the estimation error. We unify and extend the state of the art for combinatorial optimization layers in deep learning, gathering various approaches in a Julia library called InferOpt.jl. We also seek to differentiate through multi-objective optimization layers, which leads to a novel theory of lexicographic convex analysis.In Part III, these mathematical and algorithmic foundations come together to tackle railway problems. 
We design a hierarchical model of train failures, propose a graph-based framework for delay propagation, and suggest new avenues for track allocation, with the Flatland challenge as a testing ground.}, + collaborator = {Meunier, Fr{\'e}d{\'e}ric and De Castro, Yohann and Parmentier, Axel}, + copyright = {Licence Etalab}, langid = {english}, pdf = {https://pastel.archives-ouvertes.fr/tel-04053322}, + school = {{\'E}cole des Ponts ParisTech}, keywords = {cv,hmm,website}, file = {/home/gdalle/snap/zotero-snap/common/Zotero/storage/CEVJMUP4/Dalle - Machine learning and combinatorial optimization al.pdf} } -@software{hmmlearndevelopersHmmlearnHiddenMarkov2023, +@article{danischMakieJlFlexible2021, + title = {Makie.Jl: {{Flexible}} High-Performance Data Visualization for {{Julia}}}, + shorttitle = {Makie.Jl}, + author = {Danisch, Simon and Krumbiegel, Julius}, + year = {2021}, + month = sep, + journal = {Journal of Open Source Software}, + volume = {6}, + number = {65}, + pages = {3349}, + issn = {2475-9066}, + doi = {10.21105/joss.03349}, + urldate = {2024-02-29}, + abstract = {Danisch et al., (2021). Makie.jl: Flexible high-performance data visualization for Julia. 
Journal of Open Source Software, 6(65), 3349, https://doi.org/10.21105/joss.03349}, + langid = {english}, + keywords = {hmm}, + file = {/home/gdalle/snap/zotero-snap/common/Zotero/storage/T4W5S92V/Danisch and Krumbiegel - 2021 - Makie.jl Flexible high-performance data visualiza.pdf} +} + +@misc{hmmlearndevelopersHmmlearnHiddenMarkov2023, title = {Hmmlearn: {{Hidden Markov Models}} in {{Python}}, with Scikit-Learn like {{API}}}, author = {{hmmlearn developers}}, - date = {2023}, - url = {https://github.com/hmmlearn/hmmlearn}, + year = {2023}, urldate = {2023-09-12}, abstract = {Hidden Markov Models in Python, with scikit-learn like API}, - organization = {{hmmlearn}}, + copyright = {BSD-3-Clause}, + howpublished = {hmmlearn}, keywords = {hmm} } -@software{mouchetHMMBaseJlHidden2023, +@misc{mouchetHMMBaseJlHidden2023, title = {{{HMMBase}}.Jl: {{Hidden Markov Models}} for {{Julia}}}, author = {Mouchet, Maxime}, - date = {2023}, - url = {https://github.com/maxmouchet/HMMBase.jl}, + year = {2023}, urldate = {2023-09-12}, abstract = {Hidden Markov Models for Julia.}, + copyright = {MIT}, keywords = {hmm} } @book{murphyProbabilisticMachineLearning2023, title = {Probabilistic Machine Learning: Advanced Topics}, author = {Murphy, Kevin P.}, - date = {2023}, + year = {2023}, publisher = {{The MIT Press}}, - url = {probml.ai}, keywords = {hmm,todo}, file = {/home/gdalle/snap/zotero-snap/common/Zotero/storage/DXSP888K/Murphy - 2023 - Probabilistic machine learning advanced topics.pdf;/home/gdalle/snap/zotero-snap/common/Zotero/storage/XMNWZH35/supp2.pdf} } @unpublished{ondelGPUAcceleratedForwardBackwardAlgorithm2021, title = {{{GPU-Accelerated Forward-Backward Algorithm}} with {{Application}} to {{Lattic-Free MMI}}}, - author = {Ondel, Lucas and Lam-Yee-Mui, Léa-Marie and Kocour, Martin and Filippo, Caio and Lukás Burget, Corro}, - date = {2021-11}, - url = {https://hal.science/hal-03434552}, + author = {Ondel, Lucas and {Lam-Yee-Mui}, L{\'e}a-Marie and Kocour, Martin and 
Filippo Corro, Caio and Burget, Luk{\'a}s}, + year = {2021}, + month = nov, urldate = {2023-09-12}, abstract = {We propose to express the forward-backward algorithm in terms of operations between sparse matrices in a specific semiring. This new perspective naturally leads to a GPU-friendly algorithm which is easy to implement in Julia or any programming languages with native support of semiring algebra. We use this new implementation to train a TDNN with the LF-MMI objective function and we compare the training time of our system with PyChain-a recently introduced C++/CUDA implementation of the LF-MMI loss. Our implementation is about two times faster while not having to use any approximation such as the "leaky-HMM".}, keywords = {hmm}, @@ -154,15 +168,14 @@ @unpublished{ondelGPUAcceleratedForwardBackwardAlgorithm2021 @article{qinDirectOptimizationApproach2000, title = {A {{Direct Optimization Approach}} to {{Hidden Markov Modeling}} for {{Single Channel Kinetics}}}, author = {Qin, Feng and Auerbach, Anthony and Sachs, Frederick}, - date = {2000-10-01}, - journaltitle = {Biophysical Journal}, - shortjournal = {Biophysical Journal}, + year = {2000}, + month = oct, + journal = {Biophysical Journal}, volume = {79}, number = {4}, pages = {1915--1927}, issn = {0006-3495}, doi = {10.1016/S0006-3495(00)76441-1}, - url = {https://www.sciencedirect.com/science/article/pii/S0006349500764411}, urldate = {2022-08-06}, abstract = {Hidden Markov modeling (HMM) provides an effective approach for modeling single channel kinetics. Standard HMM is based on Baum's reestimation. As applied to single channel currents, the algorithm has the inability to optimize the rate constants directly. We present here an alternative approach by considering the problem as a general optimization problem. The quasi-Newton method is used for searching the likelihood surface. The analytical derivatives of the likelihood function are derived, thereby maximizing the efficiency of the optimization. 
Because the rate constants are optimized directly, the approach has advantages such as the allowance for model constraints and the ability to simultaneously fit multiple data sets obtained at different experimental conditions. Numerical examples are presented to illustrate the performance of the algorithm. Comparisons with Baum's reestimation suggest that the approach has a superior convergence speed when the likelihood surface is poorly defined due to, for example, a low signal-to-noise ratio or the aggregation of multiple states having identical conductances.}, langid = {english}, @@ -173,26 +186,38 @@ @article{qinDirectOptimizationApproach2000 @article{rabinerTutorialHiddenMarkov1989, title = {A Tutorial on Hidden {{Markov}} Models and Selected Applications in Speech Recognition}, author = {Rabiner, L.R.}, - date = {1989-02}, - journaltitle = {Proceedings of the IEEE}, + year = {1989}, + month = feb, + journal = {Proceedings of the IEEE}, volume = {77}, number = {2}, pages = {257--286}, issn = {1558-2256}, doi = {10/cswph2}, abstract = {This tutorial provides an overview of the basic theory of hidden Markov models (HMMs) as originated by L.E. Baum and T. Petrie (1966) and gives practical details on methods of implementation of the theory along with a description of selected applications of the theory to distinct problems in speech recognition. Results from a number of original sources are combined to provide a single source of acquiring the background required to pursue further this area of research. The author first reviews the theory of discrete Markov chains and shows how the concept of hidden states, where the observation is a probabilistic function of the state, can be used effectively. The theory is illustrated with two simple examples, namely coin-tossing, and the classic balls-in-urns system. Three fundamental problems of HMMs are noted and several practical techniques for solving these problems are given. 
The various types of HMMs that have been studied, including ergodic as well as left-right models, are described.{$<>$}}, - eventtitle = {Proceedings of the {{IEEE}}}, keywords = {done,hmm,thesis,viva}, file = {/home/gdalle/snap/zotero-snap/common/Zotero/storage/A68ILRMJ/Rabiner_1989_A tutorial on hidden Markov models and selected applications in speech.pdf;/home/gdalle/snap/zotero-snap/common/Zotero/storage/BEJEKP4E/Rabiner_1989_A tutorial on hidden Markov models and selected applications in speech.pdf;/home/gdalle/snap/zotero-snap/common/Zotero/storage/5BHQF7ME/18626.html} } -@software{rowleyLogarithmicNumbersJlLogarithmic2023, +@misc{rowleyLogarithmicNumbersJlLogarithmic2023, title = {{{LogarithmicNumbers}}.Jl: {{A}} Logarithmic Number System for {{Julia}}.}, author = {Rowley, Christopher}, - date = {2023-05-24T15:06:29Z}, - url = {https://github.com/cjdoris/LogarithmicNumbers.jl}, + year = {2023}, + month = may, urldate = {2023-09-12}, abstract = {A logarithmic number system for Julia.}, + copyright = {MIT}, + keywords = {hmm} +} + +@misc{rowleyPythonCallJlPython2022, + title = {{{PythonCall}}.Jl: {{Python}} and {{Julia}} in Harmony}, + author = {Rowley, Christopher}, + year = {2022}, + urldate = {2024-02-29}, + abstract = {Python and Julia in harmony.}, + copyright = {MIT}, + howpublished = {JuliaPy}, keywords = {hmm} } @@ -200,28 +225,27 @@ @article{schreiberPomegranateFastFlexible2018 title = {Pomegranate: {{Fast}} and {{Flexible Probabilistic Modeling}} in {{Python}}}, shorttitle = {Pomegranate}, author = {Schreiber, Jacob}, - date = {2018}, - journaltitle = {Journal of Machine Learning Research}, + year = {2018}, + journal = {Journal of Machine Learning Research}, volume = {18}, number = {164}, pages = {1--6}, issn = {1533-7928}, - url = {http://jmlr.org/papers/v18/17-636.html}, urldate = {2019-05-16}, keywords = {hmm}, file = 
{/home/gdalle/snap/zotero-snap/common/Zotero/storage/QKMY8X8M/Schreiber_2018_pomegranate.pdf;/home/gdalle/snap/zotero-snap/common/Zotero/storage/9GWGT5RK/17-636.html} } -@software{whiteJuliaDiffChainRulesJl2022, +@misc{whiteJuliaDiffChainRulesJl2022, title = {{{JuliaDiff}}/{{ChainRules}}{{.jl}}: V1.44.7}, shorttitle = {{{JuliaDiff}}/{{ChainRules}}.Jl}, - author = {White, Frames Catherine and Abbott, Michael and Zgubic, Miha and Revels, Jarrett and Axen, Seth and Arslan, Alex and Schaub, Simeon and Robinson, Nick and Yingbo Ma and Gaurav Dhingra and Tebbutt, Will and Heim, Niklas and Widmann, David and Rosemberg, Andrew David Werner and Schmitz, Niklas and Rackauckas, Christopher and Heintzmann, Rainer and Frankschae and Noack, Andreas and Lucibello, Carlo and Fischer, Keno and Robson, Alex and Cossio and Ling, Jerry and MattBrzezinski and Finnegan, Rory and Zhabinski, Andrei and Wennberg, Daniel and Besançon, Mathieu and Vertechi, Pietro}, - date = {2022-10-10}, + author = {White, Frames Catherine and Abbott, Michael and Zgubic, Miha and Revels, Jarrett and Axen, Seth and Arslan, Alex and Schaub, Simeon and Robinson, Nick and Yingbo Ma and Gaurav Dhingra and Tebbutt, Will and Heim, Niklas and Widmann, David and Rosemberg, Andrew David Werner and Schmitz, Niklas and Rackauckas, Christopher and Heintzmann, Rainer and Frankschae and Noack, Andreas and Lucibello, Carlo and Fischer, Keno and Robson, Alex and Cossio and Ling, Jerry and MattBrzezinski and Finnegan, Rory and Zhabinski, Andrei and Wennberg, Daniel and Besan{\c c}on, Mathieu and Vertechi, Pietro}, + year = {2022}, + month = oct, doi = {10.5281/ZENODO.4754896}, - url = {https://zenodo.org/record/4754896}, urldate = {2022-10-13}, abstract = {ChainRules v1.44.7 Diff since v1.44.6 {$<$}strong{$>$}Closed issues:{$<$}/strong{$>$} cat with Val tuple dims fails (\#678) {$<$}strong{$>$}Merged pull requests:{$<$}/strong{$>$} Fix for ChainRulesCore \#586 (\#675) (@rofinn) fix cat rrule (\#679) (@cossio)}, - 
organization = {{Zenodo}}, - version = {v1.44.7}, + copyright = {Open Access}, + howpublished = {Zenodo}, keywords = {#nosource,hmm,inferopt,thesis} } diff --git a/paper/images/benchmark.svg b/paper/images/benchmark.svg new file mode 100644 index 00000000..d60ee3c3 --- /dev/null +++ b/paper/images/benchmark.svg @@ -0,0 +1,1968 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/paper/paper.md b/paper/paper.md index 5d6c9364..c4785fcd 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -1,5 +1,5 @@ --- -title: 'HiddenMarkovModels.jl: generic, fast and reliable latent variable modeling' +title: 'HiddenMarkovModels.jl: generic, fast and reliable state space modeling' tags: - Julia - statistics @@ -11,11 +11,11 @@ authors: orcid: 0000-0003-4866-1687 affiliation: "1, 2, 3" affiliations: - - name: Ecole Polytechnique Fédérale de Lausanne (EPFL), Statistical Physics of Computation laboratory, CH-1015 Lausanne, Switzerland + - name: Information, Learning and Physics laboratory, Ecole Polytechnique Fédérale de Lausanne (EPFL), Station 11, CH-1015 Lausanne index: 1 - - name: Ecole Polytechnique Fédérale de Lausanne (EPFL), Information, Learning and Physics laboratory, CH-1015 Lausanne, Switzerland + - name: Information and Network Dynamics laboratory, Ecole Polytechnique Fédérale de Lausanne (EPFL), Station 14, CH-1015 Lausanne index: 2 - - name: Ecole Polytechnique Fédérale de Lausanne (EPFL), Information and Network Dynamics laboratory, CH-1015 Lausanne, Switzerland + - name: Statistical Physics of Computation laboratory, Ecole Polytechnique Fédérale de Lausanne (EPFL), CH-1015 Lausanne index: 3 date: 22 February 2024 bibliography: HMM.bib @@ -46,41 +46,22 @@ The package `HiddenMarkovModels.jl` leverages the Julia language [@bezansonJulia The initial motivation for HiddenMarkovModels.jl was an application of HMMs to reliability analysis for the French railway company SNCF [@dalleMachineLearningCombinatorial2022]. 
In this industrial use case, the observations were marked temporal point processes (sequences of timed events with structured metadata) generated by condition monitoring systems, possibly influenced by the daily activity of the train unit. -Unfortunately, nearly all implementations of HMMs we surveyed (in Julia and Python) expect the observations to be generated by a _predefined set of distributions_, with _no control dependency_. -In Julia, the reference package `HMMBase.jl` [@mouchetHMMBaseJlHidden2023] requires compliance with the `Distributions.jl` [@besanconDistributionsJlDefinition2021] interface, which precludes anything not scalar- or array-valued, let alone point processes. -In Python, `hmmlearn` [@hmmlearndevelopersHmmlearnHiddenMarkov2023], `pomegranate` [@schreiberPomegranateFastFlexible2018] each offer a catalogue of discrete and continuous distributions, but do not allow for easy extension by the user. -The more recent `dynamax` [@changDynamaxStateSpace2024] is the only package adopting an extensible interface with optional controls, similar to ours. +Unfortunately, nearly all implementations of HMMs we surveyed (in Julia and Python) expect the observations to be generated by a _predefined set of distributions_, with _no temporal heterogeneity_. +In Julia, the previous reference package `HMMBase.jl` [@mouchetHMMBaseJlHidden2023] requires compliance with the `Distributions.jl` [@besanconDistributionsJlDefinition2021] interface, which precludes anything not scalar- or array-valued, let alone point processes. +In Python, the `numpy`-based `hmmlearn` [@hmmlearndevelopersHmmlearnHiddenMarkov2023] and the `PyTorch`-based `pomegranate` [@schreiberPomegranateFastFlexible2018] each offer a catalogue of discrete and continuous distributions, but do not allow for easy extension by the user. +The more recent `JAX`-based `dynamax` [@changDynamaxStateSpace2024] is the only package adopting an extensible interface with optional controls, similar to ours. 
-Focusing on Julia specifically, other downsides of `HMMBase.jl` include the lack of support for _multiple observation sequences_ or _sparse transition matrices_, and the mandatory use of _64-bit floating point numbers_. -Two other packages provide functionalities that `HMMBase.jl` lacks: `HMMGradients.jl` [@antonelloHMMGradientsJlEnables2021] contains a _differentiable loglikelihood function_, while `MarkovModels.jl` [@ondelGPUAcceleratedForwardBackwardAlgorithm2021] focuses on GPU acceleration. -Unfortunately, all three have mutually incompatible APIs. +Focusing on Julia specifically, other downsides of `HMMBase.jl` include the lack of support for _multiple observation sequences_, _automatic differentiation_, _sparse transition matrices_ or _number types beyond 64-bit floating point_. +Two other Julia packages each provide a subset of functionalities that `HMMBase.jl` lacks, namely `HMMGradients.jl` [@antonelloHMMGradientsJlEnables2021] and `MarkovModels.jl` [@ondelGPUAcceleratedForwardBackwardAlgorithm2021], but they are less developed and inappropriate for general users. # Package design -`HiddenMarkovModels.jl` was designed to overcome the limitations mentioned above, with the following guiding principles in mind. +`HiddenMarkovModels.jl` was designed to overcome the limitations mentioned above, following a few guiding principles. -It is _generic_. +Our package is _generic_. Observations can be arbitrary objects, and the associated distributions only need to implement two methods: a loglikelihood `logdensityof(dist, x)` and a sampler `rand(rng, x)`. 
-Once those methods exist, model creation is very easy: - -```julia -using Distributions, HiddenMarkovModels -init = [0.4, 0.6] -trans = [0.9 0.1; 0.2 0.8] -dists = [Normal(-1.0), Normal(1.0)] -hmm = HMM(init, trans, dists) -``` - -Model use is just as intuitive: - -```julia -state_seq, obs_seq = rand(hmm, 100) -logdensityof(hmm, obs_seq) -forward_backward(hmm, obs_seq) -baum_welch(hmm, obs_seq) -``` - -The extendable `AbstractHMM` interface allows incorporating features such as priors or structured transition matrices, as well as control variables, simply by defining three methods: +Number types are not restricted, and automatic differentiation of the sequence loglikelihood [@qinDirectOptimizationApproach2000] is supported both in forward and reverse mode, partly thanks to `ChainRulesCore.jl` [@whiteJuliaDiffChainRulesJl2022]. +The extendable `AbstractHMM` interface allows incorporating features such as priors or structured transitions, as well as temporal or control dependency, simply by redefining three methods: ```julia initialization(hmm) @@ -88,33 +69,42 @@ transition_matrix(hmm, control) obs_distributions(hmm, control) ``` -Number types are not restricted, and automatic differentiation of the sequence loglikelihood [@qinDirectOptimizationApproach2000] is supported both in forward and reverse mode. - -It is _fast_. -Julia's blend of multiple dispatch and just-in-time compilation delivers satisfactory speed even when working with unexpected types. +Our package is _fast_. +Julia's blend of multiple dispatch and just-in-time compilation delivers satisfactory speed even when working with unexpected types that Python's tensor backends could not easily handle. Inference routines rely on BLAS calls for linear algebra, and exploit multithreading to process sequences in parallel. -It is _reliable_. -The package is thoroughly tested and documented, with an extensive API reference and accessible tutorials. 
-Special care was given to code quality, type stability, and compatibility checks with various downstream packages. +Our package is _reliable_. +It is thoroughly tested and documented, with an extensive API reference and accessible tutorials. +Special care was given to code quality, type stability, and compatibility checks with various downstream packages (like automatic differentiation packages). -However, it is also _limited in scope_. -It centers around CPU efficiency, and remains untested on GPU. -Furthermore, it does not perform probability computations in the logarithmic domain, but instead uses the scaling trick [@rabinerTutorialHiddenMarkov1989] with a variation borrowed from `HMMBase.jl`. +However, our package is also _limited in scope_. +It aims at CPU efficiency for moderately-sized state spaces, and remains untested on GPU. +Furthermore, it does not manipulate probabilities in the logarithmic domain, but instead uses the scaling trick [@rabinerTutorialHiddenMarkov1989] with a variation borrowed from `HMMBase.jl`. Thus, its numerical stability might be worse than that of Python counterparts on challenging instances. Luckily, thanks to unrestricted number types, users are free to bring in third-party packages like `LogarithmicNumbers.jl` [@rowleyLogarithmicNumbersJlLogarithmic2023] to recover additional precision. # Benchmarks -We compare `HiddenMarkovModels.jl`, `HMMBase.jl`, `hmmlearn`, `pomegranate` and `dynamax` on a test case with multivariate Gaussian observations. +We compare `HiddenMarkovModels.jl`, `HMMBase.jl`, `hmmlearn`, `pomegranate` and `dynamax` on a test case with univariate Gaussian observations. +The reason for this low-dimensional choice is to spend most of the time in the generic HMM routines themselves, as opposed to the loglikelihood computations which are problem-specific. 
+The data consists of $50$ independent sequences of length $100$ each, with a number of states varying from $2$ to $10$, to which we apply all inference algorithms (with Baum-Welch performing $5$ iterations). + +All benchmarks were run from Julia with `BenchmarkTools.jl` [@chenRobustBenchmarkingNoisy2016], calling Python when necessary with `PythonCall.jl` [@rowleyPythonCallJlPython2022], and the plots come from `CairoMakie.jl` [@danischMakieJlFlexible2021]. +The code is accessible in the `libs/HMMComparison/experiments` subfolder of our GitHub repository. + +![Benchmark of HMM packages](images/benchmark.svg) + +As we can see, `HiddenMarkovModels.jl` is the fastest option in Julia, and the second-fastest overall behind `dynamax` (we think the large runtimes of `dynamax` in Baum-Welch might stem from [incorrect benchmarks](https://github.com/probml/dynamax/issues/359)). +The key observation is that we achieved this speedup over `HMMBase.jl` while _simultaneously increasing generality_ in half a dozen different ways. # Conclusion -`HiddenMarkovModels.jl` fills a longstanding gap in the Julia package ecosystem, and might even prove interesting for Python users who desire additional flexibility. +`HiddenMarkovModels.jl` fills a longstanding gap in the Julia package ecosystem, by providing an efficient and flexible framework for state space modeling. # Acknowledgements -Work on this package started during my PhD at École des Ponts, in partnership with SNCF Réseau and SNCF Voyageurs, whose support I acknowledge, and continued during my postdoctoral position at EPFL. +Work on this package started during my PhD at École des Ponts, in partnership with SNCF Réseau and SNCF Voyageurs, whose support I acknowledge. +It continued during my postdoctoral position at EPFL. My gratitude goes to Maxime Mouchet and Jacob Schreiber, the developers of `HMMBase.jl` and `pomegranate` respectively, for their help and advice. 
In particular, Maxime agreed to designate `HiddenMarkovModels.jl` as the official successor to `HMMBase.jl`, for which I thank him.