From 91dfc0aaeb65d7abceed7e38d13f32a75d7f1f09 Mon Sep 17 00:00:00 2001 From: vfisikop Date: Tue, 12 Mar 2024 17:45:22 +0200 Subject: [PATCH] Add JOSS paper --- joss_paper/README | 7 + joss_paper/paper.bib | 360 +++++++++++++++++++++++++++++++++++++++++++ joss_paper/paper.md | 121 +++++++++++++++ 3 files changed, 488 insertions(+) create mode 100644 joss_paper/README create mode 100644 joss_paper/paper.bib create mode 100644 joss_paper/paper.md diff --git a/joss_paper/README b/joss_paper/README new file mode 100644 index 000000000..46dcfe919 --- /dev/null +++ b/joss_paper/README @@ -0,0 +1,7 @@ +Compile it locally with + +`docker run --rm --volume $PWD/joss_paper:/data --user $(id -u):$(id -g) --env JOURNAL=joss openjournals/inara` + +More instructions on + +https://joss.readthedocs.io/en/latest/submitting.html#example-paper-and-bibliography \ No newline at end of file diff --git a/joss_paper/paper.bib b/joss_paper/paper.bib new file mode 100644 index 000000000..b14a11504 --- /dev/null +++ b/joss_paper/paper.bib @@ -0,0 +1,360 @@ +@article{Chalkis:2021, + author = {Apostolos Chalkis and Vissarion Fisikopoulos}, + title = {{volesti: Volume Approximation and Sampling for Convex + Polytopes in R}}, + year = {2021}, + journal = {{The R Journal}}, + doi = {10.32614/RJ-2021-077}, + url = {https://doi.org/10.32614/RJ-2021-077}, + pages = {642--660}, + volume = {13}, + number = {2} +} + +@article{Chalkis_dingo:2023, + author = {Apostolos Chalkis and Vissarion Fisikopoulos and Elias Tsigaridas and Haris Zafeiropoulos}, + title = {dingo: a Python package for metabolic flux sampling}, + elocation-id = {2023.06.18.545486}, + year = {2023}, + doi = {10.1101/2023.06.18.545486}, + publisher = {Cold Spring Harbor Laboratory}, + URL = {https://www.biorxiv.org/content/early/2023/06/20/2023.06.18.545486}, + eprint = {https://www.biorxiv.org/content/early/2023/06/20/2023.06.18.545486.full.pdf}, + journal = {bioRxiv} +} + +@article{Iyengar:1988, + author = {S. 
Iyengar}, + journal = {SIAM Journal on Scientific and Statistical Computing}, + number = {3}, + pages = {418--423}, + title = {Evaluation of Normal Probabilities of Symmetric + Regions}, + volume = {9}, + year = {1988}, + url = {https://doi.org/10.1137/0909028}, +} + +@article{Somerville:1998, + author = {P. N. Somerville}, + journal = {Journal of Computational and Graphical Statistics}, + number = {4}, + pages = {529--544}, publisher = {Taylor \& Francis}, + title = {Numerical Computation of Multivariate Normal and + Multivariate-t Probabilities over Convex Regions}, + volume = {7}, + year = {1998}, + url = {https://doi.org/10.1080/10618600.1998.10474793}, +} + +@book{Genz:2009, + author = {Genz, A. and Bretz, F.}, + edition = {1st}, + publisher = {Springer Publishing Company, Incorporated}, + title = {Computation of Multivariate Normal and t + Probabilities}, + year = {2009}, + isbn = {364201688X, 9783642016882}, +} + +@article{Schellenberger:2009, + author = {J. Schellenberger and B. O. Palsson}, + journal = {The Journal of Biological Chemistry}, + pages = {5457--5461}, + title = {Use of Randomized Sampling for Analysis of Metabolic + Networks}, + volume = {284}, number = {9}, + year = {2009}, + url = {https://doi.org/10.1074/jbc.R800048200}, +} + +@article{Venzke:2019, + author = {A. Venzke and D. K. Molzahn and S. Chatzivasileiadis}, + journal = {Electric Power Systems Research}, + pages = {106614}, + title = {Efficient creation of datasets for data-driven power + system applications}, + volume = {190}, + year = {2021}, + issn = {0378-7796}, + url = {https://doi.org/10.1016/j.epsr.2020.106614}, +} + +@article{Dyer:1988, + author = {Dyer, M. and Frieze, A.}, + journal = {SIAM Journal on Computing}, + number = {5}, + pages = {967--974}, + title = {On the Complexity of Computing the Volume of a + Polyhedron}, + volume = {17}, + year = {1988}, + url = {https://doi.org/10.1137/0217060}, +} + +@article{Chen:2018, + author = {Y. Chen and R. Dwivedi and M. J. Wainwright and B. 
Yu}, + journal = {Journal of Machine Learning Research}, + number = {55}, + pages = {1--86}, + title = {Fast {MCMC} Sampling Algorithms on Polytopes}, + volume = {19}, + year = {2018}, + url = {http://jmlr.org/papers/v19/18-158.html}, +} + +@inproceedings{Lee:2018, + author = {Y. T. Lee and S. Vempala}, + booktitle = {Proceedings of the 50th Annual ACM SIGACT Symposium + on Theory of Computing}, + pages = {1115--1121}, + series = {STOC 2018}, + title = {Convergence Rate of {Riemannian} {Hamiltonian} {Monte + Carlo} and Faster Polytope Volume Computation}, + year = {2018}, + isbn = {978-1-4503-5559-9}, + url = {https://doi.org/10.1145/3188745.3188774}, +} + +@inproceedings{Mangoubi:2019, + author = {O. {Mangoubi} and N. K. {Vishnoi}}, + booktitle = {2019 IEEE 60th Annual Symposium on Foundations of + Computer Science (FOCS)}, + pages = {1338--1357}, + title = {Faster Polytope Rounding, Sampling, and Volume + Computation via a Sub-Linear Ball Walk}, + year = {2019}, + url = {https://doi.org/10.1109/FOCS.2019.00082}, +} + +@article{Lovasz:2006, + address = {Philadelphia, PA, USA}, + author = {Lov{\'a}sz, L. and Vempala, S.}, + journal = {SIAM Journal on Computing}, + number = {4}, + pages = {985--1005}, + publisher = {Society for Industrial and Applied Mathematics}, + title = {Hit-and-Run from a Corner}, + volume = {35}, + year = {2006}, + issn = {0097-5397}, + url = {https://doi.org/10.1137/S009753970544727X}, +} + +@article{Emiris:2014, + address = {New York, USA}, + author = {I. Z. Emiris and V. Fisikopoulos}, + journal = {ACM Transactions on Mathematical Software}, + number = {4}, + pages = {38:1--38:21}, + publisher = {ACM}, + title = {Practical Polytope Volume Approximation}, + volume = {44}, + year = {2018}, + issn = {0098-3500}, + url = {https://doi.org/10.1145/3194656}, +} + +@article{Cousins:2015, + address = {Berlin}, + author = {B. Cousins and S. 
Vempala}, + journal = {Mathematical Programming Computation}, + month = jun, + number = {2}, + publisher = {Springer-Verlag}, + title = {A Practical Volume Algorithm}, + volume = {8}, + year = {2016}, + url = {https://doi.org/10.1007/s12532-015-0097-z}, +} + +@article{Chalkis_volume:2023, +author = {Chalkis, Apostolos and Emiris, Ioannis Z. and Fisikopoulos, Vissarion}, +title = {A Practical Algorithm for Volume Estimation based on Billiard Trajectories and Simulated Annealing}, +year = {2023}, +issue_date = {December 2023}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +volume = {28}, +issn = {1084-6654}, +url = {https://doi.org/10.1145/3584182}, +doi = {10.1145/3584182}, +journal = {ACM J. Exp. Algorithmics}, +month = may, +articleno = {1.3}, +numpages = {34}, +keywords = {volume approximation, mathematical software, randomized algorithm, billiard trajectories, polytope representations, sampling, Random walk} +} + +@MISC{eigen, + author = {Ga{\"e}l Guennebaud and Beno{\^i}t Jacob and others}, + title = {Eigen v3}, + howpublished = {http://eigen.tuxfamily.org}, + year = {2010} + } + +@MISC{mkl, + title = {Intel Math Kernel Library (Intel MKL)}, + howpublished = {https://software.intel.com/en-us/intel-mkl}, + year = {2024} + } + + @book{Ziegler:1995, + address = {New York}, + author = {Ziegler, Günter M.}, + series = {Graduate Texts in Mathematics}, volume = {152}, + description = {Lectures on Polytopes}, + isbn = {0387943293 9780387943299 3540943293 9783540943297 038794365X 9780387943657 354094365X 9783540943655}, + publisher = {Springer-Verlag}, + title = {Lectures on Polytopes}, + year = 1995 +} + +@article{Ramana:1999, +author = {Ramana, Motakuri and Goldman, A.}, +year = {1995}, +month = jul, +pages = {33--50}, +title = {Some Geometric Results in Semidefinite Programming}, +volume = {7}, +journal = {Journal of Global Optimization}, +doi = {10.1007/BF01100204} +} + +@misc{Spallitta:2024, + title={Enhancing SMT-based Weighted Model 
Integration by Structure Awareness}, + author={Giuseppe Spallitta and Gabriele Masina and Paolo Morettin and Andrea Passerini and Roberto Sebastiani}, + year={2024}, + eprint={2302.06188}, + archivePrefix={arXiv}, + primaryClass={cs.AI} +} + +@inproceedings{Kook:2022, + author = {Kook, Yunbum and Lee, Yin-Tat and Shen, Ruoqi and Vempala, Santosh}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {S. Koyejo and S. Mohamed and A. Agarwal and D. Belgrave and K. Cho and A. Oh}, + pages = {31684--31696}, + publisher = {Curran Associates, Inc.}, + title = {Sampling with Riemannian Hamiltonian Monte Carlo in a Constrained Space}, + url = {https://proceedings.neurips.cc/paper_files/paper/2022/file/cdaa7f07b0c5a7803927d20aa717132e-Paper-Conference.pdf}, + volume = {35}, + year = {2022} +} + +@article{Chalkis_hmc:2023, +author = {Chalkis, Apostolos and Fisikopoulos, Vissarion and Papachristou, Marios and Tsigaridas, Elias}, +title = {Truncated Log-concave Sampling for Convex Bodies with Reflective Hamiltonian Monte Carlo}, +year = {2023}, +issue_date = {June 2023}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +volume = {49}, +number = {2}, +issn = {0098-3500}, +url = {https://doi.org/10.1145/3589505}, +doi = {10.1145/3589505}, +abstract = {We introduce Reflective Hamiltonian Monte Carlo (ReHMC), an HMC-based algorithm to sample from a log-concave distribution restricted to a convex body. The random walk is based on incorporating reflections to the Hamiltonian dynamics such that the support of the target density is the convex body. We develop an efficient open source implementation of ReHMC and perform an experimental study on various high-dimensional datasets. The experiments suggest that ReHMC outperforms Hit-and-Run and Coordinate-Hit-and-Run regarding the time it needs to produce an independent sample, introducing practical truncated sampling in thousands of dimensions.}, +journal = {ACM Trans. Math. 
Softw.}, +month = jun, +articleno = {16}, +numpages = {25}, +keywords = {Statistical software, truncated sampling, geometric random walks, experiments, mixing time} +} + +@article{Chalkis_spectra:2022, +title = {Efficient sampling in spectrahedra and volume approximation}, +journal = {Linear Algebra and its Applications}, +volume = {648}, +pages = {205--232}, +year = {2022}, +issn = {0024-3795}, +doi = {10.1016/j.laa.2022.04.002}, +url = {https://www.sciencedirect.com/science/article/pii/S0024379522001471}, +author = {Apostolos Chalkis and Ioannis Z. Emiris and Vissarion Fisikopoulos and Panagiotis Repouskos and Elias Tsigaridas}, +keywords = {Spectrahedron, Semidefinite-programming, Sampling, Random walk, Monte Carlo, Polynomial eigenvalue problem, Volume approximation, Optimization}, +abstract = {We present algorithmic, complexity, and implementation results on the problem of sampling points from a spectrahedron, that is, the feasible region of a semidefinite program. Our main tool is geometric random walks. We analyze the arithmetic and bit complexity of certain primitive geometric operations that are based on the algebraic properties of spectrahedra and the polynomial eigenvalue problem. This study leads to the implementation of a broad collection of random walks for sampling from spectrahedra that experimentally show faster mixing times than methods currently employed either in theoretical studies or in applications, including the popular family of Hit-and-Run walks. The different random walks offer a variety of advantages, thus allowing us to efficiently sample from general probability distributions, for example the family of log-concave distributions which arise in numerous applications. We focus on two major applications of independent interest: (i) approximate the volume of a spectrahedron, and (ii) compute the expectation of functions coming from robust optimal control. 
We exploit efficient linear algebra algorithms and implementations to address the aforementioned computations in very high dimension. In particular, we provide a C++ open source implementation of our methods that scales efficiently, for the first time, up to dimension 200. We illustrate its efficiency on various data sets.} +} + + + @inproceedings{cftz-socg021, + author = {Apostolos Chalkis and + Vissarion Fisikopoulos and + Elias P. Tsigaridas and + Haris Zafeiropoulos}, + editor = {Kevin Buchin and + {\'{E}}ric Colin de Verdi{\`{e}}re}, + title = {Geometric Algorithms for Sampling the Flux Space of Metabolic Networks}, + booktitle = {37th International Symposium on Computational Geometry, SoCG 2021, + June 7-11, 2021, Buffalo, NY, {USA} (Virtual Conference)}, + series = {LIPIcs}, + volume = {189}, + pages = {21:1--21:16}, + publisher = {Schloss Dagstuhl - Leibniz-Zentrum f{\"{u}}r Informatik}, + year = {2021}, + url = {https://doi.org/10.4230/LIPIcs.SoCG.2021.21}, + doi = {10.4230/LIPICS.SOCG.2021.21}, + +} + + +@article{ccef-crises-j, + title={Practical volume approximation of high-dimensional convex bodies, applied to modeling portfolio dependencies and financial crises}, + author={Cal{\`e}s, Ludovic and Chalkis, Apostolos and Emiris, Ioannis Z and Fisikopoulos, Vissarion}, + journal={Computational Geometry}, + volume={109}, + pages={101916}, + year={2023}, + publisher={Elsevier} +} + + + + @inproceedings{bcft-aistats-23, + title={Randomized geometric tools for anomaly detection in stock markets}, + author={Bachelard, Cyril and Chalkis, Apostolos and Fisikopoulos, Vissarion and Tsigaridas, Elias}, + booktitle={International Conference on Artificial Intelligence and Statistics}, + pages={9400--9416}, + year={2023}, + organization={PMLR} +} + + + @article{bcft-arxiv-24, + title={Randomized Control in Performance Analysis and Empirical Asset Pricing}, + author={Bachelard, Cyril and Chalkis, Apostolos and Fisikopoulos, Vissarion and Tsigaridas, Elias}, + 
journal={arXiv preprint arXiv:2403.00009}, + year={2024} +} + +@inproceedings{co-alenex-2021, + title={Computation of large asymptotics of 3-manifold quantum invariants}, + author={Maria, Cl{\'e}ment and Rouill{\'e}, Owen}, + booktitle={2021 Proceedings of the Workshop on Algorithm Engineering and Experiments (ALENEX)}, + pages={193--206}, + year={2021}, + organization={SIAM} +} + + +@article{vm-fods-2022, + title={Multiple hypothesis testing with persistent homology}, + author={Vejdemo-Johansson, Mikael and Mukherjee, Sayan}, + journal={Foundations of Data Science}, + volume={4}, + number={4}, + pages={667--705}, + year={2022}, + publisher={Foundations of Data Science} +} + + + + @PhdThesis{pham-phd-2024, + author = {Long Pham}, + title = {Hybrid Resource-Bound +Analyses of Programs}, + school = {Carnegie Mellon University}, + year = 2024, + note = {(PhD thesis proposal)}} + diff --git a/joss_paper/paper.md b/joss_paper/paper.md new file mode 100644 index 000000000..06b30ef4a --- /dev/null +++ b/joss_paper/paper.md @@ -0,0 +1,121 @@ +--- +title: 'volesti: C++ library for sampling and volume computation on convex bodies' +tags: + - C++ + - geometry + - randomization + - Monte-Carlo methods + - convexity +authors: + - name: Apostolos Chalkis + orcid: 0000-0000-0000-0000 + equal-contrib: true + affiliation: "2, 4" # (Multiple affiliations must be quoted) + - name: Vissarion Fisikopoulos + corresponding: true # (This is how to denote the corresponding author) + equal-contrib: true # (This is how you can denote equal contributions between multiple authors) + affiliation: "1, 4" + - name: Marios Papachristou + equal-contrib: true # (This is how you can denote equal contributions between multiple authors) + affiliation: 5 + - name: Elias Tsigaridas + equal-contrib: true # (This is how you can denote equal contributions between multiple authors) + affiliation: "3, 4" +affiliations: + - name: National & Kapodistrian University of Athens, Greece + index: 1 + - name: 
Quantagonia + index: 2 + - name: Inria Paris and IMJ-PRG, Sorbonne Université + index: 3 + - name: GeomScale + index: 4 + - name: Cornell University + index: 5 +date: 11 March 2024 +bibliography: paper.bib + +--- + +# Summary + +Sampling from (constrained) high-dimensional distributions and volume approximation of convex +bodies are fundamental operations that appear in optimization, finance, +engineering, artificial intelligence, and machine learning. +We present `volesti`, a C++ library that delivers efficient implementations of state-of-the-art, mainly randomized, algorithms +to sample from general log-concave distributions. +Based on these routines, we can estimate the volume of convex bodies in high dimensions, +round them, and also compute multidimensional integrals over them. +The backbone of our library consists of Monte Carlo algorithms, +that is, randomized algorithms whose output can be incorrect with a (usually very small) error probability; thus, we also provide several +high-dimensional statistical tests to certify and verify the output. + +The focus of `volesti` is scalability in high dimensions, +which, depending on the problem at hand, could be in the order of hundreds or thousands of dimensions. +Another novelty is the ability to handle a variety of different inputs +for the constrained support of the various distributions. +`volesti` supports three different types of polyhedra [@Ziegler:1995], spectrahedra [@Ramana:1999] +and general non-linear convex objects. + +`volesti` relies on the `Eigen` library [@eigen] for linear algebra but also supports `MKL` optimizations [@mkl]. +There are R [@Chalkis:2021] and Python [@Chalkis_dingo:2023] interfaces available; +alas, not all C++ functionality is available through these interfaces. 
+ +# Statement of need + +High-dimensional sampling from multivariate distributions with Markov Chain Monte Carlo (MCMC) +algorithms is a fundamental problem with many applications in the whole spectrum of science and engineering [@Iyengar:1988; +@Somerville:1998; @Genz:2009; @Schellenberger:2009]. +In particular, multivariate integration over a convex set +as well as the volume approximation of convex sets +have attracted a huge amount of effort from theorists and engineers over the last decades. +Nevertheless, these problems are computationally hard for general dimensions [@Dyer:1988]. +MCMC algorithms made remarkable progress +and their use allowed us to efficiently tackle the problems of sampling and +volume estimation of convex bodies in theory, +by the introduction of (rather sharp) theoretical guarantees [@Chen:2018; @Lee:2018; +@Mangoubi:2019]. +Unfortunately, these theoretical guarantees of the MCMC algorithms +do not extend in a straightforward manner to efficient implementations able to attack problems coming from real-life computations. +Therefore, we witnessed the birth of MCMC algorithms that are efficient in practice: +they relax the theoretical guarantees +and employ new algorithmic and statistical techniques +to be amenable to efficient implementations. +Remarkably, these algorithms, and the corresponding implementations, +also meet the requirements for high accuracy results +[@Emiris:2014; @Cousins:2015; @Chalkis_volume:2023; @Kook:2022]. +Let us mention that the volume algorithm of @Cousins:2015 and the sampling method of @Kook:2022 are available as `MATLAB` +packages. + +All aforementioned algorithms and techniques are available in `volesti` +along with the sampling algorithm by +@Chalkis_hmc:2023 and the algorithms for spectrahedra by @Chalkis_spectra:2022. 
+ +The efficient implementations of `volesti` +(i) support various sampling techniques based on geometric walks (roughly speaking, these are a continuous version of MCMC algorithms), like the Billiard walk, the Hamiltonian walk, and others, +(ii) give us the ability to sample from various distributions, like uniform, log-concave, exponential, and Gaussian, +(iii) allow us to consider distributions + constrained in various convex domains, like hypercubes, zonotopes, general polytopes (in H and V representations), and spectrahedra, + and (iv) can perform volume computations and integration, and solve problems from real-life applications in very high dimensions. + + + +We use `volesti` extensively in various research and engineering directions that we pursue. +In particular, for the problem of sampling the flux space of metabolic networks +we were able to sample from the most complicated human metabolic network accessible today, Recon3D [@cftz-socg021]; +we also use it to model financial crises [@ccef-crises-j], +to detect low volatility anomalies in stock markets [@bcft-aistats-23], + to introduce randomized control in asset pricing and portfolio performance evaluation [@bcft-arxiv-24], but also to sample from (and compute the volume of) spectrahedra [@Chalkis_spectra:2022], the feasible regions of semidefinite programs. + +Furthermore, `volesti` has been used in conducting research in electric power systems [@Venzke:2019], for problems +in probabilistic inference [@Spallitta:2024], +to perform resource analysis on programs [@pham-phd-2024], +but also for more theoretical and mathematical challenges, like the computation of topological invariants [@co-alenex-2021] + and persistent homology [@vm-fods-2022]. + +# Acknowledgements + +We would like to thank the contributors to the `volesti` library for their valuable contributions and +feedback. + +# References