diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
new file mode 100644
index 00000000..9bcfa51f
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE.md
@@ -0,0 +1,15 @@
+### Issue Description
+A clear and concise description of the issue. If it's a feature request, please add [Feature Request] to the title.
+
+### Steps to Reproduce
+Please provide steps to reproduce the issue, attaching any error messages and stack traces.
+
+### Expected Behavior
+What did you expect to happen?
+
+### System Info
+Please provide information about your setup:
+- PyTorch Version (run `print(torch.__version__)`)
+- Python version
+
+### Additional Context
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 00000000..5e893cca
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,23 @@
+### Motivation
+Please describe your motivation for the changes. Provide links to any related issues.
+
+### Changes proposed
+Outline the proposed changes and alternatives considered.
+
+### Test Plan
+Please provide clear instructions on how the changes were verified. Attach screenshots if applicable.
+
+### Types of changes
+- [ ] Docs change / refactoring / dependency upgrade
+- [ ] Bug fix (non-breaking change which fixes an issue)
+- [ ] New feature (non-breaking change which adds functionality)
+- [ ] Breaking change (fix or feature that would cause existing functionality to change)
+
+### Checklist
+- [ ] My code follows the code style of this project.
+- [ ] My change requires a change to the documentation.
+- [ ] I have updated the documentation accordingly.
+- [ ] I have read the **[CONTRIBUTING](https://github.com/facebookincubator/flowtorch/blob/main/CONTRIBUTING.md)** document.
+- [ ] I have added tests to cover my changes.
+- [ ] All new and existing tests passed.
+- [ ] The title of my pull request is a short description of the proposed changes.
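As a purely illustrative aside (not part of the template or this patch), the details requested under "System Info" above can be gathered with a snippet along these lines and pasted into the issue:

```python
# Illustrative only: prints the details the issue template asks reporters to include.
import sys

import torch

print("PyTorch Version:", torch.__version__)
print("Python version:", sys.version.split()[0])
```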
diff --git a/.github/workflows/deploy-on-release.yml b/.github/workflows/deploy-on-release.yml new file mode 100644 index 00000000..4f8a998f --- /dev/null +++ b/.github/workflows/deploy-on-release.yml @@ -0,0 +1,72 @@ +name: Deploy On Release + +on: + release: + types: [created] + +jobs: + tests-and-coverage-pip: + name: Tests and coverage (pip, Python ${{ matrix.python-version }}, ${{ matrix.os }}) + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest", "macos-latest", "windows-latest"] + python-version: ['3.7', '3.8', '3.9'] + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install (auto-install dependencies) + run: | + python -m pip install --upgrade pip + pip install -e .[test] + - name: Test with pytest + run: | + pytest --cov=tests --cov-report=xml -W ignore::DeprecationWarning tests/ + - name: Upload coverage to Codecov + if: ${{ runner.os == 'Linux' && matrix.python-version == 3.7 }} + uses: codecov/codecov-action@v1 + with: + token: 9667eb01-c300-4166-b8ba-605deb2682e4 + files: coverage.xml + directory: ./ + flags: unittests + env_vars: OS,PYTHON + name: codecov-umbrella + fail_ci_if_error: true + path_to_write_report: ./codecov_report.txt + verbose: true + + release-pypi: + name: Release to pypi.org + runs-on: ubuntu-latest + needs: tests-and-coverage-pip + strategy: + fail-fast: true + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + fetch-depth: 0 + - name: Set up Python 3.7 + uses: actions/setup-python@v2 + with: + python-version: 3.7 + - name: Install packaging dependencies + run: | + python -m pip install --upgrade pip + pip install --upgrade setuptools wheel + - name: Build source distribution + run: python setup.py sdist bdist_wheel + - name: Upload to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + verbose: true + user: __token__ + password: ${{ secrets.PYPI_PASSWORD }} + # swap for test PyPI + # password: ${{ secrets.TEST_PYPI_API_TOKEN }} + # repository_url: https://test.pypi.org/legacy/ diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml new file mode 100644 index 00000000..a8d9fb1c --- /dev/null +++ b/.github/workflows/documentation.yml @@ -0,0 +1,67 @@ +name: documentation + +on: + pull_request: + branches: [main] + push: + branches: [main] + +jobs: + checks: + if: github.event_name != 'push' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: 3.8 + - uses: actions/setup-node@v2 + with: + node-version: '12.x' + - name: Test Build + working-directory: ./website + run: | + python -m pip install --upgrade pip + pip install -e .. 
+ python ../scripts/generate_api_docs.py + if [ -e yarn.lock ]; then + yarn install --frozen-lockfile + elif [ -e package-lock.json ]; then + npm ci + else + npm i + fi + npm run build + gh-release: + if: github.event_name != 'pull_request' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: 3.8 + - uses: actions/setup-node@v2 + with: + node-version: '12.x' + - uses: webfactory/ssh-agent@v0.5.0 + with: + ssh-private-key: ${{ secrets.GH_PAGES_DEPLOY }} + - name: Release to GitHub Pages + env: + USE_SSH: true + GIT_USER: git + working-directory: ./website + run: | + git config --global user.email "feynmanl@fb.com" + git config --global user.name "Feynman Liang" + python -m pip install --upgrade pip + pip install -e .. + python ../scripts/generate_api_docs.py + if [ -e yarn.lock ]; then + yarn install --frozen-lockfile + elif [ -e package-lock.json ]; then + npm ci + else + npm i + fi + npm run deploy diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml new file mode 100644 index 00000000..53b1a60a --- /dev/null +++ b/.github/workflows/python-package.yml @@ -0,0 +1,61 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Python package + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + tests-and-coverage-pip: + name: Tests and coverage (pip, Python ${{ matrix.python-version }}, ${{ matrix.os }}) + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest", "macos-latest", "windows-latest"] + python-version: ['3.7', '3.8', '3.9'] + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e .[dev] + - name: Check copyright headers + run: | + python scripts/copyright_headers.py --check flowtorch tests scripts examples + - name: Check formatting with black + run: | + black --check flowtorch tests scripts examples + - name: Check imports with usort + run: | + usort check flowtorch tests scripts examples + - name: Lint with flake8 + run: | + flake8 . 
--count --show-source --statistics + - name: Check types with mypy + run: | + mypy --disallow-untyped-defs flowtorch + - name: Test with pytest + run: | + pytest --cov=tests --cov-report=xml -W ignore::DeprecationWarning tests/ + - name: Upload coverage to Codecov + if: ${{ runner.os == 'Linux' && matrix.python-version == 3.7 }} + uses: codecov/codecov-action@v1 + with: + token: 9667eb01-c300-4166-b8ba-605deb2682e4 + files: coverage.xml + directory: ./ + flags: unittests + env_vars: OS,PYTHON + name: codecov-umbrella + fail_ci_if_error: true + path_to_write_report: ./codecov_report.txt + verbose: true diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..c88eed5e --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +flowtorch.egg-info +flowtorch/version.py +*.pyc +.ipynb_checkpoints/ +docs/_build/ +debug/.vscode +debug/*.svg +build/ +dist/ +.eggs/ +coverage.xml +.mypy_cache/ +.vscode/ +/.coverage diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 00000000..ec839c54 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,24 @@ +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Build documentation with MkDocs +#mkdocs: +# configuration: mkdocs.yml + +# Optionally build your docs in additional formats such as PDF +formats: + - pdf + +# Optionally set the version of Python and requirements required to build your docs +python: + version: 3.7 + install: + - requirements: docs/requirements.txt diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..7d31e52a --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,6 @@ +0.3 (September 15, 2021) + +* Deferred initialization of `Bijector`s and `Parameters` is expressed using the `flowtorch.LazyMeta` metaclass +* AffineAutoregressive can operate on inputs with arbitrary `event_shape`s +* A few cosmetic changes like changing `flowtorch.params.*` to `flowtorch.parameters.*` +* Temporarily removed conditional bijectors/transformed distributions and inverting bijectors diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..83f431e8 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,80 @@ +# Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to make participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. 
+ +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or +advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic +address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a +professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies within all project spaces, and it also applies when +an individual is representing the project or its community in public spaces. +Examples of representing a project or community include using an official +project e-mail address, posting via an official social media account, or acting +as an appointed representative at an online or offline event. Representation of +a project may be further defined and clarified by project maintainers. + +This Code of Conduct also applies outside the project spaces when there is a +reasonable belief that an individual's behavior may have a negative impact on +the project or its community. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at . All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..eefe96ad --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,33 @@ +# Contributing to FlowTorch +We want to make contributing to this project as easy and transparent as +possible. + +## Pull Requests +We actively welcome your pull requests. + +1. Fork the repo and create your branch from `main`. +2. If you've added code that should be tested, add tests. +3. 
If you've changed APIs, update the documentation. +4. Ensure the test suite passes. +5. Make sure your code lints. +6. If you haven't already, complete the Contributor License Agreement ("CLA"). + +For more details see the [developers guide](https://flowtorch.ai/dev). + +## Contributor License Agreement ("CLA") +In order to accept your pull request, we need you to submit a CLA. You only need +to do this once to work on any of Facebook's open source projects. + +Complete your CLA here: + +## Issues +We use GitHub issues to track public bugs. Please ensure your description is +clear and has sufficient instructions to be able to reproduce the issue. + +Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe +disclosure of security bugs. In those cases, please go through the process +outlined on that page and do not file a public issue. + +## License +By contributing to FlowTorch, you agree that your contributions will be licensed +under the LICENSE file in the root directory of this source tree. diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 00000000..f2082510 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) FlowTorch Development Team. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..b44fbdab --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,7 @@ +recursive-include flowtorch +global-exclude *.bat *.yml .gitignore +prune debug +prune docs +prune tests +prune website +prune .github \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 00000000..b1241d3c --- /dev/null +++ b/README.md @@ -0,0 +1,34 @@ +

+ +[![](https://github.com/facebookincubator/flowtorch/workflows/Python%20package/badge.svg)](https://github.com/facebookincubator/flowtorch/actions?query=workflow%3A%22Python+package%22) + +Copyright (c) FlowTorch Development Team. + +This source code is licensed under the MIT license found in the +[LICENSE.txt](https://github.com/facebookincubator/flowtorch/blob/main/LICENSE.txt) file in the root directory of this source tree. + +> :boom: **FlowTorch is currently in pre-release and many of its planned features and documentation are incomplete!** You may wish to wait until the first release planned for **8/03/2021**. + +# Overview + +FlowTorch is a PyTorch library for learning and sampling from complex probability distributions using a class of methods called [Normalizing Flows](https://arxiv.org/abs/1908.09257). + +# Installing + +An easy way to get started is to install from source: + + git clone https://github.com/facebookincubator/flowtorch.git + cd flowtorch + pip install -e . + +# Further Information + +We refer you to the [FlowTorch website](https://flowtorch.ai) for more information about installation, using the library, and becoming a contributor. Here is a handy guide: + +* [What are normalizing flows?](https://flowtorch.ai/users) +* [How do I install FlowTorch?](https://flowtorch.ai/users/installation) +* [How do I construct and train a distribution?](https://flowtorch.ai/users/start) +* [How do I contribute new normalizing flow methods?](https://flowtorch.ai/dev) +* [Where can I report bugs?](https://github.com/facebookincubator/flowtorch/issues) +* [Where can I ask general questions and make feature requests?](https://github.com/facebookincubator/flowtorch/discussions) +* [What features are planned for the near future?](https://github.com/facebookincubator/flowtorch/projects) diff --git a/examples/learn_bivariate_normal.py b/examples/learn_bivariate_normal.py new file mode 100644 index 00000000..6f832661 --- /dev/null +++ b/examples/learn_bivariate_normal.py @@ -0,0 +1,123 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +import os + +import flowtorch.bijectors as bij +import flowtorch.distributions as dist +import flowtorch.parameters as params +import matplotlib.pyplot as plt +import torch + +os.environ["KMP_DUPLICATE_LIB_OK"] = "True" + +""" +This is a simple example to demonstrate training of normalizing flows. +Standard bivariate normal noise is sampled from the base distribution +and we learnt to transform it to a bivariate normal distribution with +independent but not identical components (see the produced figures). 
+ +""" + + +def learn_bivariate_normal() -> None: + # Lazily instantiated flow plus base and target distributions + bijectors = bij.AffineAutoregressive( + params=params.DenseAutoregressive(hidden_dims=(32,)) + ) + base_dist = torch.distributions.Independent( + torch.distributions.Normal(torch.zeros(2), torch.ones(2)), 1 + ) + target_dist = torch.distributions.Independent( + torch.distributions.Normal(torch.zeros(2) + 5, torch.ones(2) * 0.5), 1 + ) + + # Instantiate transformed distribution and parameters + flow = dist.Flow(base_dist, bijectors) + + # Fixed samples for plotting + y_initial = flow.sample( + torch.Size( + [ + 300, + ] + ) + ) + y_target = target_dist.sample( + torch.Size( + [ + 300, + ] + ) + ) + + # Training loop + opt = torch.optim.Adam(flow.parameters(), lr=5e-3) + frame = 0 + for idx in range(3001): + opt.zero_grad() + + # Minimize KL(p || q) + y = target_dist.sample((1000,)) + loss = -flow.log_prob(y).mean() + + if idx % 500 == 0: + print("epoch", idx, "loss", loss) + + # Save SVG + y_learnt = ( + flow.sample( + torch.Size( + [ + 300, + ] + ) + ) + .detach() + .numpy() + ) + + plt.figure(figsize=(5, 5), dpi=100) + plt.plot( + y_target[:, 0], + y_target[:, 1], + "o", + color="blue", + alpha=0.95, + label="target", + ) + plt.plot( + y_initial[:, 0], + y_initial[:, 1], + "o", + color="grey", + alpha=0.95, + label="initial", + ) + plt.plot( + y_learnt[:, 0], + y_learnt[:, 1], + "o", + color="red", + alpha=0.95, + label="learnt", + ) + plt.xlim((-4, 8)) + plt.ylim((-4, 8)) + plt.xlabel("$x_1$") + plt.ylabel("$x_2$") + plt.legend(loc="lower right", facecolor=(1, 1, 1, 1.0)) + plt.savefig( + f"bivariate-normal-frame-{frame}.svg", + bbox_inches="tight", + transparent=True, + ) + + frame += 1 + + loss.backward() + opt.step() + + +if __name__ == "__main__": + learn_bivariate_normal() diff --git a/flowtorch/__init__.py b/flowtorch/__init__.py new file mode 100644 index 00000000..ab9c4091 --- /dev/null +++ b/flowtorch/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +from flowtorch.lazy import Lazy, LazyMeta + +__all__ = ["Lazy", "LazyMeta"] diff --git a/flowtorch/bijectors/__init__.py b/flowtorch/bijectors/__init__.py new file mode 100644 index 00000000..1b768233 --- /dev/null +++ b/flowtorch/bijectors/__init__.py @@ -0,0 +1,97 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +""" +Warning: This file was generated by flowtorch/scripts/generate_imports.py +Do not modify or delete! 
+ +""" + +import inspect +from typing import cast, List, Tuple + +import torch +from flowtorch.bijectors.affine import Affine +from flowtorch.bijectors.affine_autoregressive import AffineAutoregressive +from flowtorch.bijectors.affine_fixed import AffineFixed +from flowtorch.bijectors.autoregressive import Autoregressive +from flowtorch.bijectors.base import Bijector +from flowtorch.bijectors.compose import Compose +from flowtorch.bijectors.elementwise import Elementwise +from flowtorch.bijectors.elu import ELU +from flowtorch.bijectors.exp import Exp +from flowtorch.bijectors.fixed import Fixed +from flowtorch.bijectors.leaky_relu import LeakyReLU +from flowtorch.bijectors.permute import Permute +from flowtorch.bijectors.power import Power +from flowtorch.bijectors.sigmoid import Sigmoid +from flowtorch.bijectors.softplus import Softplus +from flowtorch.bijectors.spline import Spline +from flowtorch.bijectors.spline_autoregressive import SplineAutoregressive +from flowtorch.bijectors.tanh import Tanh +from flowtorch.bijectors.volume_preserving import VolumePreserving + +standard_bijectors = [ + ("Affine", Affine), + ("AffineAutoregressive", AffineAutoregressive), + ("AffineFixed", AffineFixed), + ("ELU", ELU), + ("Exp", Exp), + ("LeakyReLU", LeakyReLU), + ("Permute", Permute), + ("Power", Power), + ("Sigmoid", Sigmoid), + ("Softplus", Softplus), + ("Spline", Spline), + ("SplineAutoregressive", SplineAutoregressive), + ("Tanh", Tanh), +] + +meta_bijectors = [ + ("Elementwise", Elementwise), + ("Autoregressive", Autoregressive), + ("Fixed", Fixed), + ("Bijector", Bijector), + ("Compose", Compose), + ("VolumePreserving", VolumePreserving), +] + + +def isbijector(cls: type) -> bool: + # A class must inherit from flowtorch.Bijector to be considered a valid bijector + return issubclass(cls, Bijector) + + +def standard_bijector(cls: type) -> bool: + # "Standard bijectors" are the ones we can perform standard automated tests upon + return ( + inspect.isclass(cls) + and isbijector(cls) + and cls.__name__ not in [clx for clx, _ in meta_bijectors] + ) + + +# Determine invertible bijectors +invertible_bijectors = [] +for bij_name, cls in standard_bijectors: + # TODO: Use factored out version of the following + # Define plan for flow + event_dim = max(cls.domain.event_dim, 1) # type: ignore + event_shape = event_dim * [4] + # base_dist = dist.Normal(torch.zeros(event_shape), torch.ones(event_shape)) + bij = cls(shape=torch.Size(event_shape)) + + try: + y = torch.randn(*bij.forward_shape(event_shape)) + bij.inverse(y) + except NotImplementedError: + pass + else: + invertible_bijectors.append((bij_name, cls)) + + +__all__ = ["standard_bijectors", "meta_bijectors", "invertible_bijectors"] + [ + cls + for cls, _ in cast(List[Tuple[str, Bijector]], meta_bijectors) + + cast(List[Tuple[str, Bijector]], standard_bijectors) +] diff --git a/flowtorch/bijectors/affine.py b/flowtorch/bijectors/affine.py new file mode 100644 index 00000000..3e9b1f4f --- /dev/null +++ b/flowtorch/bijectors/affine.py @@ -0,0 +1,31 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +from typing import Optional + +import flowtorch +import torch +from flowtorch.bijectors.elementwise import Elementwise +from flowtorch.bijectors.ops.affine import Affine as AffineOp + + +class Affine(AffineOp, Elementwise): + r""" + Elementwise bijector via the affine mapping :math:`\mathbf{y} = \mu + + \sigma \otimes \mathbf{x}` where $\mu$ and $\sigma$ are learnable parameters. 
+ """ + + def __init__( + self, + params: Optional[flowtorch.Lazy] = None, + *, + shape: torch.Size, + context_shape: Optional[torch.Size] = None, + log_scale_min_clip: float = -5.0, + log_scale_max_clip: float = 3.0, + sigmoid_bias: float = 2.0, + ) -> None: + super().__init__(params, shape=shape, context_shape=context_shape) + self.log_scale_min_clip = log_scale_min_clip + self.log_scale_max_clip = log_scale_max_clip + self.sigmoid_bias = sigmoid_bias diff --git a/flowtorch/bijectors/affine_autoregressive.py b/flowtorch/bijectors/affine_autoregressive.py new file mode 100644 index 00000000..21c224de --- /dev/null +++ b/flowtorch/bijectors/affine_autoregressive.py @@ -0,0 +1,31 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +from typing import Optional + +import flowtorch +import flowtorch.parameters +import torch +from flowtorch.bijectors.autoregressive import Autoregressive +from flowtorch.bijectors.ops.affine import Affine as AffineOp + + +class AffineAutoregressive(AffineOp, Autoregressive): + def __init__( + self, + params: Optional[flowtorch.Lazy] = None, + *, + shape: torch.Size, + context_shape: Optional[torch.Size] = None, + log_scale_min_clip: float = -5.0, + log_scale_max_clip: float = 3.0, + sigmoid_bias: float = 2.0, + ) -> None: + super().__init__( + params, + shape=shape, + context_shape=context_shape, + ) + self.log_scale_min_clip = log_scale_min_clip + self.log_scale_max_clip = log_scale_max_clip + self.sigmoid_bias = sigmoid_bias diff --git a/flowtorch/bijectors/affine_fixed.py b/flowtorch/bijectors/affine_fixed.py new file mode 100644 index 00000000..9c39eab3 --- /dev/null +++ b/flowtorch/bijectors/affine_fixed.py @@ -0,0 +1,54 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +import math +from typing import Optional + +import flowtorch +import torch +from flowtorch.bijectors.fixed import Fixed + + +class AffineFixed(Fixed): + r""" + Elementwise bijector via the affine mapping :math:`\mathbf{y} = \mu + + \sigma \otimes \mathbf{x}` where $\mu$ and $\sigma$ are fixed rather than + learnable. + """ + + # TODO: Handle non-scalar loc and scale with correct broadcasting semantics + def __init__( + self, + params: Optional[flowtorch.Lazy] = None, + *, + shape: torch.Size, + context_shape: Optional[torch.Size] = None, + loc: float = 0.0, + scale: float = 1.0 + ) -> None: + super().__init__(params, shape=shape, context_shape=context_shape) + self.loc = loc + self.scale = scale + + def _forward( + self, + x: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return self.loc + self.scale * x + + def _inverse( + self, + y: torch.Tensor, + x: Optional[torch.Tensor] = None, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return (y - self.loc) / self.scale + + def _log_abs_det_jacobian( + self, + x: torch.Tensor, + y: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return torch.full_like(x, math.log(abs(self.scale))) diff --git a/flowtorch/bijectors/autoregressive.py b/flowtorch/bijectors/autoregressive.py new file mode 100644 index 00000000..8593f11e --- /dev/null +++ b/flowtorch/bijectors/autoregressive.py @@ -0,0 +1,70 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# SPDX-License-Identifier: MIT + +from typing import Any, cast, Optional + +import flowtorch +import flowtorch.parameters +import torch +import torch.distributions.constraints as constraints +from flowtorch.bijectors.base import Bijector +from flowtorch.parameters.dense_autoregressive import DenseAutoregressive + + +class Autoregressive(Bijector): + # "Default" event shape is to operate on vectors + domain = constraints.real_vector + codomain = constraints.real_vector + + def __init__( + self, + params: Optional[flowtorch.Lazy] = None, + *, + shape: torch.Size, + context_shape: Optional[torch.Size] = None, + **kwargs: Any + ) -> None: + # Event shape is determined by `shape` argument + self.domain = constraints.independent(constraints.real, len(shape)) + self.codomain = constraints.independent(constraints.real, len(shape)) + + # currently only DenseAutoregressive has a `permutation` buffer + if not params: + params = DenseAutoregressive() # type: ignore + + # TODO: Replace P.DenseAutoregressive with P.Autoregressive + # In the future there will be other autoregressive parameter classes + assert params is not None and issubclass(params.cls, DenseAutoregressive) + + super().__init__(params, shape=shape, context_shape=context_shape) + + def inverse( + self, + y: torch.Tensor, + x: Optional[torch.Tensor] = None, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + # TODO: Allow that context can have a batch shape + assert context is None # or context.shape == (self._context_size,) + params = self.params + assert params is not None + + x_new = torch.zeros_like(y) + # NOTE: Inversion is an expensive operation that scales in the + # dimension of the input + permutation = ( + params.permutation + ) # TODO: type-safe named buffer (e.g. "permutation") access + # TODO: Make permutation, inverse work for other event shapes + for idx in cast(torch.LongTensor, permutation): + x_new[..., idx] = self._inverse(y, x_new.clone(), context)[..., idx] + + return x_new + + def _log_abs_det_jacobian( + self, + x: torch.Tensor, + y: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + raise NotImplementedError diff --git a/flowtorch/bijectors/base.py b/flowtorch/bijectors/base.py new file mode 100644 index 00000000..0e6b7b40 --- /dev/null +++ b/flowtorch/bijectors/base.py @@ -0,0 +1,143 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# SPDX-License-Identifier: MIT +from typing import Optional, Sequence, Union + +import flowtorch +import flowtorch.distributions +import flowtorch.parameters +import torch +import torch.distributions +from flowtorch.parameters import Parameters +from torch.distributions import constraints + + +class Bijector(metaclass=flowtorch.LazyMeta): + codomain: constraints.Constraint = constraints.real + domain: constraints.Constraint = constraints.real + _shape: torch.Size + _context_shape: Optional[torch.Size] + _params: Optional[Union[Parameters, torch.nn.ModuleList]] = None + + def __init__( + self, + params: Optional[flowtorch.Lazy] = None, + *, + shape: torch.Size, + context_shape: Optional[torch.Size] = None, + ) -> None: + # Prevent "meta bijectors" from being initialized + # NOTE: We define a "standard bijector" as one that inherits from a + # subclass of Bijector, hence why we need to test the length of the MRO + if ( + self.__class__.__module__ == "flowtorch.bijectors.base" + or len(self.__class__.__mro__) <= 3 + ): + raise TypeError("Only standard bijectors can be initialized.") + + self._shape = shape + self._context_shape = context_shape + + # Instantiate parameters (tensor, hypernets, etc.) + if params is not None: + param_shapes = self.param_shapes(shape) + self._params = params( # type: ignore + param_shapes, self._shape, self._context_shape + ) + + @property + def params(self) -> Optional[Union[Parameters, torch.nn.ModuleList]]: + return self._params + + @params.setter + def params(self, value: Optional[Union[Parameters, torch.nn.ModuleList]]) -> None: + self._params = value + + def forward( + self, + x: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + # TODO: Allow that context can have a batch shape + assert context is None # or context.shape == (self._context_size,) + return self._forward(x, context) + + def _forward( + self, + x: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + """ + Abstract method to compute forward transformation. + """ + raise NotImplementedError + + def inverse( + self, + y: torch.Tensor, + x: Optional[torch.Tensor] = None, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + # TODO: Allow that context can have a batch shape + assert context is None # or context.shape == (self._context_size,) + return self._inverse(y, x, context) + + def _inverse( + self, + y: torch.Tensor, + x: Optional[torch.Tensor] = None, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + """ + Abstract method to compute inverse transformation. + """ + raise NotImplementedError + + def log_abs_det_jacobian( + self, + x: torch.Tensor, + y: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + """ + Computes the log det jacobian `log |dy/dx|` given input and output. + By default, assumes a volume preserving bijection. + """ + return self._log_abs_det_jacobian(x, y, context) + + def _log_abs_det_jacobian( + self, + x: torch.Tensor, + y: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + """ + Computes the log det jacobian `log |dy/dx|` given input and output. + By default, assumes a volume preserving bijection. + """ + + # TODO: Sum out self.event_dim right-most dimensions + # self.event_dim may be > 0 for derived classes! 
+ return torch.zeros_like(x) + + def param_shapes(self, shape: torch.Size) -> Sequence[torch.Size]: + """ + Abstract method to return shapes of parameters + """ + raise NotImplementedError + + def __repr__(self) -> str: + return self.__class__.__name__ + "()" + + def forward_shape(self, shape: torch.Size) -> torch.Size: + """ + Infers the shape of the forward computation, given the input shape. + Defaults to preserving shape. + """ + return shape + + def inverse_shape(self, shape: torch.Size) -> torch.Size: + """ + Infers the shapes of the inverse computation, given the output shape. + Defaults to preserving shape. + """ + return shape diff --git a/flowtorch/bijectors/compose.py b/flowtorch/bijectors/compose.py new file mode 100644 index 00000000..20f1606f --- /dev/null +++ b/flowtorch/bijectors/compose.py @@ -0,0 +1,88 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +from typing import Optional, Sequence + +import flowtorch +import flowtorch.parameters +import torch +import torch.distributions +from flowtorch.bijectors.base import Bijector +from torch.distributions.utils import _sum_rightmost + + +class Compose(Bijector): + def __init__( + self, + bijectors: Sequence[flowtorch.Lazy], + *, + shape: torch.Size, + context_shape: Optional[torch.Size] = None, + ): + assert len(bijectors) > 0 + + # Instantiate all bijectors, propagating shape information + self.bijectors = [] + for bijector in bijectors: + assert issubclass(bijector.cls, Bijector) + + self.bijectors.append(bijector(shape=shape)) + shape = self.bijectors[-1].forward_shape(shape) # type: ignore + + self.domain = self.bijectors[0].domain # type: ignore + self.codomain = self.bijectors[-1].codomain # type: ignore + + # Make parameters accessible to dist.Flow + self._params = torch.nn.ModuleList( + [ + b._params # type: ignore + for b in self.bijectors + if isinstance(b._params, torch.nn.Module) # type: ignore + ] + ) + + self._context_shape = context_shape + + # NOTE: We overwrite forward rather than _forward so that the composed + # bijectors can handle the caching separately! + def forward(self, x: torch.Tensor, context: torch.Tensor = None) -> torch.Tensor: + for bijector in self.bijectors: + x = bijector.forward(x, context) # type: ignore + + return x + + def inverse( + self, + y: torch.Tensor, + x: Optional[torch.Tensor] = None, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + for bijector in reversed(self.bijectors): + y = bijector.inverse(y, x, context) # type: ignore + + return y + + def log_abs_det_jacobian( + self, x: torch.Tensor, y: torch.Tensor, context: torch.Tensor = None + ) -> torch.Tensor: + """ + Computes the log det jacobian `log |dy/dx|` given input and output. + By default, assumes a volume preserving bijection. + """ + ldj = _sum_rightmost( + torch.zeros_like(y), + self.domain.event_dim, + ) + for bijector in reversed(self.bijectors): + y_inv = bijector.inverse(y, context) # type: ignore + ldj += bijector.log_abs_det_jacobian(y_inv, y, context) # type: ignore + y = y_inv + return ldj + + def param_shapes(self, shape: torch.Size) -> Sequence[torch.Size]: + """ + Given a base distribution, calculate the parameters for the transformation + of that distribution under this bijector. By default, no parameters are + set. 
+ """ + return [] diff --git a/flowtorch/bijectors/elementwise.py b/flowtorch/bijectors/elementwise.py new file mode 100644 index 00000000..e4805cc5 --- /dev/null +++ b/flowtorch/bijectors/elementwise.py @@ -0,0 +1,26 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT +from typing import Any, Optional + +import flowtorch +import torch +import torch.distributions +from flowtorch.bijectors.base import Bijector +from flowtorch.parameters.tensor import Tensor + + +class Elementwise(Bijector): + def __init__( + self, + params: Optional[flowtorch.Lazy] = None, + *, + shape: torch.Size, + context_shape: Optional[torch.Size] = None, + **kwargs: Any + ) -> None: + if not params: + params = Tensor() # type: ignore + + assert params is None or issubclass(params.cls, Tensor) + + super().__init__(params, shape=shape, context_shape=context_shape) diff --git a/flowtorch/bijectors/elu.py b/flowtorch/bijectors/elu.py new file mode 100644 index 00000000..63332f99 --- /dev/null +++ b/flowtorch/bijectors/elu.py @@ -0,0 +1,41 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +from typing import Optional + +import torch +import torch.distributions.constraints as constraints +import torch.nn.functional as F +from flowtorch.bijectors.fixed import Fixed +from flowtorch.ops import eps + + +class ELU(Fixed): + codomain = constraints.greater_than(-1.0) + + # TODO: Setting the alpha value of ELU as __init__ argument + + def _forward( + self, + x: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return F.elu(x) + + def _inverse( + self, + y: torch.Tensor, + x: Optional[torch.Tensor] = None, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return torch.max(y, torch.zeros_like(y)) + torch.min( + torch.log1p(y + eps), torch.zeros_like(y) + ) + + def _log_abs_det_jacobian( + self, + x: torch.Tensor, + y: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return -F.relu(-x) diff --git a/flowtorch/bijectors/exp.py b/flowtorch/bijectors/exp.py new file mode 100644 index 00000000..2b9d73e6 --- /dev/null +++ b/flowtorch/bijectors/exp.py @@ -0,0 +1,38 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +from typing import Optional + +import torch +import torch.distributions.constraints as constraints +from flowtorch.bijectors.fixed import Fixed + + +class Exp(Fixed): + r""" + Elementwise bijector via the mapping :math:`y = \exp(x)`. + """ + codomain = constraints.positive + + def _forward( + self, + x: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return torch.exp(x) + + def _inverse( + self, + y: torch.Tensor, + x: Optional[torch.Tensor] = None, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return y.log() + + def _log_abs_det_jacobian( + self, + x: torch.Tensor, + y: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return x diff --git a/flowtorch/bijectors/fixed.py b/flowtorch/bijectors/fixed.py new file mode 100644 index 00000000..ad8deb1b --- /dev/null +++ b/flowtorch/bijectors/fixed.py @@ -0,0 +1,32 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# SPDX-License-Identifier: MIT +from typing import Optional, Sequence + +import flowtorch +import torch +import torch.distributions +from flowtorch.bijectors.base import Bijector + + +class Fixed(Bijector): + def __init__( + self, + params: Optional[flowtorch.Lazy] = None, + *, + shape: torch.Size, + context_shape: Optional[torch.Size] = None, + ) -> None: + # TODO: In the future, make Fixed actually mean that there is no autograd + # through params + super().__init__(params, shape=shape, context_shape=context_shape) + assert params is None + + def param_shapes(self, shape: torch.Size) -> Sequence[torch.Size]: + """ + Given a base distribution, calculate the parameters for the transformation + of that distribution under this bijector. By default, no parameters are + set. + """ + # TODO: In the future, make Fixed actually mean that there is no autograd + # through params + return [] diff --git a/flowtorch/bijectors/leaky_relu.py b/flowtorch/bijectors/leaky_relu.py new file mode 100644 index 00000000..b8136575 --- /dev/null +++ b/flowtorch/bijectors/leaky_relu.py @@ -0,0 +1,38 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +import math +from typing import Optional + +import torch +import torch.nn.functional as F +from flowtorch.bijectors.fixed import Fixed + + +class LeakyReLU(Fixed): + # TODO: Setting the slope of Leaky ReLU as __init__ argument + + def _forward( + self, + x: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return F.leaky_relu(x) + + def _inverse( + self, + y: torch.Tensor, + x: Optional[torch.Tensor] = None, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return F.leaky_relu(y, negative_slope=100.0) + + def _log_abs_det_jacobian( + self, + x: torch.Tensor, + y: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return torch.where( + x >= 0.0, torch.zeros_like(x), torch.ones_like(x) * math.log(0.01) + ) diff --git a/flowtorch/bijectors/ops/__init__.py b/flowtorch/bijectors/ops/__init__.py new file mode 100644 index 00000000..bb884103 --- /dev/null +++ b/flowtorch/bijectors/ops/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT diff --git a/flowtorch/bijectors/ops/affine.py b/flowtorch/bijectors/ops/affine.py new file mode 100644 index 00000000..b799ea7e --- /dev/null +++ b/flowtorch/bijectors/ops/affine.py @@ -0,0 +1,86 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +from typing import Optional, Tuple + +import flowtorch +import torch +from flowtorch.bijectors.base import Bijector +from flowtorch.ops import clamp_preserve_gradients +from torch.distributions.utils import _sum_rightmost + + +class Affine(Bijector): + r""" + Affine mapping :math:`\mathbf{y} = \mu + \sigma \otimes \mathbf{x}` where + $\mu$ and $\sigma$ are learnable parameters. 
+ + """ + + def __init__( + self, + params: Optional[flowtorch.Lazy] = None, + *, + shape: torch.Size, + context_shape: Optional[torch.Size] = None, + log_scale_min_clip: float = -5.0, + log_scale_max_clip: float = 3.0, + sigmoid_bias: float = 2.0, + ) -> None: + super().__init__(params, shape=shape, context_shape=context_shape) + self.log_scale_min_clip = log_scale_min_clip + self.log_scale_max_clip = log_scale_max_clip + self.sigmoid_bias = sigmoid_bias + + def _forward( + self, + x: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + params = self.params + assert params is not None + + mean, log_scale = params(x, context=context) + log_scale = clamp_preserve_gradients( + log_scale, self.log_scale_min_clip, self.log_scale_max_clip + ) + scale = torch.exp(log_scale) + y = scale * x + mean + return y + + def _inverse( + self, + y: torch.Tensor, + x: Optional[torch.Tensor] = None, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + params = self.params + assert params is not None + + mean, log_scale = params(x, context=context) + log_scale = clamp_preserve_gradients( + log_scale, self.log_scale_min_clip, self.log_scale_max_clip + ) + inverse_scale = torch.exp(-log_scale) + x_new = (y - mean) * inverse_scale + return x_new + + def _log_abs_det_jacobian( + self, + x: torch.Tensor, + y: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + params = self.params + assert params is not None + + # Note: params will take care of caching "mean, log_scale = params(x)" + _, log_scale = params(x, context=context) + log_scale = clamp_preserve_gradients( + log_scale, self.log_scale_min_clip, self.log_scale_max_clip + ) + return _sum_rightmost(log_scale, self.domain.event_dim) + + def param_shapes(self, shape: torch.Size) -> Tuple[torch.Size, torch.Size]: + # A mean and log variance for every dimension of the event shape + return shape, shape diff --git a/flowtorch/bijectors/ops/spline.py b/flowtorch/bijectors/ops/spline.py new file mode 100644 index 00000000..ba2f9e8b --- /dev/null +++ b/flowtorch/bijectors/ops/spline.py @@ -0,0 +1,116 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +# This implementation is adapted in part from: +# * https://github.com/tonyduan/normalizing-flows/blob/master/nf/flows.py; +# * https://github.com/hmdolatabadi/LRS_NF/blob/master/nde/transforms/ +# nonlinearities.py; and, +# * https://github.com/bayesiains/nsf/blob/master/nde/transforms/splines/ +# rational_quadratic.py +# under the MIT license. 
+ +from typing import Any, Optional, Sequence, Tuple + +import flowtorch +import torch +import torch.nn.functional as F +from flowtorch.bijectors.base import Bijector +from flowtorch.ops import monotonic_rational_spline +from torch.distributions.utils import _sum_rightmost + + +class Spline(Bijector): + def __init__( + self, + params: Optional[flowtorch.Lazy] = None, + *, + shape: torch.Size, + context_shape: Optional[torch.Size] = None, + count_bins: int = 8, + bound: float = 3.0, + order: str = "linear" + ) -> None: + if order not in ["linear", "quadratic"]: + raise ValueError( + "Keyword argument 'order' must be one of ['linear', \ +'quadratic'], but '{}' was found!".format( + order + ) + ) + + self.count_bins = count_bins + self.bound = bound + self.order = order + + super().__init__(params, shape=shape, context_shape=context_shape) + + def _forward( + self, + x: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + y, _ = self._op(x, x, context) + return y + + def _inverse( + self, + y: torch.Tensor, + x: Optional[torch.Tensor] = None, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + x_new, _ = self._op(y, x, context=context, inverse=True) + return x_new + + def _log_abs_det_jacobian( + self, + x: torch.Tensor, + y: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + _, log_detJ = self._op(x, x, context) + return _sum_rightmost(log_detJ, self.domain.event_dim) + + def _op( + self, + input: torch.Tensor, + x: Optional[torch.Tensor] = None, + context: Optional[torch.Tensor] = None, + inverse: bool = False, + **kwargs: Any + ) -> Tuple[torch.Tensor, torch.Tensor]: + params = self.params + assert params is not None + + if self.order == "linear": + widths, heights, derivatives, lambdas = params(x, context=context) + lambdas = torch.sigmoid(lambdas) + else: + widths, heights, derivatives = params(x, context=context) + lambdas = None + + # Constrain parameters + # TODO: Move to flowtorch.ops function? + widths = F.softmax(widths, dim=-1) + heights = F.softmax(heights, dim=-1) + derivatives = F.softplus(derivatives) + + y, log_detJ = monotonic_rational_spline( + input, + widths, + heights, + derivatives, + lambdas, + bound=self.bound, + inverse=inverse, + **kwargs + ) + return y, log_detJ + + def param_shapes(self, shape: torch.Size) -> Sequence[torch.Size]: + s1 = torch.Size(shape + (self.count_bins,)) + s2 = torch.Size(shape + (self.count_bins - 1,)) + + if self.order == "linear": + return s1, s1, s2, s1 + else: + return s1, s1, s2 diff --git a/flowtorch/bijectors/permute.py b/flowtorch/bijectors/permute.py new file mode 100644 index 00000000..565bf64c --- /dev/null +++ b/flowtorch/bijectors/permute.py @@ -0,0 +1,60 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# SPDX-License-Identifier: MIT + +from typing import Optional + +import flowtorch +import torch +import torch.distributions.constraints as constraints +from flowtorch.bijectors.fixed import Fixed +from flowtorch.bijectors.volume_preserving import VolumePreserving +from torch.distributions.utils import lazy_property + + +class Permute(Fixed, VolumePreserving): + domain = constraints.real_vector + codomain = constraints.real_vector + + # TODO: A new abstraction so can defer construction of permutation + def __init__( + self, + params: Optional[flowtorch.Lazy] = None, + *, + shape: torch.Size, + context_shape: Optional[torch.Size] = None, + permutation: Optional[torch.Tensor] = None + ) -> None: + super().__init__(params, shape=shape, context_shape=context_shape) + self.permutation = permutation + + def _forward( + self, + x: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + if self.permutation is None: + self.permutation = torch.randperm(x.shape[-1]) + + return torch.index_select(x, -1, self.permutation) + + def _inverse( + self, + y: torch.Tensor, + x: Optional[torch.Tensor] = None, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + if self.permutation is None: + self.permutation = torch.randperm(y.shape[-1]) + + return torch.index_select(y, -1, self.inv_permutation) + + @lazy_property + def inv_permutation(self) -> Optional[torch.Tensor]: + if self.permutation is None: + return None + + result = torch.empty_like(self.permutation, dtype=torch.long) + result[self.permutation] = torch.arange( + self.permutation.size(0), dtype=torch.long, device=self.permutation.device + ) + return result diff --git a/flowtorch/bijectors/power.py b/flowtorch/bijectors/power.py new file mode 100644 index 00000000..a26521d1 --- /dev/null +++ b/flowtorch/bijectors/power.py @@ -0,0 +1,52 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +from typing import Optional + +import flowtorch +import torch +import torch.distributions.constraints as constraints +from flowtorch.bijectors.fixed import Fixed + + +class Power(Fixed): + r""" + Elementwise bijector via the mapping :math:`y = x^{\text{exponent}}`. + """ + domain = constraints.positive + codomain = constraints.positive + + # TODO: Tensor valued exponents and corresponding determination of event_dim + def __init__( + self, + params: Optional[flowtorch.Lazy] = None, + *, + shape: torch.Size, + context_shape: Optional[torch.Size] = None, + exponent: float = 2.0, + ) -> None: + super().__init__(params, shape=shape, context_shape=context_shape) + self.exponent = exponent + + def _forward( + self, + x: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return x.pow(self.exponent) + + def _inverse( + self, + y: torch.Tensor, + x: Optional[torch.Tensor] = None, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return y.pow(1 / self.exponent) + + def _log_abs_det_jacobian( + self, + x: torch.Tensor, + y: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return torch.abs(self.exponent * y / x).log() diff --git a/flowtorch/bijectors/sigmoid.py b/flowtorch/bijectors/sigmoid.py new file mode 100644 index 00000000..a28069e4 --- /dev/null +++ b/flowtorch/bijectors/sigmoid.py @@ -0,0 +1,39 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# SPDX-License-Identifier: MIT + +from typing import Optional + +import torch +import torch.distributions.constraints as constraints +import torch.nn.functional as F +from flowtorch.bijectors.fixed import Fixed +from flowtorch.ops import clipped_sigmoid + + +class Sigmoid(Fixed): + codomain = constraints.unit_interval + + def _forward( + self, + x: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return clipped_sigmoid(x) + + def _inverse( + self, + y: torch.Tensor, + x: Optional[torch.Tensor] = None, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + finfo = torch.finfo(y.dtype) + y = y.clamp(min=finfo.tiny, max=1.0 - finfo.eps) + return y.log() - torch.log1p(-y) + + def _log_abs_det_jacobian( + self, + x: torch.Tensor, + y: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return -F.softplus(-x) - F.softplus(x) diff --git a/flowtorch/bijectors/softplus.py b/flowtorch/bijectors/softplus.py new file mode 100644 index 00000000..5459fd50 --- /dev/null +++ b/flowtorch/bijectors/softplus.py @@ -0,0 +1,40 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +from typing import Optional + +import flowtorch.ops +import torch +import torch.distributions.constraints as constraints +import torch.nn.functional as F +from flowtorch.bijectors.fixed import Fixed + + +class Softplus(Fixed): + r""" + Elementwise bijector via the mapping :math:`\text{Softplus}(x) = \log(1 + \exp(x))`. + """ + codomain = constraints.positive + + def _forward( + self, + x: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return F.softplus(x) + + def _inverse( + self, + y: torch.Tensor, + x: Optional[torch.Tensor] = None, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return flowtorch.ops.softplus_inv(y) + + def _log_abs_det_jacobian( + self, + x: torch.Tensor, + y: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return -F.softplus(-x) diff --git a/flowtorch/bijectors/spline.py b/flowtorch/bijectors/spline.py new file mode 100644 index 00000000..479a7d5e --- /dev/null +++ b/flowtorch/bijectors/spline.py @@ -0,0 +1,30 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +from typing import Optional + +import flowtorch +import torch +from flowtorch.bijectors.elementwise import Elementwise +from flowtorch.bijectors.ops.spline import Spline as SplineOp + + +class Spline(SplineOp, Elementwise): + def __init__( + self, + params: Optional[flowtorch.Lazy] = None, + *, + shape: torch.Size, + context_shape: Optional[torch.Size] = None, + count_bins: int = 8, + bound: float = 3.0, + order: str = "linear" + ) -> None: + super().__init__( + params, + shape=shape, + context_shape=context_shape, + count_bins=count_bins, + bound=bound, + order=order, + ) diff --git a/flowtorch/bijectors/spline_autoregressive.py b/flowtorch/bijectors/spline_autoregressive.py new file mode 100644 index 00000000..ae29c787 --- /dev/null +++ b/flowtorch/bijectors/spline_autoregressive.py @@ -0,0 +1,31 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# SPDX-License-Identifier: MIT + +from typing import Optional + +import flowtorch +import flowtorch.parameters +import torch +from flowtorch.bijectors.autoregressive import Autoregressive +from flowtorch.bijectors.ops.spline import Spline as SplineOp + + +class SplineAutoregressive(SplineOp, Autoregressive): + def __init__( + self, + params: Optional[flowtorch.Lazy] = None, + *, + shape: torch.Size, + context_shape: Optional[torch.Size] = None, + count_bins: int = 8, + bound: float = 3.0, + order: str = "linear" + ) -> None: + super().__init__( + params, + shape=shape, + context_shape=context_shape, + count_bins=count_bins, + bound=bound, + order=order, + ) diff --git a/flowtorch/bijectors/tanh.py b/flowtorch/bijectors/tanh.py new file mode 100644 index 00000000..82d6500f --- /dev/null +++ b/flowtorch/bijectors/tanh.py @@ -0,0 +1,40 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +import math +from typing import Optional + +import torch +import torch.distributions.constraints as constraints +import torch.nn.functional as F +from flowtorch.bijectors.fixed import Fixed + + +class Tanh(Fixed): + r""" + Transform via the mapping :math:`y = \tanh(x)`. + """ + codomain = constraints.interval(-1.0, 1.0) + + def _forward( + self, + x: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return torch.tanh(x) + + def _inverse( + self, + y: torch.Tensor, + x: Optional[torch.Tensor] = None, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return torch.atanh(y) + + def _log_abs_det_jacobian( + self, + x: torch.Tensor, + y: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return 2.0 * (math.log(2.0) - x - F.softplus(-2.0 * x)) diff --git a/flowtorch/bijectors/volume_preserving.py b/flowtorch/bijectors/volume_preserving.py new file mode 100644 index 00000000..3eb9cc7f --- /dev/null +++ b/flowtorch/bijectors/volume_preserving.py @@ -0,0 +1,24 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT +from typing import Optional + +import torch +import torch.distributions +from flowtorch.bijectors.base import Bijector + + +class VolumePreserving(Bijector): + def _log_abs_det_jacobian( + self, + x: torch.Tensor, + y: torch.Tensor, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + # TODO: Confirm that this should involve `x`/`self.domain` and not + # `y`/`self.codomain` + return torch.zeros( + x.size()[: -self.domain.event_dim], + dtype=x.dtype, + layout=x.layout, # pyre-ignore[16] + device=x.device, + ) diff --git a/flowtorch/distributions/__init__.py b/flowtorch/distributions/__init__.py new file mode 100644 index 00000000..5e63c266 --- /dev/null +++ b/flowtorch/distributions/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +""" +Warning: This file was generated by flowtorch/scripts/generate_imports.py +Do not modify or delete! + +""" + +from flowtorch.distributions.flow import Flow +from flowtorch.distributions.neals_funnel import NealsFunnel + +__all__ = ["Flow", "NealsFunnel"] diff --git a/flowtorch/distributions/flow.py b/flowtorch/distributions/flow.py new file mode 100644 index 00000000..11b09654 --- /dev/null +++ b/flowtorch/distributions/flow.py @@ -0,0 +1,127 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# SPDX-License-Identifier: MIT +from typing import Any, Dict, Optional, Union + +import flowtorch +import torch +import torch.distributions as dist +from torch import Tensor +from torch.distributions.utils import _sum_rightmost + + +class Flow(torch.nn.Module, dist.Distribution, metaclass=flowtorch.LazyMeta): + _default_sample_shape = torch.Size() + arg_constraints: Dict[str, dist.constraints.Constraint] = {} + + def __init__( + self, + base_dist: dist.Distribution, + bijector: flowtorch.Lazy, + validate_args: Any = None, + ) -> None: + torch.nn.Module.__init__(self) + + self.base_dist = base_dist + self._context: Optional[torch.Tensor] = None + self.bijector = bijector(shape=base_dist.event_shape) + + # Required so that parameters are registered with nn.Module + self.params = self.bijector._params # type: ignore + + # TODO: Confirm that the following logic works. Shouldn't it use + # .domain and .codomain?? Infer shape from constructed self.bijector + shape = ( + self.base_dist.batch_shape + self.base_dist.event_shape # pyre-ignore[16] + ) + event_dim = self.bijector.domain.event_dim # type: ignore + event_dim = max(event_dim, len(self.base_dist.event_shape)) + batch_shape = shape[: len(shape) - event_dim] + event_shape = shape[len(shape) - event_dim :] + + dist.Distribution.__init__( + self, batch_shape, event_shape, validate_args=validate_args + ) + + def condition(self, context: torch.Tensor) -> "Flow": + self._context = context + return self + + def sample( + self, + sample_shape: Union[Tensor, torch.Size] = _default_sample_shape, + context: Optional[torch.Tensor] = None, + ) -> Tensor: + """ + Generates a sample_shape shaped sample or sample_shape shaped batch of + samples if the distribution parameters are batched. Samples first from + base distribution and applies `transform()` for every transform in the + list. + """ + if context is None: + context = self._context + with torch.no_grad(): + x = self.base_dist.sample(sample_shape) + x = self.bijector.forward(x, context) # type: ignore + return x + + def rsample( + self, + sample_shape: Union[Tensor, torch.Size] = _default_sample_shape, + context: Optional[torch.Tensor] = None, + ) -> Tensor: + """ + Generates a sample_shape shaped reparameterized sample or sample_shape + shaped batch of reparameterized samples if the distribution parameters + are batched. Samples first from base distribution and applies + `transform()` for every transform in the list. + """ + if context is None: + context = self._context + x = self.base_dist.rsample(sample_shape) + x = self.bijector.forward(x, context) # type: ignore + return x + + def rnormalize( + self, value: torch.Tensor, context: Optional[torch.Tensor] = None + ) -> Tensor: + """ + Push a tensor through the normalizing direction of the flow where + we can take autodiff gradients on the bijector. + """ + if context is None: + context = self._context + + return self.bijector.inverse(value, context) # type: ignore + + def normalize( + self, value: torch.Tensor, context: Optional[torch.Tensor] = None + ) -> Tensor: + """ + Push a tensor through the normalizing direction of the flow and + block autodiff gradients on the bijector. + """ + with torch.no_grad(): + return self.rnormalize(value, context) + + def log_prob( + self, value: torch.Tensor, context: Optional[torch.Tensor] = None + ) -> torch.Tensor: + """ + Scores the sample by inverting the transform(s) and computing the score + using the score of the base distribution and the log abs det jacobian. 
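Example usage (an illustrative sketch, assuming the AffineAutoregressive bijector that the tests later in this diff exercise):

    import torch
    import torch.distributions as dist

    import flowtorch.bijectors as bijs
    from flowtorch.distributions import Flow

    # Standard normal base distribution over a 2-dimensional event
    base = dist.Independent(dist.Normal(torch.zeros(2), torch.ones(2)), 1)

    # The lazy bijector is instantiated inside Flow with shape=base.event_shape
    flow = Flow(base, bijs.AffineAutoregressive())

    y = flow.rsample((10,))  # reparameterized samples of shape (10, 2)
    lp = flow.log_prob(y)    # inverts the bijector and adds the log|det J| term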
+ """ + if context is None: + context = self._context + event_dim = len(self.event_shape) # pyre-ignore[16] + + x = self.bijector.inverse(value, context) # type: ignore + log_prob = -_sum_rightmost( + self.bijector.log_abs_det_jacobian(x, value, context), # type: ignore + event_dim - self.bijector.domain.event_dim, # type: ignore + ) + log_prob = log_prob + _sum_rightmost( + self.base_dist.log_prob(x), + event_dim - len(self.base_dist.event_shape), # pyre-ignore[16] + ) + + return log_prob diff --git a/flowtorch/distributions/neals_funnel.py b/flowtorch/distributions/neals_funnel.py new file mode 100644 index 00000000..56a1e8ca --- /dev/null +++ b/flowtorch/distributions/neals_funnel.py @@ -0,0 +1,53 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT +from typing import Any, Dict, Optional, Union + +import torch +import torch.distributions as dist +from torch.distributions import constraints +from torch.distributions.utils import _standard_normal + + +class NealsFunnel(dist.Distribution): + """ + Neal's funnel. + p(x,y) = N(y|0,3) N(x|0,exp(y/2)) + """ + + support = constraints.real + arg_constraints: Dict[str, dist.constraints.Constraint] = {} + + def __init__(self, validate_args: Any = None) -> None: + d = 2 + batch_shape, event_shape = torch.Size([]), (d,) + super(NealsFunnel, self).__init__( + batch_shape, event_shape, validate_args=validate_args + ) + + def rsample( + self, + sample_shape: Union[torch.Tensor, torch.Size] = None, + context: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + if not sample_shape: + sample_shape = torch.Size() + eps = _standard_normal( + (sample_shape[0], 2), dtype=torch.float, device=torch.device("cpu") + ) + z = torch.zeros(eps.shape) + z[..., 1] = torch.tensor(3.0) * eps[..., 1] + z[..., 0] = torch.exp(z[..., 1] / 2.0) * eps[..., 0] + return z + + def log_prob( + self, value: torch.Tensor, context: Optional[torch.Tensor] = None + ) -> torch.Tensor: + if self._validate_args: + self._validate_sample(value) + x = value[..., 0] + y = value[..., 1] + + log_prob = dist.Normal(0, 3).log_prob(y) + log_prob += dist.Normal(0, torch.exp(y / 2)).log_prob(x) + + return log_prob diff --git a/flowtorch/docs.py b/flowtorch/docs.py new file mode 100644 index 00000000..b1a1749b --- /dev/null +++ b/flowtorch/docs.py @@ -0,0 +1,159 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +import importlib +import types +from collections import OrderedDict +from functools import lru_cache +from inspect import isclass, isfunction, ismodule +from typing import Any, Dict, Sequence, Mapping, Tuple + +# We don't want to include, e.g. both flowtorch.bijectors.Affine and +# flowtorch.bijectors.affine.Affine. Hence, we specify a list of modules +# to explicitly include in the API docs (and don't recurse on them). 
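To make the intent concrete (an illustrative sketch): given the include_modules list below, the _module_hierarchy() helper defined later in this file is expected to produce a nested mapping along the lines of

    {
        "flowtorch": {
            "flowtorch.bijectors": {},
            "flowtorch.distributions": {},
            "flowtorch.nn": {},
            "flowtorch.ops": {},
            "flowtorch.parameters": {},
            "flowtorch.utils": {},
        }
    }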
+# TODO: Include flowtorch.ops and flowtorch.numerical + +include_modules = [ + "flowtorch", + "flowtorch.bijectors", + "flowtorch.distributions", + # "flowtorch.experimental.parameters", + "flowtorch.nn", + "flowtorch.ops", + "flowtorch.parameters", + "flowtorch.utils", +] + + +def ispublic(name: str) -> bool: + return not name.startswith("_") + + +@lru_cache(maxsize=1) +def _documentable_modules() -> Mapping[types.ModuleType, Sequence[Tuple[str, Any]]]: + """ + Returns a list of (module, [(name, entity), ...]) pairs for modules + that are documentable + """ + + # TODO: Self document flowtorch.docs module + results = {} + + def dfs(dict: Mapping[str, Any]) -> None: + for key, val in dict.items(): + module = importlib.import_module(key) + entities = [ + (n, getattr(module, n)) + for n in sorted( + [ + n + for n in dir(module) + if ispublic(n) + and ( + isclass(getattr(module, n)) + or isfunction(getattr(module, n)) + ) + ] + ) + ] + results[module] = entities + + dfs(val) + + # Depth first search over module hierarchy, loading modules and extracting entities + dfs(_module_hierarchy()) + return results + + +@lru_cache(maxsize=1) +def _documentable_entities() -> Tuple[Sequence[str], Dict[str, Any]]: + """ + Returns a list of (str, entity) pairs for entities that are documentable + """ + + name_entity_mapping = {} + documentable_modules = _documentable_modules() + for module, entities in documentable_modules.items(): + if len(entities) > 0: + name_entity_mapping[module.__name__] = module + + for name, entity in entities: + qualified_name = f"{module.__name__}.{name}" + name_entity_mapping[qualified_name] = entity + + sorted_entity_names = sorted(name_entity_mapping.keys()) + return sorted_entity_names, name_entity_mapping + + +@lru_cache(maxsize=1) +def _module_hierarchy() -> Mapping[str, Any]: + # Make list of modules to search and their hierarchy + results: Dict[str, Any] = OrderedDict() + for module in sorted(include_modules): + submodules = module.split(".") + this_dict = results.setdefault(submodules[0], {}) + + for idx in range(1, len(submodules)): + submodule = ".".join(submodules[0 : (idx + 1)]) + this_dict.setdefault(submodule, {}) + this_dict = this_dict[submodule] + + return results + + +def generate_markdown(name: str, entity: Any) -> Tuple[str, str]: + """ + TODO: Method that inputs an object, extracts signature/docstring, + and formats as markdown + TODO: Method that build index markdown for overview files + The overview for the entire API is a special case + """ + + if name == "": + header = """--- +id: overview +sidebar_label: "Overview" +slug: "/api" +--- + +:::info + +These API stubs are generated from Python via a custom script and will filled +out in the future. 
+ +::: + +""" + filename = "../website/docs/api/overview.mdx" + return filename, header + + # Regular modules/functions + item = { + "id": name, + "sidebar_label": "Overview" if ismodule(entity) else name.split(".")[-1], + "slug": f"/api/{name}", + "ref": entity, + "filename": f"../website/docs/api/{name}.mdx", + } + + header = f"""--- +id: {item['id']} +sidebar_label: {item['sidebar_label']} +slug: {item['slug']} +---""" + + markdown = header + return item["filename"], markdown + + +module_hierarchy = _module_hierarchy() +documentable_modules = _documentable_modules() +sorted_entity_names, name_entity_mapping = _documentable_entities() + +__all__ = [ + "documentable_modules", + "generate_markdown", + "module_hierarchy", + "name_entity_mapping", + "sorted_entity_names", +] diff --git a/flowtorch/lazy.py b/flowtorch/lazy.py new file mode 100644 index 00000000..d1572104 --- /dev/null +++ b/flowtorch/lazy.py @@ -0,0 +1,107 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +import inspect +from collections import OrderedDict +from typing import Tuple, Mapping, Any + + +# TODO: Move functions to flowtorch.utils? +def partial_signature( + sig: inspect.Signature, *args: Any, **kwargs: Any +) -> Tuple[inspect.Signature, Mapping[str, Any]]: + """ + Given an inspect.Signature object and a dictionary of (name, val) pairs, + bind the names to the signature and return a new modified signature + """ + bindings = dict(sig.bind_partial(*args, **kwargs).arguments) + + old_parameters = sig.parameters + new_parameters = OrderedDict() + + for param_name in old_parameters: + if param_name not in bindings: + new_parameters[param_name] = old_parameters[param_name] + + bound_sig = sig.replace(parameters=list(new_parameters.values())) + + return bound_sig, bindings + + +def count_unbound(sig: inspect.Signature) -> int: + return len( + [p for p, v in sig.parameters.items() if v.default is inspect.Parameter.empty] + ) + + +class LazyMeta(type): + def __call__(cls: Any, *args: Any, **kwargs: Any) -> Any: + """ + Intercept instance creation + """ + # Special behaviour for Lazy class + if cls.__qualname__ == "Lazy": + lazy_cls = args[0] + args = args[1:] + else: + lazy_cls = cls + + # Remove first argument (i.e., self) from signature of class' initializer + sig = inspect.signature(lazy_cls.__init__) + new_parameters = OrderedDict( + [(k, v) for idx, (k, v) in enumerate(sig.parameters.items()) if idx != 0] + ) + sig = sig.replace(parameters=list(new_parameters.values())) + + # Attempt binding arguments to initializer + bound_sig, bindings = partial_signature(sig, *args, **kwargs) + + # If there are no unbound arguments then instantiate class + if not count_unbound(bound_sig): + return type.__call__(lazy_cls, *args, **kwargs) + + # Otherwise, return Lazy instance + else: + return type.__call__(Lazy, lazy_cls, bindings, sig, bound_sig) + + +class Lazy(metaclass=LazyMeta): + """ + Represents delayed instantiation of a class. 
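Example of the intended behaviour (an illustrative sketch; AffineAutoregressive is one of the bijectors added elsewhere in this diff):

    import torch
    import flowtorch
    import flowtorch.bijectors as bijs

    # Omitting the required `shape` keyword yields a Lazy placeholder...
    lazy_bij = bijs.AffineAutoregressive()
    assert isinstance(lazy_bij, flowtorch.Lazy)

    # ...and supplying the remaining arguments later completes instantiation,
    # mirroring what Flow.__init__ does with shape=base_dist.event_shape.
    bij = lazy_bij(shape=torch.Size([2]))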
+ """ + + def __init__( + self, + cls: Any, + bindings: Mapping[str, Any], + sig: inspect.Signature, + bound_sig: inspect.Signature, + ): + self.cls = cls + self.bindings = bindings + self.sig = sig + self.bound_sig = bound_sig + + def __repr__(self) -> str: + return f"Lazy(cls={self.cls.__name__}, bindings={self.bindings})" + + def __call__(self, *args: Any, **kwargs: Any) -> "Lazy": + """ + Apply additional bindings + """ + new_bindings = dict(self.bound_sig.bind_partial(*args, **kwargs).arguments) + new_bindings.update(self.bindings) + + # Update args and kwargs + new_args = [] + new_kwargs = {} + for n, p in self.sig.parameters.items(): + if n in new_bindings: + if p.kind == inspect.Parameter.POSITIONAL_ONLY: + new_args.append(new_bindings[n]) + + else: + new_kwargs[n] = new_bindings[n] + + # Attempt object creation + return Lazy(self.cls, *new_args, **new_kwargs) diff --git a/flowtorch/nn/__init__.py b/flowtorch/nn/__init__.py new file mode 100644 index 00000000..0c9263f7 --- /dev/null +++ b/flowtorch/nn/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + + +from flowtorch.nn.made import MaskedLinear, create_mask + +__all__ = ["create_mask", "MaskedLinear"] diff --git a/flowtorch/nn/made.py b/flowtorch/nn/made.py new file mode 100644 index 00000000..3556801e --- /dev/null +++ b/flowtorch/nn/made.py @@ -0,0 +1,121 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +from typing import Sequence, Tuple + +import torch +import torch.nn as nn +from torch.nn import functional as F + + +def sample_mask_indices( + input_dim: int, hidden_dim: int, simple: bool = True +) -> torch.Tensor: + """ + Samples the indices assigned to hidden units during the construction of MADE masks + :param input_dim: the dimensionality of the input variable + :param hidden_dim: the dimensionality of the hidden layer + :param simple: True to space fractional indices by rounding to nearest + int, false round randomly + """ + indices = torch.linspace(1, input_dim, steps=hidden_dim, device="cpu").to( + torch.Tensor().device + ) + if simple: + # Simple procedure tries to space fractional indices evenly by rounding + # to nearest int + return torch.round(indices) + else: + # "Non-simple" procedure creates fractional indices evenly then rounds + # at random + ints = indices.floor() + ints += torch.bernoulli(indices - ints) + return ints + + +def create_mask( + input_dim: int, + context_dim: int, + hidden_dims: Sequence[int], + permutation: torch.LongTensor, + output_multiplier: int, +) -> Tuple[Sequence[torch.Tensor], torch.Tensor]: + """ + Creates MADE masks for a conditional distribution + :param input_dim: the dimensionality of the input variable + :param context_dim: the dimensionality of the variable that is + conditioned on (for conditional densities) + :param hidden_dims: the dimensionality of the hidden layers(s) + :param permutation: the order of the input variables + :param output_multipliers: tiles the output (e.g. 
for when a separate + mean and scale parameter are desired) + """ + # Create mask indices for input, hidden layers, and final layer + # We use 0 to refer to the elements of the variable being conditioned on, + # and range(1:(D_latent+1)) for the input variable + var_index = torch.empty(permutation.shape, dtype=torch.get_default_dtype()) + var_index[permutation] = torch.arange(input_dim, dtype=torch.get_default_dtype()) + + # Create the indices that are assigned to the neurons + input_indices = torch.cat((torch.zeros(context_dim), 1 + var_index)) + + # For conditional MADE, introduce a 0 index that all the conditioned + # variables are connected to as per Paige and Wood (2016) (see below) + if context_dim > 0: + hidden_indices = [sample_mask_indices(input_dim, h) - 1 for h in hidden_dims] + else: + hidden_indices = [sample_mask_indices(input_dim - 1, h) for h in hidden_dims] + + # *** TODO: Fix this line *** + output_indices = ( + (var_index + 1).unsqueeze(-1).repeat(1, output_multiplier).reshape(-1) + ) + + # Create mask from input to output for the skips connections + mask_skip = (output_indices.unsqueeze(-1) > input_indices.unsqueeze(0)).type_as( + var_index + ) + + # Create mask from input to first hidden layer, and between subsequent + # hidden layers + masks = [ + (hidden_indices[0].unsqueeze(-1) >= input_indices.unsqueeze(0)).type_as( + var_index + ) + ] + for i in range(1, len(hidden_dims)): + masks.append( + ( + hidden_indices[i].unsqueeze(-1) >= hidden_indices[i - 1].unsqueeze(0) + ).type_as(var_index) + ) + + # Create mask from last hidden layer to output layer + masks.append( + (output_indices.unsqueeze(-1) > hidden_indices[-1].unsqueeze(0)).type_as( + var_index + ) + ) + + return masks, mask_skip + + +class MaskedLinear(nn.Linear): + """ + A linear mapping with a given mask on the weights (arbitrary bias) + :param in_features: the number of input features + :param out_features: the number of output features + :param mask: the mask to apply to the in_features x out_features weight matrix + :param bias: whether or not `MaskedLinear` should include a bias term. + defaults to `True` + """ + + def __init__( + self, in_features: int, out_features: int, mask: torch.Tensor, bias: bool = True + ) -> None: + super().__init__(in_features, out_features, bias) + self.register_buffer("mask", mask.data) + + def forward(self, _input: torch.Tensor) -> torch.Tensor: + masked_weight = self.weight * self.mask + return F.linear(_input, masked_weight, self.bias) diff --git a/flowtorch/ops/__init__.py b/flowtorch/ops/__init__.py new file mode 100644 index 00000000..876a4124 --- /dev/null +++ b/flowtorch/ops/__init__.py @@ -0,0 +1,303 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# SPDX-License-Identifier: MIT + +from typing import Optional, Tuple + +import torch +import torch.nn.functional as F + +eps = 1e-8 + + +def clamp_preserve_gradients(x: torch.Tensor, min: float, max: float) -> torch.Tensor: + """ + This helper function clamps gradients but still passes through the + gradient in clamped regions + """ + return x + (x.clamp(min, max) - x).detach() + + +def clipped_sigmoid(x: torch.Tensor) -> torch.Tensor: + finfo = torch.finfo(x.dtype) + return torch.clamp(torch.sigmoid(x), min=finfo.tiny, max=1.0 - finfo.eps) + + +def softplus_inv(y: torch.Tensor) -> torch.Tensor: + return y + y.neg().expm1().neg().log() + + +def _searchsorted(sorted_sequence: torch.Tensor, values: torch.Tensor) -> torch.Tensor: + """ + Searches for which bin an input belongs to (in a way that is parallelizable and + amenable to autodiff) + TODO: Replace with torch.searchsorted once it is released + """ + return torch.sum(values[..., None] >= sorted_sequence, dim=-1) - 1 + + +def _select_bins(x: torch.Tensor, idx: torch.Tensor) -> torch.Tensor: + """ + Performs gather to select the bin in the correct way on batched inputs + """ + idx = idx.clamp(min=0, max=x.size(-1) - 1) + + """ + Broadcast dimensions of idx over x + idx ~ (batch_dims, input_dim, 1) + x ~ (context_batch_dims, input_dim, count_bins) + Note that by convention, the context variable batch dimensions must broadcast + over the input batch dimensions. + """ + if len(idx.shape) >= len(x.shape): + x = x.reshape((1,) * (len(idx.shape) - len(x.shape)) + x.shape) + x = x.expand(idx.shape[:-2] + (-1,) * 2) + + return x.gather(-1, idx).squeeze(-1) + + +def _calculate_knots( + lengths: torch.Tensor, lower: float, upper: float +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Given a tensor of unscaled bin lengths that sum to 1, plus the lower and upper + limits, returns the shifted and scaled lengths plus knot positions + """ + + # Cumulative widths gives x (y for inverse) position of knots + knots = torch.cumsum(lengths, dim=-1) + + # Pad left of last dimension with 1 zero to compensate for dim lost to cumsum + knots = F.pad(knots, pad=(1, 0), mode="constant", value=0.0) + + # Translate [0,1] knot points to [-B, B] + knots = (upper - lower) * knots + lower + + # Convert the knot points back to lengths + # NOTE: Are following two lines a necessary fix for accumulation (round-off) error? + knots[..., 0] = lower + knots[..., -1] = upper + lengths = knots[..., 1:] - knots[..., :-1] + + return lengths, knots + + +def monotonic_rational_spline( + inputs: torch.Tensor, + widths: torch.Tensor, + heights: torch.Tensor, + derivatives: torch.Tensor, + lambdas: Optional[torch.Tensor] = None, + inverse: bool = False, + bound: float = 3.0, + min_bin_width: float = 1e-3, + min_bin_height: float = 1e-3, + min_derivative: float = 1e-3, + min_lambda: float = 0.025, + eps: float = 1e-6, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Calculating a monotonic rational spline (linear or quadratic) or its inverse, + plus the log(abs(detJ)) required for normalizing flows. + NOTE: I omit the docstring with parameter descriptions for this method since it + is not considered "public" yet! + """ + + # Ensure bound is positive + # NOTE: For simplicity, we apply the identity function outside [-B, B] X [-B, B] + # rather than allowing arbitrary corners to the bounding box. If you want a + # different bounding box you can apply an affine transform before and after the + # input. 
+ assert bound > 0.0 + + num_bins = widths.shape[-1] + if min_bin_width * num_bins > 1.0: + raise ValueError("Minimal bin width too large for the number of bins") + if min_bin_height * num_bins > 1.0: + raise ValueError("Minimal bin height too large for the number of bins") + + # inputs, inside_interval_mask, outside_interval_mask ~ (batch_dim, input_dim) + left, right = -bound, bound + bottom, top = -bound, bound + inside_interval_mask = (inputs >= left) & (inputs <= right) + outside_interval_mask = ~inside_interval_mask + + # outputs, logabsdet ~ (batch_dim, input_dim) + outputs = torch.zeros_like(inputs) + logabsdet = torch.zeros_like(inputs) + + # For numerical stability, put lower/upper limits on parameters. E.g. give + # every bin min_bin_width, then add width fraction of remaining length. + # NOTE: Do this here rather than higher up because we want everything to + # ensure numerical stability within this function. + widths = min_bin_width + (1.0 - min_bin_width * num_bins) * widths + heights = min_bin_height + (1.0 - min_bin_height * num_bins) * heights + derivatives = min_derivative + derivatives + + # Cumulative widths are x (y for inverse) position of knots + # Similarly, cumulative heights are y (x for inverse) position of knots + widths, cumwidths = _calculate_knots(widths, left, right) + heights, cumheights = _calculate_knots(heights, bottom, top) + + # Pad left and right derivatives with fixed values at first and last knots + # These are 1 since the function is the identity outside the bounding box + # and the derivative is continuous. + # NOTE: Not sure why this is 1.0 - min_derivative rather than 1.0. I've + # copied this from original implementation + derivatives = F.pad( + derivatives, pad=(1, 1), mode="constant", value=1.0 - min_derivative + ) + + # Get the index of the bin that each input is in + # bin_idx ~ (batch_dim, input_dim, 1) + bin_idx = _searchsorted( + cumheights + eps if inverse else cumwidths + eps, inputs + ).unsqueeze(-1) + + # Select the value for the relevant bin for the variables + # used in the main calculation + input_widths = _select_bins(widths, bin_idx) + input_cumwidths = _select_bins(cumwidths, bin_idx) + input_cumheights = _select_bins(cumheights, bin_idx) + input_delta = _select_bins(heights / widths, bin_idx) + input_derivatives = _select_bins(derivatives, bin_idx) + input_derivatives_plus_one = _select_bins(derivatives[..., 1:], bin_idx) + input_heights = _select_bins(heights, bin_idx) + + # Calculate monotonic *linear* rational spline + if lambdas is not None: + lambdas = (1 - 2 * min_lambda) * lambdas + min_lambda + input_lambdas = _select_bins(lambdas, bin_idx) + + # The weight, w_a, at the left-hand-side of each bin + # We are free to choose w_a, so set it to 1 + wa = 1.0 + + # The weight, w_b, at the right-hand-side of each bin + # This turns out to be a multiple of the w_a + # TODO: Should this be done in log space for numerical stability? + wb = torch.sqrt(input_derivatives / input_derivatives_plus_one) * wa + + # The weight, w_c, at the division point of each bin + # Recall that each bin is divided into two parts so we have enough + # d.o.f. 
to fit spline + wc = ( + input_lambdas * wa * input_derivatives + + (1 - input_lambdas) * wb * input_derivatives_plus_one + ) / input_delta + + # Calculate y coords of bins + ya = input_cumheights + yb = input_heights + input_cumheights + yc = ((1.0 - input_lambdas) * wa * ya + input_lambdas * wb * yb) / ( + (1.0 - input_lambdas) * wa + input_lambdas * wb + ) + + if inverse: + numerator = (input_lambdas * wa * (ya - inputs)) * ( + inputs <= yc + ).float() + ( + (wc - input_lambdas * wb) * inputs + input_lambdas * wb * yb - wc * yc + ) * ( + inputs > yc + ).float() + + denominator = ((wc - wa) * inputs + wa * ya - wc * yc) * ( + inputs <= yc + ).float() + ((wc - wb) * inputs + wb * yb - wc * yc) * (inputs > yc).float() + + theta = numerator / denominator + + outputs = theta * input_widths + input_cumwidths + + derivative_numerator = ( + wa * wc * input_lambdas * (yc - ya) * (inputs <= yc).float() + + wb * wc * (1 - input_lambdas) * (yb - yc) * (inputs > yc).float() + ) * input_widths + + logabsdet = torch.log(derivative_numerator) - 2 * torch.log( + torch.abs(denominator) + ) + + else: + theta = (inputs - input_cumwidths) / input_widths + + numerator = (wa * ya * (input_lambdas - theta) + wc * yc * theta) * ( + theta <= input_lambdas + ).float() + (wc * yc * (1 - theta) + wb * yb * (theta - input_lambdas)) * ( + theta > input_lambdas + ).float() + + denominator = (wa * (input_lambdas - theta) + wc * theta) * ( + theta <= input_lambdas + ).float() + (wc * (1 - theta) + wb * (theta - input_lambdas)) * ( + theta > input_lambdas + ).float() + + outputs = numerator / denominator + + derivative_numerator = ( + wa * wc * input_lambdas * (yc - ya) * (theta <= input_lambdas).float() + + wb + * wc + * (1 - input_lambdas) + * (yb - yc) + * (theta > input_lambdas).float() + ) / input_widths + + logabsdet = torch.log(derivative_numerator) - 2 * torch.log( + torch.abs(denominator) + ) + + # Calculate monotonic *quadratic* rational spline + else: + if inverse: + a = (inputs - input_cumheights) * ( + input_derivatives + input_derivatives_plus_one - 2 * input_delta + ) + input_heights * (input_delta - input_derivatives) + b = input_heights * input_derivatives - (inputs - input_cumheights) * ( + input_derivatives + input_derivatives_plus_one - 2 * input_delta + ) + c = -input_delta * (inputs - input_cumheights) + + discriminant = b.pow(2) - 4 * a * c + assert (discriminant >= 0).all() + + root = (2 * c) / (-b - torch.sqrt(discriminant)) + outputs = root * input_widths + input_cumwidths + + theta_one_minus_theta = root * (1 - root) + denominator = input_delta + ( + (input_derivatives + input_derivatives_plus_one - 2 * input_delta) + * theta_one_minus_theta + ) + derivative_numerator = input_delta.pow(2) * ( + input_derivatives_plus_one * root.pow(2) + + 2 * input_delta * theta_one_minus_theta + + input_derivatives * (1 - root).pow(2) + ) + logabsdet = -(torch.log(derivative_numerator) - 2 * torch.log(denominator)) + + else: + theta = (inputs - input_cumwidths) / input_widths + theta_one_minus_theta = theta * (1 - theta) + + numerator = input_heights * ( + input_delta * theta.pow(2) + input_derivatives * theta_one_minus_theta + ) + denominator = input_delta + ( + (input_derivatives + input_derivatives_plus_one - 2 * input_delta) + * theta_one_minus_theta + ) + outputs = input_cumheights + numerator / denominator + + derivative_numerator = input_delta.pow(2) * ( + input_derivatives_plus_one * theta.pow(2) + + 2 * input_delta * theta_one_minus_theta + + input_derivatives * (1 - theta).pow(2) + ) + logabsdet = 
torch.log(derivative_numerator) - 2 * torch.log(denominator) + + # Apply the identity function outside the bounding box + outputs[outside_interval_mask] = inputs[outside_interval_mask] + logabsdet[outside_interval_mask] = 0.0 + return outputs, logabsdet diff --git a/flowtorch/parameters/__init__.py b/flowtorch/parameters/__init__.py new file mode 100644 index 00000000..eb42fca2 --- /dev/null +++ b/flowtorch/parameters/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +""" +Warning: This file was generated by flowtorch/scripts/generate_imports.py +Do not modify or delete! + +""" + +from flowtorch.parameters.base import Parameters +from flowtorch.parameters.dense_autoregressive import DenseAutoregressive +from flowtorch.parameters.tensor import Tensor + +__all__ = ["Parameters", "DenseAutoregressive", "Tensor"] diff --git a/flowtorch/parameters/base.py b/flowtorch/parameters/base.py new file mode 100644 index 00000000..592bc0c5 --- /dev/null +++ b/flowtorch/parameters/base.py @@ -0,0 +1,41 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT +from typing import Optional, Sequence + +import torch +from flowtorch import LazyMeta + + +class Parameters(torch.nn.Module, metaclass=LazyMeta): + """ + Deferred initialization of parameters. + """ + + def __init__( + self, + param_shapes: Sequence[torch.Size], + input_shape: torch.Size, + context_shape: Optional[torch.Size], + ) -> None: + super().__init__() + self.input_shape = input_shape + self.param_shapes = param_shapes + self.context_shape = context_shape + + def forward( + self, + x: Optional[torch.Tensor] = None, + context: Optional[torch.Tensor] = None, + ) -> Sequence[torch.Tensor]: + # TODO: Caching etc. + return self._forward(x, context) + + def _forward( + self, + x: Optional[torch.Tensor] = None, + context: Optional[torch.Tensor] = None, + ) -> Sequence[torch.Tensor]: + # I raise an exception rather than using @abstractmethod and + # metaclass=ABC so that we can reserve the metaclass for lazy + # evaluation. + raise NotImplementedError() diff --git a/flowtorch/parameters/dense_autoregressive.py b/flowtorch/parameters/dense_autoregressive.py new file mode 100644 index 00000000..dc02de63 --- /dev/null +++ b/flowtorch/parameters/dense_autoregressive.py @@ -0,0 +1,185 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# SPDX-License-Identifier: MIT + +import warnings +from typing import Callable, Optional, Sequence + +import torch +import torch.nn as nn +from flowtorch.nn.made import MaskedLinear, create_mask +from flowtorch.parameters.base import Parameters + + +class DenseAutoregressive(Parameters): + autoregressive = True + + def __init__( + self, + param_shapes: Sequence[torch.Size], + input_shape: torch.Size, + context_shape: Optional[torch.Size], + *, + hidden_dims: Sequence[int] = (256, 256), + nonlinearity: Callable[[], nn.Module] = nn.ReLU, + permutation: Optional[torch.LongTensor] = None, + skip_connections: bool = False, + ) -> None: + super().__init__(param_shapes, input_shape, context_shape) + + # Check consistency of input_shape with param_shapes + # We need each param_shapes to match input_shape in + # its leftmost dimensions + for s in param_shapes: + assert len(s) >= len(input_shape) and s[: len(input_shape)] == input_shape + + self.hidden_dims = hidden_dims + self.nonlinearity = nonlinearity + self.skip_connections = skip_connections + self._build(input_shape, param_shapes, context_shape, permutation) + + def _build( + self, + input_shape: torch.Size, + param_shapes: Sequence[torch.Size], + context_shape: Optional[torch.Size], + permutation: Optional[torch.LongTensor], + ) -> None: + # Work out flattened input and output shapes + param_shapes_ = list(param_shapes) + input_dims = int(torch.sum(torch.tensor(input_shape)).int().item()) + if input_dims == 0: + input_dims = 1 # scalars represented by torch.Size([]) + if permutation is None: + # By default set a random permutation of variables, which is + # important for performance with multiple steps + permutation = torch.LongTensor( + torch.randperm(input_dims, device="cpu").to( + torch.LongTensor((1,)).device + ) + ) + else: + # The permutation is chosen by the user + permutation = torch.LongTensor(permutation) + + self.param_dims = [ + int(max(torch.prod(torch.tensor(s[len(input_shape) :])).item(), 1)) + for s in param_shapes_ + ] + + self.output_multiplier = sum(self.param_dims) + + if input_dims == 1: + warnings.warn( + "DenseAutoregressive input_dim = 1. " + "Consider using an affine transformation instead." + ) + + # Calculate the indices on the output corresponding to each parameter + # TODO: Is this logic correct??? + # ends = torch.cumsum( + # torch.tensor( + # [max(torch.prod(torch.tensor(s)).item(), 1) for s in param_shapes_] + # ), + # dim=0, + # ) + # starts = torch.cat((torch.zeros(1).type_as(ends), ends[:-1])) + # self.param_slices = [slice(s.item(), e.item()) for s, e in zip(starts, ends)] + + # Hidden dimension must be not less than the input otherwise it isn't + # possible to connect to the outputs correctly + for h in self.hidden_dims: + if h < input_dims: + raise ValueError( + "Hidden dimension must not be less than input dimension." + ) + + # TODO: Check that the permutation is valid for the input dimension! 
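        # A possible sketch of that check (hypothetical helper, for illustration only):
        #
        #     def _ispermutation(p: torch.Tensor, n: int) -> bool:
        #         # True iff p is some rearrangement of 0, 1, ..., n - 1
        #         return p.numel() == n and torch.equal(
        #             torch.sort(p.flatten()).values, torch.arange(n)
        #         )
        #
        #     assert _ispermutation(permutation, input_dims)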
+ # Implement ispermutation() that sorts permutation and checks whether it + # has all integers from 0, 1, ..., self.input_dims - 1 + self.register_buffer("permutation", permutation) + + # Create masks + hidden_dims = self.hidden_dims + masks, mask_skip = create_mask( + input_dim=input_dims, + context_dim=0, # context_dims, + hidden_dims=hidden_dims, + permutation=permutation, + output_multiplier=self.output_multiplier, + ) + + # Create masked layers + layers = [ + MaskedLinear( + input_dims, # + context_dims, + hidden_dims[0], + masks[0], + ), + self.nonlinearity(), + ] + for i in range(1, len(hidden_dims)): + layers.extend( + [ + MaskedLinear(hidden_dims[i - 1], hidden_dims[i], masks[i]), + self.nonlinearity(), + ] + ) + layers.append( + MaskedLinear( + hidden_dims[-1], + input_dims * self.output_multiplier, + masks[-1], + ) + ) + + if self.skip_connections: + layers.append( + MaskedLinear( + input_dims, # + context_dims, + input_dims * self.output_multiplier, + mask_skip, + bias=False, + ) + ) + + self.layers = nn.ModuleList(layers) + + def _forward( + self, + x: Optional[torch.Tensor] = None, + context: Optional[torch.Tensor] = None, + ) -> Sequence[torch.Tensor]: + assert x is not None + + # Flatten x + batch_shape = x.shape[: len(x.shape) - len(self.input_shape)] + if len(batch_shape) > 0: + x = x.reshape(batch_shape + (-1,)) + + if context is not None: + # TODO: Fix the following! + h = torch.cat([context.expand((x.shape[0], -1)), x], dim=-1) + else: + h = x + + for idx in range(len(self.layers) // 2): + h = self.layers[2 * idx + 1](self.layers[2 * idx](h)) + h = self.layers[-1](h) + + # TODO: Get skip_layers working again! + # if self.skip_layer is not None: + # h = h + self.skip_layer(x) + + # Shape the output + # h ~ (batch_dims * input_dims, total_params_per_dim) + h = h.reshape(-1, self.output_multiplier) + + # result ~ (batch_dims * input_dims, params_per_dim[0]), ... + result = h.split(list(self.param_dims), dim=-1) + + # results ~ (batch_shape, param_shapes[0]), ... + result = tuple( + h_slice.view(batch_shape + p_shape) + for h_slice, p_shape in zip(result, list(self.param_shapes)) + ) + return result diff --git a/flowtorch/parameters/tensor.py b/flowtorch/parameters/tensor.py new file mode 100644 index 00000000..4d5ea3f2 --- /dev/null +++ b/flowtorch/parameters/tensor.py @@ -0,0 +1,28 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +from typing import Optional, Sequence + +import torch +import torch.nn as nn +from flowtorch.parameters.base import Parameters + + +class Tensor(Parameters): + def __init__( + self, + param_shapes: Sequence[torch.Size], + input_shape: torch.Size, + context_shape: Optional[torch.Size] = None, + ) -> None: + super().__init__(param_shapes, input_shape, context_shape) + + # TODO: Initialization strategies and constraints! + self.params = nn.ParameterList( + [nn.Parameter(torch.randn(shape) * 0.001) for shape in param_shapes] + ) + + def _forward( + self, x: Optional[torch.Tensor] = None, context: Optional[torch.Tensor] = None + ) -> Sequence[torch.Tensor]: + return list(self.params) diff --git a/flowtorch/utils.py b/flowtorch/utils.py new file mode 100644 index 00000000..9d85a302 --- /dev/null +++ b/flowtorch/utils.py @@ -0,0 +1,104 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# SPDX-License-Identifier: MIT + +import importlib +import inspect +import os +import pkgutil +from functools import partial +from typing import Sequence, Tuple, Callable, Optional, Any + +import flowtorch +from flowtorch.bijectors.base import Bijector +from flowtorch.parameters.base import Parameters +from torch.distributions import Distribution + + +copyright_header = """Copyright (c) Facebook, Inc. and its affiliates. \ +All Rights Reserved +SPDX-License-Identifier: MIT""" + + +def classname(cls: type) -> str: + return ".".join([cls.__module__, cls.__name__]) + + +def issubclass_byname(cls: type, test_cls: type) -> bool: + """ + Test whether a class is a subclass of another by class names, in contrast + to the built-in issubclass that does it by instance. + """ + return classname(test_cls) in [classname(c) for c in cls.__mro__] + + +def isderivedclass(cls: type, base_cls: type) -> bool: + # NOTE issubclass won't always do what we want here if base_cls is imported + # inside the module of cls. I.e. issubclass returns False if cls inherits + # from a base_cls with a different instance. + return inspect.isclass(cls) and issubclass_byname(cls, base_cls) + + +def list_bijectors() -> Sequence[Tuple[str, Bijector]]: + ans = _walk_packages("bijectors", partial(isderivedclass, base_cls=Bijector)) + ans = [a for a in ans if ".ops." not in a[1].__module__] + return list({classname(cls[1]): cls for cls in ans}.values()) + + +def list_parameters() -> Sequence[Tuple[str, Parameters]]: + ans = _walk_packages("parameters", partial(isderivedclass, base_cls=Parameters)) + return list({classname(cls[1]): cls for cls in ans}.values()) + + +def list_distributions() -> Sequence[Tuple[str, Parameters]]: + ans = _walk_packages( + "distributions", partial(isderivedclass, base_cls=Distribution) + ) + return list({classname(cls[1]): cls for cls in ans}.values()) + + +def _walk_packages( + modname: str, filter: Optional[Callable[[Any], bool]] +) -> Sequence[Tuple[str, Any]]: + classes = [] + + # NOTE: I use path of flowtorch rather than e.g. 
flowtorch.bijectors + # to avoid circular imports + path = [os.path.join(flowtorch.__path__[0], modname)] # type: ignore + + # The followings line uncovered a bug that hasn't been fixed in mypy: + # https://github.com/python/mypy/issues/1422 + for importer, this_modname, _ in pkgutil.walk_packages( + path=path, # type: ignore # mypy issue #1422 + prefix=f"{flowtorch.__name__}.{modname}.", + onerror=lambda x: None, + ): + # Conditions required for mypy + if importer is not None: + if isinstance(importer, importlib.abc.MetaPathFinder): + finder = importer.find_module(this_modname, None) + elif isinstance(importer, importlib.abc.PathEntryFinder): + finder = importer.find_module(this_modname) + else: + finder = None + + if finder is not None: + module = finder.load_module(this_modname) + + else: + raise Exception("Finder is none") + + if module is not None: + this_classes = inspect.getmembers(module, filter) + classes.extend(this_classes) + + del module + del finder + + else: + raise Exception("Module is none") + + return classes + + +class InterfaceError(Exception): + pass diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..39c467c0 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[tool.usort] +first_party_detection=false diff --git a/scripts/copyright_headers.py b/scripts/copyright_headers.py new file mode 100644 index 00000000..1a7964a8 --- /dev/null +++ b/scripts/copyright_headers.py @@ -0,0 +1,166 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +import argparse +import os +import sys +from enum import Enum + +from flowtorch.utils import copyright_header + +lines_header = ["# " + ln + "\n" for ln in copyright_header.splitlines()] + + +class ReadState(Enum): + EMPTY = 0 + COMMENT = 1 + TRIPLE_QUOTES = 2 + + +def get_header(filename): + state = ReadState.EMPTY + header = [] + with open(filename, "r") as f: + for line_idx, line in enumerate(f.readlines()): + line = line.strip() + # Finite state machine to read "header" of Python source + # TODO: Can I write this much more compactly with regular expressions? 
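            # States: EMPTY means no header line has been consumed yet, COMMENT means
            # we are inside a leading block of '#' comments, and TRIPLE_QUOTES means we
            # are inside a leading docstring; the function returns the collected header
            # text, a line index the caller uses to strip the old header, and the state.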
+ if state is ReadState.EMPTY: + if len(line) and line[0] == "#": + state = ReadState.COMMENT + header.append(line[1:].strip()) + continue + elif len(line) >= 3 and line[:3] == '"""': + state = ReadState.TRIPLE_QUOTES + header.append(line[3:].strip()) + continue + else: + # If the file doesn't begin with a comment we consider the + # header to be empty + return "\n".join(header).strip(), line_idx, state + + elif state is ReadState.COMMENT: + if len(line) and line[0] == "#": + header.append(line[1:].strip()) + continue + else: + return "\n".join(header).strip(), line_idx, state + + elif state is ReadState.TRIPLE_QUOTES: + if len(line) >= 3 and '"""' in line: + char_idx = line.find('"""') + header.append(line[:char_idx].strip()) + return "\n".join(header).strip(), line_idx, state + else: + header.append(line.strip()) + continue + + else: + raise RuntimeError("Invalid read state!") + + # Return error if triple quotes don't terminate + if state is ReadState.TRIPLE_QUOTES: + raise RuntimeError(f"Unterminated multi-line string in {f}") + + # If we get to here then the file is all header + return "\n".join(header).strip(), line_idx + 1, state + + +def walk_source(paths): + # Find all Python source files that are not Git ignored + source_files = set() + for path in paths: + for root, _, files in os.walk(path): + for name in files: + full_name = os.path.join(root, name) + if name.endswith(".py") and os.system( + f"git check-ignore -q {full_name}" + ): + source_files.add(full_name) + + return sorted(source_files) + + +def print_results(count_changed, args): + # Print results + if count_changed == 0 and args.check: + print(f"{count_changed} files would be left unchanged.") + elif count_changed == len(source_files) and args.check: + print(f"{count_changed} files would be changed.") + elif args.check: + print( + f"""{count_changed} files would be changed and {len(source_files) / + - count_changed} files would be unchanged.""" + ) + elif count_changed: + print(f"{count_changed} files fixed.") + + +if __name__ == "__main__": + # Parse command line arguments + # Example usage: python scripts/copyright_headers.py --check flowtorch tests scripts + parser = argparse.ArgumentParser( + description="Checks and adds the Facebook Incubator copyright header" + ) + parser.add_argument( + "-c", + "--check", + action="store_true", + help="just checks files and does not change any", + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="prints extra information on files" + ) + parser.add_argument( + "paths", nargs="+", help="paths to search for Python source files" + ) + args = parser.parse_args() + + source_files = walk_source(args.paths) + + # Loop over source files and get the "header" + count_changed = 0 + for name in source_files: + header, line_idx, state = get_header(name) + + # Replace if it's not equal, starts with empty space, or is not a comment + if ( + header != copyright_header + or line_idx != 2 + or not state == ReadState.COMMENT + ): + count_changed += 1 + if args.verbose: + print(name) + + if not args.check: + # Read the file + with open(name, "r") as f: + lines = f.readlines() + + # Replace the header + # TODO: Debug the following! 
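                # Descriptive note: drop the lines that formed the old header, keep any
                # code that shared a line with the closing docstring quotes, and then
                # prepend the canonical lines_header below.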
+ if state == ReadState.TRIPLE_QUOTES: + after_quotes = lines[line_idx][ + (lines[line_idx].find('"""') + 3) : + ].lstrip() + if after_quotes == "": + lines = lines[line_idx + 1 :] + elif after_quotes.startswith(";"): + lines = [after_quotes[1:].lstrip()] + lines[line_idx + 1 :] + else: + raise RuntimeError( + "Statements must be separated by newlines or semicolons" + ) + else: + lines = lines[line_idx:] + + lines = lines_header + lines + filestring = "".join(lines) + + # Save back to disk + with open(name, "w") as f: + f.write(filestring) + + print_results(count_changed, args) + sys.exit(count_changed) diff --git a/scripts/generate_api_docs.py b/scripts/generate_api_docs.py new file mode 100644 index 00000000..e43fe033 --- /dev/null +++ b/scripts/generate_api_docs.py @@ -0,0 +1,80 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +""" +Generates MDX (Markdown + JSX, see https://mdxjs.com/) files and sidebar +information for the Docusaurus v2 website from the library components' +docstrings. + +We have chosen to take this approach to integrate our API documentation +with Docusaurus because there is no pre-existing robust solution to use +Sphinx output with Docusaurus. + +This script will be run by the "documentation" GitHub workflow on pushes +and pull requests to the main branch. It will function corrrectly from +any working directory. + +""" + +import errno +import os + +import flowtorch +from flowtorch.docs import ( + documentable_modules, + generate_markdown, + module_hierarchy, + name_entity_mapping, +) + +if __name__ == "__main__": + # Create website/docs/api if doesn't exist + try: + os.makedirs(os.path.join(flowtorch.__path__[0], "../website/docs/api")) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + # Build sidebar JSON based on module hierarchy and save to 'website/api.sidebar.js' + all_sidebar_items = [] + + documentable_module_names = {m.__name__: v for m, v in documentable_modules.items()} + + def module_sidebar(mod_name, items): + return f"{{\n type: 'category',\n label: '{mod_name}',\n \ +collapsed: {'false' if mod_name in module_hierarchy.keys() else 'true'},\ + items: [{', '.join(items)}],\n}}" + + def dfs(dict): + sidebar_items = [] + for key, val in dict.items(): + items = ( + [f'"api/{key}"'] + + [f'"api/{key}.{item[0]}"' for item in documentable_module_names[key]] + if len(documentable_module_names[key]) > 0 + else [] + ) + + if val != {}: + items.extend(dfs(val)) + + sidebar_items.append(module_sidebar(key, items)) + + return sidebar_items + + # Convert class hierarchy into API sidebar + with open( + os.path.join(flowtorch.__path__[0], "../website/api.sidebar.js"), "w" + ) as file: + print("module.exports = [\n'api/overview',", file=file) + print(",".join(dfs(module_hierarchy)), file=file) + print("];", file=file) + + # Generate markdown files for documentable entities + name_entity_mapping = name_entity_mapping.copy() + name_entity_mapping[""] = None + for name, entity in name_entity_mapping.items(): + filename, markdown = generate_markdown(name, entity) + + with open(os.path.join(flowtorch.__path__[0], filename), "w") as file: + print(markdown, file=file) diff --git a/scripts/generate_imports.py b/scripts/generate_imports.py new file mode 100644 index 00000000..d160f0a9 --- /dev/null +++ b/scripts/generate_imports.py @@ -0,0 +1,199 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# SPDX-License-Identifier: MIT + +""" +Generates imports files for bijectors, distributions, and parameters. +This script assumes that you have used `setup.py develop`. + +""" + +import errno +import io +import os + +import black +import flowtorch +import torch +from flowtorch.utils import ( + classname, + copyright_header, + list_distributions, + list_bijectors, + list_parameters, +) + +copyright_header = "".join(["# " + ln + "\n" for ln in copyright_header.splitlines()]) + +autogen_msg = """\"\"\" +Warning: This file was generated by flowtorch/scripts/generate_imports.py +Do not modify or delete! + +\"\"\" +""" + +bijectors_imports = """import inspect +from typing import cast, List, Tuple + +import torch""" + +bijectors_code = """def isbijector(cls: type) -> bool: + # A class must inherit from flowtorch.Bijector to be considered a valid bijector + return issubclass(cls, Bijector) + + +def standard_bijector(cls: type) -> bool: + # "Standard bijectors" are the ones we can perform standard automated tests upon + return ( + inspect.isclass(cls) + and isbijector(cls) + and cls.__name__ not in [clx for clx, _ in meta_bijectors] + ) + +# Determine invertible bijectors +invertible_bijectors = [] +for bij_name, cls in standard_bijectors: + # TODO: Use factored out version of the following + # Define plan for flow + event_dim = max(cls.domain.event_dim, 1) # type: ignore + event_shape = event_dim * [4] + # base_dist = dist.Normal(torch.zeros(event_shape), torch.ones(event_shape)) + bij = cls(shape=torch.Size(event_shape)) + + try: + y = torch.randn(*bij.forward_shape(event_shape)) + bij.inverse(y) + except NotImplementedError: + pass + else: + invertible_bijectors.append((bij_name, cls)) + + +__all__ = ["standard_bijectors", "meta_bijectors", "invertible_bijectors"] + [ + cls + for cls, _ in cast(List[Tuple[str, Bijector]], meta_bijectors) + + cast(List[Tuple[str, Bijector]], standard_bijectors) +]""" + +mode = black.FileMode() +fast = False + + +def generate_imports_plain(filename, classes): + with io.StringIO() as file: + # Sort classes by qualified name + classes = sorted(classes, key=lambda tup: classname(tup[1])) + + print(copyright_header, file=file) + print(autogen_msg, file=file) + for s, cls in classes: + print(f"from {cls.__module__} import {s}", file=file) + + print("", file=file) + + print("__all__ = [", file=file) + all_list = ",\n\t".join([f'"{s}"' for s, _ in classes]) + print("\t", end="", file=file) + print(all_list, file=file) + print("]", end="", file=file) + + contents = file.getvalue() + + with open(filename, "w") as real_file: + print( + black.format_file_contents(contents, fast=fast, mode=mode), + file=real_file, + end="", + ) + + +def generate_imports_bijectors(filename): + bij = list_bijectors() + meta_bijectors = [] + standard_bijectors = [] + + # Standard bijectors can be initialized with a shape, whereas + # meta bijectors will throw a TypeError or require additional + # keyword arguments (e.g., bij.Compose) + # TODO: Refactor this into flowtorch.utils.ismetabijector + for b in bij: + try: + cls = b[1] + x = cls(shape=torch.Size([2] * cls.domain.event_dim)) + except TypeError: + meta_bijectors.append(b) + else: + if isinstance(x, flowtorch.Lazy): + meta_bijectors.append(b) + else: + standard_bijectors.append(b) + + with io.StringIO() as file: + # Sort classes by qualified name + classes = standard_bijectors + meta_bijectors + classes = sorted(classes, key=lambda tup: classname(tup[1])) + + # Copyright header and warning message + 
print(copyright_header, file=file) + print(autogen_msg, file=file) + + # Non-FlowTorch imports + print(bijectors_imports, file=file) + + # FlowTorch imports + for s, cls in classes: + print(f"from {cls.__module__} import {s}", file=file) + print("", file=file) + + # Create lists of bijectors for each type + meta_str = ",\n ".join([f'("{b[0]}", {b[0]})' for b in meta_bijectors]) + standard_str = ",\n ".join( + [f'("{b[0]}", {b[0]})' for b in standard_bijectors] + ) + + print( + f"""standard_bijectors = [ + {standard_str} +] +""", + file=file, + ) + + print( + f"""meta_bijectors = [ + {meta_str} +] +""", + file=file, + ) + + # Rest of code + print(bijectors_code, file=file) + + contents = file.getvalue() + + with open(filename, "w") as real_file: + print( + black.format_file_contents(contents, fast=fast, mode=mode), + file=real_file, + end="", + ) + + +if __name__ == "__main__": + # Create module folders if they don't exist + try: + for m in ["distributions", "bijectors", "parameters"]: + os.makedirs(os.path.join(flowtorch.__path__[0], m)) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + bijectors_init = os.path.join(flowtorch.__path__[0], "bijectors/__init__.py") + distributions_init = os.path.join( + flowtorch.__path__[0], "distributions/__init__.py" + ) + parameters_init = os.path.join(flowtorch.__path__[0], "parameters/__init__.py") + + generate_imports_bijectors(bijectors_init) + generate_imports_plain(distributions_init, list_distributions()) + generate_imports_plain(parameters_init, list_parameters()) diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..03542267 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,9 @@ +[flake8] +max-line-length = 80 +max-complexity = 12 +ignore = E501 +select = C,E,F,W,B,B9 +extend-ignore = E203, W503 + +[metadata] +license_files = LICENSE.txt diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..e955b270 --- /dev/null +++ b/setup.py @@ -0,0 +1,87 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +import os +import sys + +from setuptools import find_packages, setup + +REQUIRED_MAJOR = 3 +REQUIRED_MINOR = 7 + + +TEST_REQUIRES = ["numpy", "pytest", "pytest-cov", "scipy"] +DEV_REQUIRES = TEST_REQUIRES + [ + "black", + "flake8", + "flake8-bugbear", + "mypy", + "usort", +] + + +# Check for python version +if sys.version_info < (REQUIRED_MAJOR, REQUIRED_MINOR): + error = ( + "Your version of python ({major}.{minor}) is too old. You need " + "python >= {required_major}.{required_minor}." 
+ ).format( + major=sys.version_info.major, + minor=sys.version_info.minor, + required_minor=REQUIRED_MINOR, + required_major=REQUIRED_MAJOR, + ) + sys.exit(error) + + +# read in README.md as the long description +with open("README.md", "r") as fh: + long_description = fh.read() + +setup( + name="flowtorch", + description="Normalizing Flows for PyTorch", + author="FlowTorch Development Team", + author_email="info@stefanwebb.me", + license="MIT", + url="https://flowtorch.ai/users", + project_urls={ + "Documentation": "https://flowtorch.ai/users", + "Source": "https://www.github.com/facebookincubator/flowtorch", + }, + keywords=[ + "Deep Learning", + "Bayesian Inference", + "Statistical Modeling", + "Variational Inference", + "PyTorch", + ], + classifiers=[ + "Development Status :: 3 - Alpha", + "Programming Language :: Python :: 3 :: Only", + "License :: OSI Approved :: MIT License", + "Topic :: Scientific/Engineering", + "Intended Audience :: Science/Research", + "Intended Audience :: Developers", + ], + long_description=long_description, + long_description_content_type="text/markdown", + python_requires=">={}.{}".format(REQUIRED_MAJOR, REQUIRED_MINOR), + install_requires=[ + "torch>=1.8.1", + ], + setup_requires=["setuptools_scm"], + use_scm_version={ + "root": ".", + "relative_to": __file__, + "write_to": os.path.join("flowtorch", "version.py"), + }, + packages=find_packages( + include=["flowtorch", "flowtorch.*"], + exclude=["debug", "tests", "website", "examples", "scripts"], + ), + extras_require={ + "dev": DEV_REQUIRES, + "test": TEST_REQUIRES, + }, +) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..5fe5d8a4 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,16 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT +import random + +import numpy as np +import pytest +import torch + + +@pytest.fixture(scope="function", autouse=True) +def set_seeds_before_every_test(): + torch.manual_seed(42) + np.random.seed(42) + random.seed(42) + + yield # yield control to the test to run diff --git a/tests/test_bijector.py b/tests/test_bijector.py new file mode 100644 index 00000000..dad4eefd --- /dev/null +++ b/tests/test_bijector.py @@ -0,0 +1,130 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# SPDX-License-Identifier: MIT +import flowtorch.bijectors as bijectors +import numpy as np +import pytest +import torch +import torch.distributions as dist +import torch.optim +from flowtorch.distributions import Flow + +""" +def test_bijector_constructor(): + param_fn = flowtorch.params.DenseAutoregressive() + b = flowtorch.bijectors.AffineAutoregressive(param_fn=param_fn) + assert b is not None +""" + + +@pytest.fixture(params=[bij_name for _, bij_name in bijectors.standard_bijectors]) +def flow(request): + bij = request.param + event_dim = max(bij.domain.event_dim, 1) + event_shape = event_dim * [3] + base_dist = dist.Independent( + dist.Normal(torch.zeros(event_shape), torch.ones(event_shape)), event_dim + ) + + flow = Flow(base_dist, bij) + return flow + + +def test_jacobian(flow, epsilon=1e-2): + # Instantiate transformed distribution and parameters + bij = flow.bijector + params = bij.params + + # Calculate auto-diff Jacobian + x = torch.randn(*flow.event_shape) + x = torch.distributions.transform_to(bij.domain)(x) + y = bij.forward(x) + if bij.domain.event_dim == 1: + analytic_ldt = bij.log_abs_det_jacobian(x, y).data + else: + analytic_ldt = bij.log_abs_det_jacobian(x, y).sum(-1).data + + # Calculate numerical Jacobian + # TODO: Better way to get all indices of array/tensor? + jacobian = torch.zeros(flow.event_shape * 2) + idxs = np.nonzero(np.ones(flow.event_shape)) + + # Have to permute elements for MADE + count_vars = len(idxs[0]) + if hasattr(params, "permutation"): + inv_permutation = np.zeros(count_vars, dtype=int) + inv_permutation[params.permutation] = np.arange(count_vars) + + # TODO: Vectorize numerical calculation of Jacobian with PyTorch + # TODO: Break this out into flowtorch.numerical.derivatives.jacobian + for var_idx in range(count_vars): + idx = [dim_idx[var_idx] for dim_idx in idxs] + epsilon_vector = torch.zeros(flow.event_shape) + epsilon_vector[(*idx,)] = epsilon + # TODO: Use scipy.misc.derivative or another library's function? 
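        # Central finite difference for column var_idx of the Jacobian:
        #   dy/dx_i  ~=  (f(x + (eps/2) * e_i) - f(x - (eps/2) * e_i)) / eps
        # where e_i = epsilon_vector / epsilon is the one-hot unit vector at idx.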
+ delta = ( + bij.forward(x + 0.5 * epsilon_vector) + - bij.forward(x - 0.5 * epsilon_vector) + ) / epsilon + + for var_jdx in range(count_vars): + jdx = [dim_jdx[var_jdx] for dim_jdx in idxs] + + # Have to account for permutation potentially introduced by MADE network + # TODO: Make this more general with structure abstraction + if hasattr(params, "permutation"): + jacobian[(inv_permutation[idx[0]], inv_permutation[jdx[0]])] = float( + delta[(Ellipsis, *jdx)].data.sum() + ) + else: + jacobian[(*idx, *jdx)] = float(delta[(Ellipsis, *jdx)].data.sum()) + + # For autoregressive flow, Jacobian is sum of diagonal, otherwise need full + # determinate + if hasattr(params, "permutation"): + numeric_ldt = torch.sum(torch.log(torch.diag(jacobian))) + else: + numeric_ldt = torch.log(torch.abs(jacobian.det())) + + ldt_discrepancy = (analytic_ldt - numeric_ldt).abs() + assert ldt_discrepancy < epsilon + + # Test that lower triangular with non-zero diagonal for autoregressive flows + if hasattr(params, "permutation"): + + def nonzero(x): + return torch.sign(torch.abs(x)) + + diag_sum = torch.sum(torch.diag(nonzero(jacobian))) + lower_sum = torch.sum(torch.tril(nonzero(jacobian), diagonal=-1)) + assert diag_sum == float(count_vars) + assert lower_sum == float(0.0) + + +def test_inverse(flow, epsilon=1e-5): + bij = flow.bijector + base_dist = flow.base_dist + + # Test g^{-1}(g(x)) = x + x_true = base_dist.sample(torch.Size([10])) + x_true = torch.distributions.transform_to(bij.domain)(x_true) + + y = bij.forward(x_true) + x_calculated = bij.inverse(y) + assert (x_true - x_calculated).abs().max().item() < epsilon + + # Test that Jacobian after inverse op is same as after forward + J_1 = bij.log_abs_det_jacobian(x_true, y) + J_2 = bij.log_abs_det_jacobian(x_calculated, y) + assert (J_1 - J_2).abs().max().item() < epsilon + + +""" +# TODO +def _test_shape(self, base_shape, transform): + pass + + +# TODO: This tests whether can take autodiff gradient without exception +def _test_autodiff(self, input_dim, transform, inverse=False): + pass +""" diff --git a/tests/test_compose.py b/tests/test_compose.py new file mode 100644 index 00000000..af2a6873 --- /dev/null +++ b/tests/test_compose.py @@ -0,0 +1,41 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# SPDX-License-Identifier: MIT + +import flowtorch.bijectors as bijs +import flowtorch.distributions as dist +import flowtorch.parameters as params +import torch +import torch.distributions +import torch.optim + + +def test_compose(): + transforms = bijs.Compose( + bijectors=[ + bijs.AffineAutoregressive( + params.DenseAutoregressive(), + ), + bijs.AffineAutoregressive( + params.DenseAutoregressive(), + ), + bijs.AffineAutoregressive( + params.DenseAutoregressive(), + ), + ] + ) + + event_shape = (5,) + base_dist = torch.distributions.Independent( + torch.distributions.Normal( + loc=torch.zeros(event_shape), scale=torch.ones(event_shape) + ), + len(event_shape), + ) + flow = dist.Flow(base_dist, transforms) + + optimizer = torch.optim.Adam(flow.parameters()) + assert optimizer.param_groups[0]["params"][0].grad is None + flow.log_prob(torch.randn((100,) + event_shape)).sum().backward() + assert optimizer.param_groups[0]["params"][0].grad.abs().sum().item() > 1e-3 + optimizer.zero_grad() + assert optimizer.param_groups[0]["params"][0].grad.abs().sum().item() < 1e-3 diff --git a/tests/test_distribution.py b/tests/test_distribution.py new file mode 100644 index 00000000..542af70f --- /dev/null +++ b/tests/test_distribution.py @@ -0,0 +1,117 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + +import flowtorch.bijectors as bijs +import flowtorch.distributions as dist +import flowtorch.parameters as params +import scipy.stats +import torch +import torch.distributions +import torch.optim + + +def test_tdist_standalone(): + input_dim = 3 + + def make_tdist(): + # train a flow here + base_dist = torch.distributions.Independent( + torch.distributions.Normal(torch.zeros(input_dim), torch.ones(input_dim)), 1 + ) + bijector = bijs.AffineAutoregressive() + tdist = dist.Flow(base_dist, bijector) + return tdist + + tdist = make_tdist() + tdist.log_prob(torch.randn(input_dim)) # should run without error + assert True + + +def test_neals_funnel_vi(): + torch.manual_seed(42) + nf = dist.NealsFunnel() + bijector = bijs.AffineAutoregressive(params=params.DenseAutoregressive()) + + base_dist = torch.distributions.Independent( + torch.distributions.Normal(torch.zeros(2), torch.ones(2)), 1 + ) + flow = dist.Flow(base_dist, bijector) + bijector = flow.bijector + + opt = torch.optim.Adam(flow.parameters(), lr=2e-3) + num_elbo_mc_samples = 200 + for _ in range(100): + z0 = flow.base_dist.rsample(sample_shape=(num_elbo_mc_samples,)) + zk = bijector._forward(z0) + ldj = bijector._log_abs_det_jacobian(z0, zk) + + neg_elbo = -nf.log_prob(zk).sum() + neg_elbo += flow.base_dist.log_prob(z0).sum() - ldj.sum() + neg_elbo /= num_elbo_mc_samples + + if not torch.isnan(neg_elbo): + neg_elbo.backward() + opt.step() + opt.zero_grad() + + nf_samples = dist.NealsFunnel().sample((20,)).squeeze().numpy() + vi_samples = flow.sample((20,)).detach().numpy() + + assert scipy.stats.ks_2samp(nf_samples[:, 0], vi_samples[:, 0]).pvalue >= 0.05 + assert scipy.stats.ks_2samp(nf_samples[:, 1], vi_samples[:, 1]).pvalue >= 0.05 + + +""" +def test_conditional_2gmm(): + context_size = 2 + + flow = bijs.Compose( + bijectors=[ + bijs.AffineAutoregressive(context_size=context_size) + for _ in range(2) + ], + context_size=context_size, + ).inv() + + base_dist = dist.Normal(torch.zeros(2), torch.ones(2)) + new_cond_dist = flow(base_dist) + flow = new_cond_dist.bijector + + target_dist_0 = dist.Independent( + dist.Normal(torch.zeros(2) + 5, torch.ones(2) * 0.5), 1 + ) + 
target_dist_1 = dist.Independent( + dist.Normal(torch.zeros(2) - 5, torch.ones(2) * 0.5), 1 + ) + + opt = torch.optim.Adam(flow.params.parameters(), lr=1e-3) + + for idx in range(100): + opt.zero_grad() + + if idx % 2 == 0: + target_dist = target_dist_0 + context = torch.ones(context_size) + else: + target_dist = target_dist_1 + context = -1 * torch.ones(context_size) + + marginal = new_cond_dist.condition(context) + y = marginal.rsample((50,)) + loss = -target_dist.log_prob(y) + marginal.log_prob(y) + loss = loss.mean() + + if idx % 100 == 0: + print("epoch", idx, "loss", loss) + + loss.backward() + opt.step() + + assert ( + new_cond_dist.condition(torch.ones(context_size)).sample((1000,)).mean() - 5.0 + ).norm().item() < 1.0 + assert ( + new_cond_dist.condition(-1 * torch.ones(context_size)).sample((1000,)).mean() + + 5.0 + ).norm().item() < 1.0 +""" diff --git a/tests/test_imports.py b/tests/test_imports.py new file mode 100644 index 00000000..d807829b --- /dev/null +++ b/tests/test_imports.py @@ -0,0 +1,66 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT +import inspect + +import flowtorch +import flowtorch.bijectors +import flowtorch.distributions +import flowtorch.parameters +import flowtorch.utils + + +def test_parameters_imports(): + tst_imports( + "Parameters", + [cls for cls, _ in flowtorch.utils.list_parameters()], + [ + c + for c in flowtorch.parameters.__all__ + if inspect.isclass(flowtorch.parameters.__dict__[c]) + ], + ) + + +def test_bijector_imports(): + tst_imports( + "Bijector", + [cls for cls, _ in flowtorch.utils.list_bijectors()], + [ + c + for c in flowtorch.bijectors.__all__ + if inspect.isclass(flowtorch.bijectors.__dict__[c]) + ], + ) + + +def test_distribution_imports(): + tst_imports( + "Distribution", + [cls for cls, _ in flowtorch.utils.list_distributions()], + [ + c + for c in flowtorch.distributions.__all__ + if inspect.isclass(flowtorch.distributions.__dict__[c]) + ], + ) + + +def tst_imports(cls_name, detected, imported): + unimported = set(detected).difference(set(imported)) + undetected = set(imported).difference(set(detected)) + + error_msg = [] + if len(unimported): + error_msg.append( + f'The following {cls_name} classes are declared but not imported: \ +{", ".join(unimported)}' + ) + + if len(undetected): + error_msg.append( + f'The following {cls_name} classes are imported but not detected: \ +{", ".join(undetected)}' + ) + + if len(error_msg): + raise ImportError("\n".join(error_msg)) diff --git a/tests/test_interface.py b/tests/test_interface.py new file mode 100644 index 00000000..e5706d4b --- /dev/null +++ b/tests/test_interface.py @@ -0,0 +1,31 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# SPDX-License-Identifier: MIT + + +from flowtorch.docs import sorted_entity_names +from flowtorch.utils import InterfaceError + + +class TestInterface: + def test_documentable_case_insensitivity(self): + """ + Checks whether there are any two entities that are indistinguishable by + case. E.g. "flowtorch.params" the module and "flowtorch.Params" the + class. For producing the API docs on Windows and Mac OS systems, it is + advisable to have entity names that are unique regardless of case. 
+ + """ + equivalence_classes = {} + for n in sorted_entity_names: + equivalence_classes.setdefault(n.lower(), []).append(n) + erroneous_equivalences = [ + f'{{{", ".join(v)}}}' for v in equivalence_classes.values() if len(v) > 1 + ] + + if len(erroneous_equivalences): + error_string = "\t\n".join(erroneous_equivalences) + raise InterfaceError( + f"""Documentable entities must be unique irrespective of case. The \ +following equivalences were found: + {error_string}""" + ) diff --git a/website/.gitignore b/website/.gitignore new file mode 100644 index 00000000..8a11ee68 --- /dev/null +++ b/website/.gitignore @@ -0,0 +1,22 @@ +# Dependencies +/node_modules + +# Production +/build + +# Generated files +.docusaurus +.cache-loader +/docs/api +api.sidebar.js + +# Misc +.DS_Store +.env.local +.env.development.local +.env.test.local +.env.production.local + +npm-debug.log* +yarn-debug.log* +yarn-error.log* diff --git a/website/README.md b/website/README.md new file mode 100644 index 00000000..ed310c91 --- /dev/null +++ b/website/README.md @@ -0,0 +1,41 @@ + + +This website is built using [Docusaurus 2](https://v2.docusaurus.io/), a modern static website generator, and hosted using GitHub pages at [https://flowtorch.ai](https://flowtorch.ai). The source for the website is located in [main/website](https://github.com/facebookincubator/flowtorch/tree/main/website) and it is hosted from the root directory of the [website](https://github.com/facebookincubator/flowtorch/tree/website) branch. + +## Preparation +1. Install [Node.js](https://nodejs.org/). +2. Install [Yarn](https://yarnpkg.com/): +```console +npm install --global yarn +``` +4. Navigate to [main/website](https://github.com/facebookincubator/flowtorch/tree/main/website) and install the dependencies: +```console +cd website +yarn install +``` + +## Local Development + +```console +yarn start +``` + +This command starts a local development server and open up a browser window. Most changes are reflected live without having to restart the server. + +## Build + +```console +yarn build +``` + +This command generates static content into the `website/build` directory, which is deployed by copying into the [gh-pages](https://github.com/facebookincubator/flowtorch/tree/gh-pages) branch. + +## Deployment + +Core developers can deploy the website as follows: + +```console +GIT_USER= USE_SSH=true yarn deploy +``` + +Activity logs for all past deployments to GitHub pages can be viewed [here](https://github.com/facebookincubator/flowtorch/deployments/activity_log?environment=github-pages). diff --git a/website/babel.config.js b/website/babel.config.js new file mode 100644 index 00000000..e00595da --- /dev/null +++ b/website/babel.config.js @@ -0,0 +1,3 @@ +module.exports = { + presets: [require.resolve('@docusaurus/core/lib/babel/preset')], +}; diff --git a/website/docs/dev/about.mdx b/website/docs/dev/about.mdx new file mode 100644 index 00000000..0fec8555 --- /dev/null +++ b/website/docs/dev/about.mdx @@ -0,0 +1,40 @@ +--- +id: about +title: About the Team +sidebar_label: About the Team +--- +:::info +*This could be you!* See [here](/dev) and [here](/dev/overview) for how to make an independent contribution to [FlowTorch](https://flowtorch.ai). We will consider adding new members to the core team for those who are interested and have made previous contributions. 
+::: + +## Core Team +The Core Developers team comprises [Stefan Webb](https://stefanwebb.me) (Team Leader), [Feynman Liang](https://feynmanliang.com/about/), and [Fritz Obermeyer](http://fritzo.org/). + +export const FlexContainer = ({children}) => ( +
+ {children} +
+); + + +
+ +## Contributors +:::info +Independent contributors will be recognized here when such contributions begin to flow (no pun intended)! +::: diff --git a/website/docs/dev/bibliography.mdx b/website/docs/dev/bibliography.mdx new file mode 100644 index 00000000..a547004a --- /dev/null +++ b/website/docs/dev/bibliography.mdx @@ -0,0 +1,89 @@ +--- +id: bibliography +title: Bibliography +sidebar_label: Bibliography +--- +:::info + +If you know of a paper or library that ought to be listed in this bibliography please let us know [in the forum](https://github.com/facebookincubator/flowtorch/discussions) or by [starting a pull request](https://github.com/facebookincubator/flowtorch/pulls). + +::: + +Here is a collation of materials related to the research, engineering, and teaching of normalizing flows. A corresponding BibTex file can be found [here](https://github.com/facebookincubator/flowtorch/blob/new_interface/website/static/assets/normalizing-flows.bib). + +## Surveys +
+[bond2021deep] Bond-Taylor, S., Leach, A., Long, Y., and Willcocks, C.G. Deep Generative Modelling: A Comparative Review of VAEs, GANs, Normalizing Flows, Energy-Based and Autoregressive Models. arXiv preprint arXiv:2103.04922, 2021. +
+ +
+[kobyzev2020normalizing] Kobyzev, I., Prince, S., and Brubaker, M. Normalizing flows: An introduction and review of current methods. IEEE Transactions on Pattern Analysis and Machine Intelligence, 2020. +
+ +
+[papamakarios2019normalizing] Papamakarios, G., Nalisnick, E., Rezende, D., Mohamed, S., and Lakshminarayanan, B. Normalizing flows for probabilistic modeling and inference. arXiv preprint arXiv:1912.02762, 2019. +
+ +## Methodology +
+[dinh2014nice] Dinh, L., Krueger, D., and Bengio, Y. NICE: Non-linear Independent Components Estimation. Workshop contribution at the International Conference on Learning Representations (ICLR), 2015. +
+ +
+[dinh2016density] Dinh, L., Sohl-Dickstein, J., and Bengio, S. Density estimation using real NVP. Conference paper at the International Conference on Learning Representations (ICLR), 2017. +
+ +
+[durkan2019neural] Durkan, C., Bekasov, A., Murray, I., and Papamakarios, G. Neural spline flows. 33rd Conference on Neural Information Processing Systems (NeurIPS), 2019. +
+ +
+[germain2015made] Germain, M., Gregor, K., Murray, I., and Larochelle, H. MADE: Masked autoencoder for distribution estimation . International Conference on Machine Learning (ICML), 2015. +
+ +
+[kingma2016improving] Kingma, D.P., Salimans, T., Jozefowicz, R., Chen, X., Sutskever, I., and Welling, M. Improving variational inference with inverse autoregressive flow . 29th Conference on Neural Information Processing Systems (NeurIPS), 2016. +
+ +
+[papamakarios2017masked] Papamakarios, G., and Pavlakou, T., and Murray, I. Masked autoregressive flow for density estimation . 30th Conference on Neural Information Processing Systems (NeurIPS), 2017. +
+ +
+[rezende2015variational] Rezende, D., and Mohamed, S. Variational inference with normalizing flows . International Conference on Machine Learning (ICML), 2015. +
+ +## Applications +
+[jin2019unsupervised] Jin, L., and Doshi-Velez, F., and Miller, T., and Schwartz, L., and Schuler, W. Unsupervised learning of PCFGs with normalizing flow. 57th Annual Meeting of the Association for Computational Linguistics (ACL), 2019. +
+ +
+[kim2020wavenode] Kim, H., and Lee, H., and Kang, W. H., and Cheon, S. J., Choi, B. J., and Kim, N. S. WaveNODE: A Continuous Normalizing Flow for Speech Synthesis. 2nd workshop on Invertible Neural Networks, Normalizing Flows, and Explicit Likelihood Models (ICML 2020), 2020. +
+ +
+[yang2019pointflow] Yang, G., Huang, X., Hao, Z., Liu, M., Belongie, S., and Hariharan, B. Pointflow: 3d point cloud generation with continuous normalizing flows. IEEE/CVF International Conference on Computer Vision, 2019. +
+ +## Libraries +### PyTorch +
+[bingham2018pyro] Bingham, E., and Chen, J.P., Jankowiak, M., Obermeyer, F., Pradhan, N., Karaletsos, T., Singh, R., Szerlip, P., Horsfall, P., and Goodman, N.D. Pyro: Deep Universal Probabilistic Programming. Journal of Machine Learning Research (JMLR), 2018. +
+ +> The majority of [the bijections in Pyro](https://github.com/pyro-ppl/pyro/tree/dev/pyro/distributions/transforms) were written by the core developer, [Stefan Webb](https://stefanwebb.me), and [FlowTorch](https://flowtorch.ai) builds upon this code and the experience gained from it. + +
+[phan2019composable] Phan, D., Pradhan, N., and Jankowiak, M. Composable Effects for Flexible and Accelerated Probabilistic Programming in NumPyro. arXiv preprint arXiv:1912.11554, 2019. +
+ +## Other Related +### Probabilistic Graphical Models +
+[koller2009probabilistic] Koller, D. and Friedman, N. Probabilistic graphical models: principles and techniques. MIT Press, 2009. +
+ +
+[webb2017faithful] Webb, S., Golinski, A., Zinkov, R., Siddharth, N., Rainforth, T., Teh, Y.W., and Wood, F. Faithful inversion of generative models for effective amortized inference. 31th Conference on Neural Information Processing Systems (NeurIPS), 2018. +
diff --git a/website/docs/dev/bijector.mdx b/website/docs/dev/bijector.mdx new file mode 100644 index 00000000..39b76076 --- /dev/null +++ b/website/docs/dev/bijector.mdx @@ -0,0 +1,69 @@ +--- +id: bijector +title: Bijector Interface +sidebar_label: Bijector Interface +--- + +## The Interface +A class satisfying the "Bijector interface" contains the following elements. + +### Parent class +A bijector must inherit from [`flowtorch.bijectors.Bijector`](https://github.com/facebookincubator/flowtorch/blob/main/flowtorch/bijectors/base.py). This class defines important methods that are common to all bijectors such as `.inv` for defining an equivalent bijector swapping the forward and inverse operations. In the future, this parent class will be responsible for implementing [caching](/users/caching). + +### Domain and Codomain +`self.domain` and `self.codomain` are values of type `torch.distributions.constraint` and specify the range of valid inputs and outputs that a bijector acts upon, as well as the dimensionality of both. FlowTorch does not validate the values of the inputs or outputs using this information - it is mainly intended to be useful for users as documentation. + +However, the `.event_dim` property of both `self.domain` and `self.codomain` is important as it specifies whether a bijector operates over scalars (`event_dim=0`), vectors (`event_dim=1`), matrices (`event_dim=2`), etc., and this determines the shapes of a transformed distribution using the bijector. + +`self.domain` and `self.codomain` are typically *class properties*, although they can be *instance properties* where that makes sense, for example when a bijector operates on a different sized input depending on parameters passed to `__init__`. + +### Other Metadata +Further metadata about a bijector is defined in these properties: +* `autoregressive`: a bijector operating on vectors is autoregressive if $x_i$ is independent of $x_j$ for all $j>i$. Note that the order of autoregression may not be the same order as the PyTorch tensor since the bijector or its conditioning network may apply a permutation. We can generalize this in a straightforward way for bijectors operating on matrices, tensors, and higher-dimensional objects. This property is used by the testing framework. +* `near_identity_initialization`: whether a bijector is initialized to an "almost-identity" operation. In this context, a bijector is defined as being "almost-identity" if $y=f(x)$ does not diverge too much from a standard (multivariate) normal distribution when $x$ has a standard normal distribution. +* `volume_preserving`: a bijector is volume preserving, also known as *homomorphic*, if the volume of $\{f(x)\mid x \in A\}$ is the same as the volume of $A$ for all $A\subseteq\text{domain}(f)$. This is true of many bijections used in normalizing flows (examples to follow when we've moved across all the bijections from Pyro). + +Again, this metadata can be represented by *class properties* and *instance properties* depending on the context. For instance, a bijector may not be volume preserving by default and have a special volume preserving version that is enabled by a flag passed to `__init__`. + +:::info +Further metadata fields may be defined in the future. However, developers are not permitted to define their own without adding a default value to [`flowtorch.bijectors.Bijector`](https://github.com/facebookincubator/flowtorch/blob/main/flowtorch/bijectors/base.py). 
+:::
+
+### Class Methods
+Class methods define initialization of the bijector, the forward ($y=f(x)$) and inverse ($x=f^{-1}(y)$) operators, the log absolute determinant Jacobian ($\log(|\det(dy/dx)|)$), and methods that define the shapes of $f$, $f^{-1}$, and its parameters. All methods are optional save for `._forward` and `._inverse` - the defaults for the others are the same as those of the identity operation (see [`flowtorch.bijectors.Bijector`](https://github.com/facebookincubator/flowtorch/blob/main/flowtorch/bijectors/base.py)).
+
+#### `.__init__(self, param_fn: flowtorch.params.Params, *, **kwargs)`
+This optional method initializes a bijector, taking a `flowtorch.params.Params` object and an arbitrary number of keyword arguments specific to the bijector. It must call the parent initializer, passing the value of `param_fn`, that is, `super().__init__(param_fn=param_fn)`. Typically, the initializer is used to store parameters of the bijector and sometimes modify its metadata.
+
+*`__init__` must have sensible default values for all its arguments so that one can instantiate a bijector with, for example, `b = MyBijector()`.* This design allows both easy creation and testing of bijectors.
+
+#### `._forward(self, x: torch.Tensor, params: Optional[flowtorch.params.ParamsModule])`
+#### `._inverse(self, y: torch.Tensor, params: Optional[flowtorch.params.ParamsModule])`
+These methods define the forward, $y=f_\theta(x)$, and inverse, $x=f^{-1}_\theta(y)$, operations of a bijector, respectively.
+
+By convention, when a bijector has either a forward or inverse operation that does not have an explicit formula or that is intractable, the forward operation is defined by the tractable operation and the inverse is left undefined (and you can obtain the inverted bijector with `.inv`). [Caching](/users/caching) is useful in these circumstances to apply the intractable operation to inputs that have previously been used with the bijector.
+
+#### `._log_abs_det_jacobian(self, x: torch.Tensor, y: torch.Tensor, params: Optional[flowtorch.params.ParamsModule])`
+This method defines the log absolute determinant Jacobian, $\log(|\det(dy/dx)|)$, which determines how the functional form of the bijector warps an infinitesimally small volume of space. Since it may be easier to calculate this using one of either $x$ or $y$, both are given as arguments - it is up to the caller to ensure that $y=f(x)$.
+
+If this method is undefined, it will default to a tensor of zeros, that is, the quantity in question for a volume-preserving bijector.
+
+#### `.forward_shape(self, event_shape)`
+#### `.inverse_shape(self, event_shape)`
+`.forward_shape` defines the event shape of $y=f(x)$ given the event shape of $x$. Similarly, `.inverse_shape` defines the event shape of $x=f^{-1}(y)$ given the event shape of $y$. These methods provide additional flexibility for defining bijectors, although in most cases they will be left undefined in the derived class so that the default of the identity function is used. One example of where these methods differ from the identity is in [flowtorch.Reshape](/users/composing).
+
+:::info
+It must be the case that `len(event_shape) == self.domain.event_dim` for `.forward_shape` and `len(event_shape) == self.codomain.event_dim` for `.inverse_shape`. Likewise, the outputs of these two methods must match the corresponding `event_dim`.
+:::
+
+#### `.params_shape(self, event_shape)`
+This method defines the shapes of the parameters for a given event shape.
It returns a tuple of shapes of type `torch.Size()`. For instance, if there are two separate scalar parameters for each event dimension, we could implement this method as: + +```python + def params_shape(self, event_shape:torch.Size) -> Tuple[torch.Size]: + return (event_shape, event_shape) +``` + +:::note +Yet to be decided: what do we want as the convention when a `Bijector` does not use any parameters? Should it return `None` or a single `torch.Size()`? +::: diff --git a/website/docs/dev/contributing.md b/website/docs/dev/contributing.md new file mode 100644 index 00000000..d39e8b55 --- /dev/null +++ b/website/docs/dev/contributing.md @@ -0,0 +1,28 @@ +--- +id: contributing +title: Help Wanted! +sidebar_label: Help Wanted! +slug: /dev +--- +:::info +Please contact us in [the forum](https://github.com/facebookincubator/flowtorch/discussions) if you are interested in becoming an independent contributor and tag your discussion with ":bulb: Ideas" - the process is outlined [here](/dev/overview). +::: + +## Call for Contributions +We are looking for independent collaborators to: +* add new bijectors (i.e., normalizing flow transforms) and parameters (i.e., conditioning networks); +* [discover and fix bugs](https://github.com/facebookincubator/flowtorch/issues/new/choose); and, +* write tutorials on [applications of Normalizing Flow methodology](/dev/bibliography#applications). + +The [Core Developers](/dev/about) are able to help smooth [the process of making a contribution](/dev/overview). + + +## Why Contribute? +Why would you freely give up your labour and contribute to an open-source project? Firstly, contributing to an open-source project is excellent for one's professional development as a software engineer. Take the example of the author, who started contributing to an open-source project during his PhD and developed important DevOp skills. It was greatly responsible for his scoring an internship in Industry and kickstarting his career. Contributors will be recognised both [here](/dev/about#contributors) and [here](https://github.com/facebookincubator/flowtorch/graphs/contributors). + +Another reason is that [it is inherently satisfying to make](https://en.wikipedia.org/wiki/Maker_culture). By contributing to [FlowTorch](https://flowtorch.ai) you will be creating useful components that will have a concrete impact. Finally, a main motivation behind [FlowTorch](https://flowtorch.ai) is to advance scientific knowledge around representing probability distributions and their applications - by contributing to [FlowTorch](https://flowtorch.ai) you are contributing to the advancement of science! + +See [here](https://opensource.guide/how-to-contribute/) for a more detailed essay on the philosophy of open-source. + +## Code of Conduct +As a contributor, you agree to abide by the [Contributor Covenant Code of Conduct](https://github.com/facebookincubator/flowtorch/blob/main/CODE_OF_CONDUCT.md). In a nutshell, the code says [Be Excellent to Each Other!](https://www.youtube.com/watch?v=rph_1DODXDU) Please report any suspected violations to the Core Developers. diff --git a/website/docs/dev/docs.md b/website/docs/dev/docs.md new file mode 100644 index 00000000..cb3b7b19 --- /dev/null +++ b/website/docs/dev/docs.md @@ -0,0 +1,15 @@ +--- +id: docs +title: Docs +sidebar_label: Docs +--- +:::info +The easiest way to write a docstring that adheres to FlowTorch conventions is to copy one from a pre-existing class and adapt it to your case. 
+::: + +## Docstrings +It is crucial to add an informative [docstring](https://www.python.org/dev/peps/pep-0257/#id15) to new `bij.Bijector` and `params.Parameters` classes. This docstring should detail what the class does, its functional form, the meaning of *all* input arguments and returned values, and references to any relevant literature. + +References should link to their citation in the [bibliography](/dev/bibliography), for example, with [https://flowtorch.ai/dev/bibliography#dinh2014nice](https://flowtorch.ai/dev/bibliography#dinh2014nice). This means you may need to add additional citations to the website with your `Bijector` or `Parameters` implementation. + +Be sure to test the formatting of the docstring in the docs using the workflow detailed [here](/dev/ops). diff --git a/website/docs/dev/ops.md b/website/docs/dev/ops.md new file mode 100644 index 00000000..68871918 --- /dev/null +++ b/website/docs/dev/ops.md @@ -0,0 +1,91 @@ +--- +id: ops +title: Continuous Integration +sidebar_label: Continuous Integration +--- +:::info +Please do not feel intimidated by the thought of having to make your code pass the CI tests! The core developer team is happy to work closely with contributors to integrate their code and merge PRs. +::: + +FlowTorch uses [GitHub Actions](https://docs.github.com/en/actions) to run code quality tests on pushes or pull requests to the `main` branch, a process known as [continuous integration](https://en.wikipedia.org/wiki/Continuous_integration) (CI). The tests are run for Python versions 3.7, 3.8, and 3.9, and must be successful for a PR to be merged into `main`. All workflow runs can be viewed [here](https://github.com/facebookincubator/flowtorch/actions), or else viewed from the link at the bottom of the [PR](https://github.com/facebookincubator/flowtorch/pulls) in question. + + +## Workflow Steps +The definition of the steps performed in the build workflow is found [here](https://github.com/facebookincubator/flowtorch/blob/main/.github/workflows/python-package.yml) and is as follows: + +1. The version of Python (3.7, 3.8, or 3.9) is installed along with the developer dependencies of FlowTorch; +```bash +python -m pip install --upgrade pip +python -m pip install flake8 black usort pytest mypy +pip install numpy +pip install --pre torch torchvision torchaudio +pip install -e .[dev] +``` + +2. Each Python source is checked for containing the mandatory copyright header by a [custom script](https://github.com/facebookincubator/flowtorch/blob/main/scripts/copyright_headers.py): +```bash +python scripts/copyright_headers.py --check flowtorch tests scripts examples +``` + +3. The formatting of the Python code in the [library](https://github.com/facebookincubator/flowtorch/tree/main/flowtorch) and [tests](https://github.com/facebookincubator/flowtorch/tree/main/tests) is checked to ensure it follows a standard using [`black`](https://black.readthedocs.io/en/stable/); +```bash +black --check flowtorch tests +``` +4. Likewise, the order and formatting of Python `import` statements in the same folders is checked to ensure it follows a standard using [`usort`](https://usort.readthedocs.io/en/stable/); +```bash +usort check flowtorch tests +``` +5. A [static code analysis](https://en.wikipedia.org/wiki/Static_program_analysis), or rather, linting, is performed by [`flake8`](https://flake8.pycqa.org/en/latest/) to find potential bugs; +```bash +flake8 . tests --count --show-source --statistics +``` +6. 
FlowTorch makes use of type hints, which we consider mandatory for all contributed code, and static types are checked with [`mypy`](https://github.com/python/mypy); +```bash +mypy --disallow-untyped-defs flowtorch +``` +7. Unit tests: + +pytest + XML coverage report +```bash +pytest --cov=tests --cov-report=xml -W ignore::DeprecationWarning tests/ +``` + +8. The coverage report is uploaded to [Codecov](https://about.codecov.io/) with a [GitHub Action](https://github.com/codecov/codecov-action). This allows us to analyze the results and produce the percentage of code covered badge. + +If any step fails, the workflow fails and you will not be able to merge the PR into `main`. + +## Successful Commits +To ensure your PR passes, you should perform these steps *before pushing your local commits to the remote branch*. + +### Run Tests +Run the tests first so that you can do the code formatting just once as the final step: +```bash +pytest tests -W ignore::DeprecationWarning +``` +Fix any failing tests until the above command succeeds. + +### Check Types +Check that there are no errors with the type hints: +```bash +mypy --disallow-untyped-defs flowtorch +``` +I find this is one of the most difficult steps to make pass - if you require assistance, comment on your PR, tagging the core developers. + +### Formatting and Linting +Having ensured the tests and docs are correct, run the following commands to standardize your code's formatting: +```bash +black flowtorch tests +usort format flowtorch tests +``` +Now, run these commands in check mode to ensure there are no errors: +```bash +black --check flowtorch tests +usort check flowtorch tests +``` +It is possible you may need to fix some errors by hand. + +Finally, run the linter and fix any resulting errors: +```bash +flake8 flowtorch tests +``` +At this point, you are ready to commit your changes and push to the remote branch - you're a star! :star: From there, your PR will be reviewed by the core developers and after any modifications are made, merged to the `main` branch. diff --git a/website/docs/dev/overview.md b/website/docs/dev/overview.md new file mode 100644 index 00000000..f18c6c35 --- /dev/null +++ b/website/docs/dev/overview.md @@ -0,0 +1,40 @@ +--- +id: overview +title: Overview +sidebar_label: Overview +--- +:::info +If you are having trouble getting the CI tests to pass, you may create a PR, regardless, in order to get a review and help from the core developers. +::: + +:::info +It is preferable to write smaller, incremental PRs as opposed to larger, monolithic ones. Aim to modify only a few files and add less than 500 lines of code. +::: + +[FlowTorch](https://flowtorch.ai) is designed with easy extensibility in mind. In this section, we detail the interfaces for normalizing flow bijections and conditioning networks, as well as the software practices that must be followed. First, however, let us explain the process for making a contribution to [FlowTorch](https://flowtorch.ai). + +## How to Make a Contribution +### Ideation +New features begin with a discussion between users, independent contributors (that's you!), and the core development team. If you would like to see a new feature or are interested in contributing it yourself, please start a new thread on the forum, tagging it with "new feature." 
+
+### Development
+After this discussion has taken place and the details of the new feature have been decided upon, the next step is to fork the [flowtorch repo](https://github.com/facebookincubator/flowtorch) using the "Fork" button in the upper right corner.
+
+Next, clone your forked repository locally and create a feature branch:
+
+```bash
+git clone https://github.com/<username>/flowtorch.git
+cd flowtorch
+git checkout -b <feature-branch-name>
+```
+
+Create your new feature. Ensure you have [added a docstring](/dev/docs) to your new class.
+
+Follow the steps [here](/dev/ops#successful-commits) to ensure that your code is formatted correctly, passes type checks, unit tests, the docs build, and so on.
+
+Assuming it passes these tests, commit the changes to your local repo and push to your remote fork.
+
+### Review
+Finally, create a [pull request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) (PR) to merge your forked feature branch into the [main branch](https://github.com/facebookincubator/flowtorch). Give an informative name to your PR and include in its description the details of which features are added. Ensure your feature branch contains the latest commits from the main branch so as to avoid merge conflicts.
+
+The core developers will review your PR and most likely suggest changes to the code. After edits have been made, pushing to the feature branch of your forked remote will update the existing PR that you have opened. After all edits have been made and the tests pass, the core developers will merge your code into the main branch!
diff --git a/website/docs/dev/params.mdx b/website/docs/dev/params.mdx
new file mode 100644
index 00000000..2da2989f
--- /dev/null
+++ b/website/docs/dev/params.mdx
@@ -0,0 +1,45 @@
+---
+id: params
+title: Params Interface
+sidebar_label: Params Interface
+---
+
+## The Interface
+A class satisfying the "Params interface" contains the following elements.
+
+### Parent class
+A params class must inherit from [`flowtorch.params.Params`](https://github.com/facebookincubator/flowtorch/blob/main/flowtorch/params/base.py). This class defines important methods that are common to all parameter objects, such as `.__call__` for instantiating a [`flowtorch.params.ParamsModule`](https://github.com/facebookincubator/flowtorch/blob/main/flowtorch/params/base.py) given shape information.
+
+### Metadata
+The following property is the only one currently used:
+* `autoregressive`: a parameter object operating on vectors is autoregressive if the output $x_i$ is not a function of any $x_j$ with $j>i$ (with a straightforward generalization to higher-dimensional objects). This property is used by the testing framework.
+
+:::info
+In the near future, the `autoregressive` property is likely to be removed, and a [structured representation](/users/structure) API used instead.
+:::
+
+:::info
+Further metadata fields may be defined in the future. However, developers are not permitted to define their own without adding a default value to [`flowtorch.params.Params`](https://github.com/facebookincubator/flowtorch/blob/main/flowtorch/params/base.py).
+:::
+
+### Class Methods
+Class methods define the initialization of the lazy parameter object, how to instantiate the parameter object - that is, create any parameter vectors and neural networks, given shape information - and how to calculate the value of the parameters given a value from the distribution and possibly a context variable that is conditioned upon.
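+
+As a concrete sketch of how these pieces fit together, here is a minimal, hypothetical `Params` subclass. `ConstantParams` and its internals are illustrative only; the method signatures are those documented in the subsections below, and anything beyond them is an assumption rather than part of the documented API:
+
+```python
+from typing import Any, Dict, Sequence, Tuple
+
+import torch
+import torch.nn as nn
+
+import flowtorch.params as params
+
+
+class ConstantParams(params.Params):
+    """Illustrative params object that learns one free tensor per parameter
+    shape, ignoring both the input and the context variable."""
+
+    def __init__(self) -> None:
+        # Must call the parent initializer
+        super().__init__()
+
+    def _build(
+        self,
+        input_shape: torch.Size,
+        param_shapes: Sequence[torch.Size],
+        context_dims: int,
+    ) -> Tuple[nn.ModuleList, Dict[str, Any]]:
+        # One learnable tensor per requested parameter shape; no buffers needed
+        modules = nn.ModuleList(
+            [
+                nn.ParameterDict({"value": nn.Parameter(torch.zeros(shape))})
+                for shape in param_shapes
+            ]
+        )
+        return modules, {}
+
+    def _forward(
+        self,
+        x: torch.Tensor,
+        context: torch.Tensor,
+        modules: nn.ModuleList,
+    ) -> Sequence[torch.Tensor]:
+        # In this toy example the parameters do not depend on x or the context
+        return [m["value"] for m in modules]
+```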
+ +#### `.__init__(self, *, **kwargs)` +This optional method initializes a lazy parameter object, taking an arbitrary number of keyword arguments specific to the class. It must call the parent initializer as `super().__init__()`. Typically, the initializer is used to store settings and sometimes modify metadata. + +*`__init__` must have sensible default values for all its arguments so that one can instantiate a params object with, for example, `p = Params()`.* This design allows both easy creation and testing of params. + +#### `._build(self, input_shape: torch.Size, param_shapes: Sequence[torch.Size], context_dims: int) -> Tuple[nn.ModuleList, Dict[str, Any]]` +This method builds any necessary `nn.Parameters` or `nn.Module`s as well as buffer objects, given the shape of an input, `input_shape`, the output shapes, `param_shapes`, and the number of dimensions, `context_dims`, of an optional context variable. It returns a tuple consisting of an `nn.ModuleList` for the learnable parameters and an optional `Dict[str, Any]` mapping strings to buffer objects. + +Buffer objects differ from learnable parameters in that they do not partake in gradient descent updates, but share with parameters that they are serialized when the object is saved and loaded to disk. Buffers are typically used to store tensors that are convenient to calculate and cache during the construction of the object, such as masking matrices. + +The `._build` method is called by `Params.__call__` during the process of instantiating a non-lazy `flowtorch.params.ParamsModule` using the lazy `flowtorch.params.Params` and specified shapes. `._build` should operate on any arbitrary input and parameter shapes. + +#### `._forward(self, x: torch.Tensor, context: torch.Tensor, modules: nn.ModuleList) -> Sequence[torch.Tensor]` +This method evaluates the parameters, $\theta=f(x;z,\{\alpha_i\})$, which in general are a function of the input, $x$, context variable, $z$, and a list of modules, $\{\alpha_i\}$. Note that this may not always be the case, for instance, when the parameters are `nn.Parameter` tensors that do not depend on $x$, or when the `Params` object is a placeholder for no parameters. + +:::note +Certain `Params` are incompatible with certain `Bijector`s. For example, an autoregressive bijector requires an autoregressive params. We are currently deciding on a solution to enforce/check this and will likely release with v2 of the library. +::: diff --git a/website/docs/dev/releases.md b/website/docs/dev/releases.md new file mode 100644 index 00000000..937d5ac9 --- /dev/null +++ b/website/docs/dev/releases.md @@ -0,0 +1,15 @@ +--- +id: releases +title: Releases +sidebar_label: Releases +--- + +A list of FlowTorch releases is to be found [here](https://github.com/facebookincubator/flowtorch/releases). In this section, we detail the process of making a release. + +## Versioning Scheme +The versioning scheme we use is a simple system with versions of the form *<major>.<minor>[.dev<build>]*. Some examples are: +* `0.5`; +* `1.4`; and, +* `0.0.dev1`. + +We use [`setuptools_scm`](https://github.com/pypa/setuptools_scm) to automatically handle versions, and it is able to bump the version for builds without `.dev`. A description of how [`setuptools_scm`](https://github.com/pypa/setuptools_scm) handles versioning can be found [here](https://github.com/pypa/setuptools_scm/#default-versioning-scheme). 
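+
+For example, assuming a checkout that sits exactly on a release tag (say `v0.2`) with a clean working tree, `setuptools_scm` assigns the plain tag version, while commits made after a tag receive an automatically generated development version with a `.devN` suffix. One way to inspect the version that would be assigned is the `setuptools_scm` command-line helper (a sketch; the exact output depends on your git state and the installed `setuptools_scm` version):
+
+```bash
+# run from the repository root
+python -m setuptools_scm
+# on the v0.2 tag with a clean tree this prints: 0.2
+# on later commits it prints a guessed next version with a .devN suffix and commit hash
+```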
diff --git a/website/docs/dev/tests.md b/website/docs/dev/tests.md new file mode 100644 index 00000000..635e64ce --- /dev/null +++ b/website/docs/dev/tests.md @@ -0,0 +1,9 @@ +--- +id: tests +title: Tests +sidebar_label: Tests +--- + +All `bijector.Bijector` and `params.Parameters` classes are covered by unit tests that test that the interface is satisfied, correct shape information is produced, and in the case of bijectors, that the log determinate absolute Jacobian is correct, amongst other things. + +In general, you will not need to write new unit tests. When you implement a new component it will be detected by the library and included in existing tests. diff --git a/website/docs/users/bijectors.md b/website/docs/users/bijectors.md new file mode 100644 index 00000000..230c0f5c --- /dev/null +++ b/website/docs/users/bijectors.md @@ -0,0 +1,11 @@ +--- +id: bijectors +title: Bijectors +sidebar_label: Bijectors +--- + +:::caution + +This document is under construction! + +::: diff --git a/website/docs/users/caching.md b/website/docs/users/caching.md new file mode 100644 index 00000000..a8f7bd33 --- /dev/null +++ b/website/docs/users/caching.md @@ -0,0 +1,13 @@ +--- +id: caching +title: Caching +sidebar_label: Caching +--- + +:::caution + +This document is under construction! + +::: + +* Issue of cache being invalidated when you update the parameters! diff --git a/website/docs/users/composing.md b/website/docs/users/composing.md new file mode 100644 index 00000000..e0192972 --- /dev/null +++ b/website/docs/users/composing.md @@ -0,0 +1,17 @@ +--- +id: composing +title: Composing Bijectors +sidebar_label: Composing Bijectors +--- +:::caution + +This document is under construction! + +::: + +There are several ways to compose `Bijector`s to form new ones. + +* `flowtorch.Cat`: ? +* `flowtorch.Compose`: function composition +* `flowtorch.Reshape`: change the event shape of a bijector +* `flowtorch.Stack`: ? diff --git a/website/docs/users/conditional.mdx b/website/docs/users/conditional.mdx new file mode 100644 index 00000000..0cc20e48 --- /dev/null +++ b/website/docs/users/conditional.mdx @@ -0,0 +1,29 @@ +--- +id: conditional +title: Conditional Bijections +sidebar_label: Conditional Bijections +--- + +## Background + +In many cases, we wish to represent conditional rather than joint distributions. For instance, in performing variational inference, the variational family is a class of conditional distributions, + +$$ +\begin{aligned} +\{q_\psi(\mathbf{z}\mid\mathbf{x})\mid\theta\in\Theta\}, +\end{aligned} +$$ + +where $\mathbf{z}$ is the latent variable and $\mathbf{x}$ the observed one, that hopefully contains a member close to the true posterior of the model, $p(\mathbf{z}\mid\mathbf{x})$. In other cases, we may wish to learn to generate an object $\mathbf{x}$ conditioned on some context $\mathbf{c}$ using $p_\theta(\mathbf{x}\mid\mathbf{c})$ and observations $\{(\mathbf{x}_n,\mathbf{c}_n)\}^N_{n=1}$. For instance, $\mathbf{x}$ may be a spoken sentence and $\mathbf{c}$ a number of speech features. + +The theory of Normalizing Flows is easily generalized to conditional distributions. We denote the variable to condition on by $C=\mathbf{c}\in\mathbb{R}^M$. A simple multivariate source of noise, for example a standard i.i.d. normal distribution, $X\sim\mathcal{N}(\mathbf{0},I_{D\times D})$, is passed through a vector-valued bijection that also conditions on C, $g:\mathbb{R}^D\times\mathbb{R}^M\rightarrow\mathbb{R}^D$, to produce the more complex transformed variable $Y=g(X;C=\mathbf{c})$. 
In practice, this is usually accomplished by making the parameters for a known normalizing flow bijection $g$ the output of a hypernet neural network that inputs $\mathbf{c}$. + +Sampling of conditional transforms simply involves evaluating $Y=g(X; C=\mathbf{c})$. Conditioning the bijections on $\mathbf{c}$, the same formula holds for scoring as for the joint multivariate case. + +## Conditioning Transformed Distributions +:::caution +The examples in this section makes use of `Bijector` classes that are not yet available - they will be added in the `v0.3` release. The concepts are still relevant. +::: +[`Bijector`s have no notion of conditionality!] + +## Next Steps diff --git a/website/docs/users/conditioning.md b/website/docs/users/conditioning.md new file mode 100644 index 00000000..b7813bd0 --- /dev/null +++ b/website/docs/users/conditioning.md @@ -0,0 +1,11 @@ +--- +id: conditioning +title: Conditioning +sidebar_label: Conditioning +--- + +:::caution + +This document is under construction! + +::: diff --git a/website/docs/users/constraints.md b/website/docs/users/constraints.md new file mode 100644 index 00000000..7a4f1adc --- /dev/null +++ b/website/docs/users/constraints.md @@ -0,0 +1,11 @@ +--- +id: constraints +title: Constraints +sidebar_label: Constraints +--- + +:::caution + +This document is under construction! + +::: diff --git a/website/docs/users/gpu_support.md b/website/docs/users/gpu_support.md new file mode 100644 index 00000000..7c96e582 --- /dev/null +++ b/website/docs/users/gpu_support.md @@ -0,0 +1,9 @@ +--- +id: gpu_support +title: GPU Support +sidebar_label: GPU Support +--- + +:::info +FlowTorch bijectors, conditioning networks, and transformed distributions are likely to work on GPUs but have not yet been fully tested. Full GPU support is expected for our `v0.2` release and until then this page will serve as a placeholder. +::: diff --git a/website/docs/users/initialization.md b/website/docs/users/initialization.md new file mode 100644 index 00000000..39352147 --- /dev/null +++ b/website/docs/users/initialization.md @@ -0,0 +1,11 @@ +--- +id: initialization +title: Initialization +sidebar_label: Initialization +--- + +:::caution + +This document is under construction! + +::: diff --git a/website/docs/users/installation.md b/website/docs/users/installation.md new file mode 100644 index 00000000..c2f1952f --- /dev/null +++ b/website/docs/users/installation.md @@ -0,0 +1,35 @@ +--- +id: installation +title: Installation +sidebar_label: Installation +--- + +[FlowTorch](https://flowtorch.ai) can be installed as a package or directly from source. + +## Requirements + +Python 3.7 or later is required. Other requirements will be downloaded by `pip` according to [setup.py](https://github.com/facebookincubator/flowtorch/blob/main/setup.py). + +## Pre-release + +As [FlowTorch](https://flowtorch.ai) is currently under rapid development, we recommend installing the [latest commit](https://github.com/facebookincubator/flowtorch/commits/main) from GitHub: + + git clone https://github.com/facebookincubator/flowtorch.git + cd flowtorch + pip install -e . 
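+
+To confirm that the editable install succeeded and to see which version was picked up, you can, for example, query `pip`:
+
+    pip show flowtorch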
+
+Updates can then be performed by navigating to the directory where you cloned [FlowTorch](https://flowtorch.ai) and running:
+
+    git pull
+
+## Latest Release
+
+Alternatively, the [latest release](https://github.com/facebookincubator/flowtorch/releases) is installed from [PyPI](https://pypi.org/project/flowtorch/):
+
+    pip install flowtorch
+
+## Developers
+
+[Additional libraries](https://github.com/facebookincubator/flowtorch/blob/main/setup.py#L14) required for development are installed by replacing the above `pip` command with:
+
+    pip install -e .[dev]
diff --git a/website/docs/users/intro.mdx b/website/docs/users/intro.mdx
new file mode 100644
index 00000000..33b703e5
--- /dev/null
+++ b/website/docs/users/intro.mdx
@@ -0,0 +1,27 @@
+---
+id: introduction
+title: Introduction
+sidebar_label: Introduction
+slug: /users
+---
+
+## What is a Normalizing Flow?
+Normalizing Flows are a family of methods for representing and learning high-dimensional probability distributions. They have found [state-of-the-art applications](/dev/bibliography#applications) in modeling complex distributions over images, [speech](/dev/bibliography#kim2020wavenode), [syntactic structure](/dev/bibliography#jin2019unsupervised), and molecules, to name a few. *Simply put, a Normalizing Flow is a composition of learnable functions that inputs samples from a simple random distribution, typically Gaussian noise, and outputs samples from a more complex target distribution.* Here is an illustration ([taken with permission from here](https://github.com/janosh/awesome-normalizing-flows)):
+
+ +A simple source of noise, $z_0$, is passed through a number of invertible functions, $f_1,f_2,\ldots,f_k$ to produce a more complex random variable, $z_k$. The invertible functions are constructed in a clever way so that we can easily sample from $z_k$ and calculate its density function, $p_k(\cdot)$. The field of Normalizing Flows can be seen as a modern take on the [change of variables method for random distributions](https://en.wikipedia.org/wiki/Probability_density_function#Function_of_random_variables_and_change_of_variables_in_the_probability_density_function), where the transformations are *high-dimensional*, often employing *neural networks*, and are designed for *effective stochastic optimization*. + +We believe, although still a nascent field, that Normalizing Flows are a fundamental component of the modern Bayesian statistics and probabilistic computing toolkit, and we will likely see many more exciting applications in the near future. + +## What is FlowTorch? +[FlowTorch](https://flowtorch.ai) is a library that provides PyTorch components for constructing Normalizing Flows using the latest research in the field. It builds on an earlier sub-library of code from [Pyro](https://github.com/pyro-ppl/pyro/tree/dev/pyro/distributions/transforms) developed by the author since 2018. The main goals behind creating a new library for Normalizing Flows are to: +* define an elegant interface for Normalizing Flow methodology, building on our experience with Pyro, so that practitioners can easily utilize these methods and researchers can easily contribute their own implementations; +* develop robust unit tests and other code quality practices to guarantee production quality code; +* promote the methods in applied settings by fostering a community of Normalizing Flow practioners and linking them with researchers; +* accelerate research in Normalizing Flows by providing standard implementations, benchmarking, and a comprehensive literature survey. + +## Where to From Here? +We recommend reading the next two sections to [install FlowTorch](/users/installation) and [train your first Normalizing Flow](/users/start). For more theoretical background on Normalizing Flows and information about their applications, see the primer [here](/users/univariate) and the list of survey papers [here](/dev/bibliography#surveys). diff --git a/website/docs/users/methods.md b/website/docs/users/methods.md new file mode 100644 index 00000000..4e230c31 --- /dev/null +++ b/website/docs/users/methods.md @@ -0,0 +1,11 @@ +--- +id: methods +title: Table of Methods +sidebar_label: Table of Methods +--- + +:::info + +This page will contain a table of `Bijector` and `Params` classes in FlowTorch with information such as their functional form, domain, range, computation/memory complexity, and literature references. Until our `v0.2` release, this is a placeholder. + +::: diff --git a/website/docs/users/multivariate.mdx b/website/docs/users/multivariate.mdx new file mode 100644 index 00000000..e961d94f --- /dev/null +++ b/website/docs/users/multivariate.mdx @@ -0,0 +1,133 @@ +--- +id: multivariate +title: Multivariate Bijections +sidebar_label: Multivariate Bijections +--- + +## Background +The fundamental idea of normalizing flows also applies to multivariate random variables, and this is where its value is clearly seen - *representing complex high-dimensional distributions*. In this case, a simple multivariate source of noise, for example a standard i.i.d. 
normal distribution, $X\sim\mathcal{N}(\mathbf{0},I_{D\times D})$, is passed through a vector-valued bijection, $g:\mathbb{R}^D\rightarrow\mathbb{R}^D$, to produce the more complex transformed variable $Y=g(X)$. + +Sampling $Y$ is again trivial and involves evaluation of the forward pass of $g$. We can score $Y$ using the multivariate substitution rule of integral calculus, + +$$ +\begin{aligned} + \mathbb{E}_{p_X(\cdot)}\left[f(X)\right] &= \int_{\text{supp}(X)}f(\mathbf{x})p_X(\mathbf{x})d\mathbf{x}\\ + &= \int_{\text{supp}(Y)}f(g^{-1}(\mathbf{y}))p_X(g^{-1}(\mathbf{y}))\det\left|\frac{d\mathbf{x}}{d\mathbf{y}}\right|d\mathbf{y}\\ + &= \mathbb{E}_{p_Y(\cdot)}\left[f(g^{-1}(Y))\right], + \end{aligned} +$$ + +where $d\mathbf{x}/d\mathbf{y}$ denotes the Jacobian matrix of $g^{-1}(\mathbf{y})$. Equating the last two lines we get, + +$$ +\begin{aligned} + \log(p_Y(y)) &= \log(p_X(g^{-1}(y)))+\log\left(\det\left|\frac{d\mathbf{x}}{d\mathbf{y}}\right|\right)\\ + &= \log(p_X(g^{-1}(y)))-\log\left(\det\left|\frac{d\mathbf{y}}{d\mathbf{x}}\right|\right). +\end{aligned} +$$ + +Inituitively, this equation says that the density of $Y$ is equal to the density at the corresponding point in $X$ plus a term that corrects for the warp in volume around an infinitesimally small volume around $Y$ caused by the transformation. For instance, in $2$-dimensions, the geometric interpretation of the absolute value of the determinant of a Jacobian is that it represents the area of a parallelogram with edges defined by the columns of the Jacobian. In $n$-dimensions, the geometric interpretation of the absolute value of the determinant Jacobian is that is represents the hyper-volume of a parallelepiped with $n$ edges defined by the columns of the Jacobian (see a calculus reference such as \[7\] for more details). + +Similar to the univariate case, we can compose such bijective transformations to produce even more complex distributions. By an inductive argument, if we have $L$ transforms $g_{(0)}, g_{(1)},\ldots,g_{(L-1)}$, then the log-density of the transformed variable $Y=(g_{(0)}\circ g_{(1)}\circ\cdots\circ g_{(L-1)})(X)$ is + +$$ +\begin{aligned} + \log(p_Y(y)) &= \log\left(p_X\left(\left(g_{(L-1)}^{-1}\circ\cdots\circ g_{(0)}^{-1}\right)\left(y\right)\right)\right)+\sum^{L-1}_{l=0}\log\left(\left|\frac{dg^{-1}_{(l)}(y_{(l)})}{dy'}\right|\right), +\end{aligned} +$$ + +where we've defined $y_{(0)}=x$, $y_{(L-1)}=y$ for convenience of notation. + +The main challenge is in designing parametrizable multivariate bijections that have closed form expressions for both $g$ and $g^{-1}$, a tractable Jacobian whose calculation scales with $O(D)$ or $O(1)$ rather than $O(D^3)$, and can express a flexible class of functions. + +## Multivariate `Bijector`s +In this section, we show how to use `bij.SplineAutoregressive` to learn the bivariate toy distribution from our running example. Making a simple change we can represent bivariate distributions of the form, $p(x_1,x_2)=p(x_1)p(x_2|x_1)$: + +```python +dist_x = torch.distributions.Independent( + torch.distributions.Normal(torch.zeros(2), torch.ones(2)), + 1 +) +bijector = bij.SplineAutoregressive() +dist_y = dist.Flow(dist_x, bijector) +``` + +The `bij.SplineAutoregressive` bijector extends `bij.Spline` so that the spline parameters are the output of an autoregressive neural network. See [[durkan2019neural]](/dev/bibliography#durkan2019neural) and [[germain2015made]](/dev/bibliography#germain2015made) for more details. 
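+
+Since `dist.Flow` follows the standard `torch.distributions` interface, we can sanity-check the shapes it produces before training (a small sketch; `dist_y` is the flow constructed above):
+
+```python
+y = dist_y.sample(torch.Size([1000]))  # 1000 draws from the (untrained) flow, shape (1000, 2)
+log_py = dist_y.log_prob(y)            # log-density of each draw, shape (1000,)
+print(y.shape, log_py.shape)
+```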
+
+As before, we train this distribution on the toy dataset and plot the results:
+
+```python
+dataset = torch.tensor(X, dtype=torch.float)
+steps = 5001  # so that the final printout at step 5000 is included
+optimizer = torch.optim.Adam(dist_y.parameters(), lr=5e-3)
+for step in range(steps):
+    optimizer.zero_grad()
+    loss = -dist_y.log_prob(dataset).mean()
+    loss.backward()
+    optimizer.step()
+
+    if step % 500 == 0:
+        print('step: {}, loss: {}'.format(step, loss.item()))
+```
+
+```
+step: 0, loss: 8.446191787719727
+step: 500, loss: 2.0197808742523193
+step: 1000, loss: 1.794958472251892
+step: 1500, loss: 1.73616361618042
+step: 2000, loss: 1.7254879474639893
+step: 2500, loss: 1.691617488861084
+step: 3000, loss: 1.679549217224121
+step: 3500, loss: 1.6967085599899292
+step: 4000, loss: 1.6723777055740356
+step: 4500, loss: 1.6505967378616333
+step: 5000, loss: 1.8024061918258667
+```
+
+```python
+X_flow = dist_y.sample(torch.Size([1000,])).detach().numpy()
+plt.title(r'Joint Distribution')
+plt.xlabel(r'$x_1$')
+plt.ylabel(r'$x_2$')
+plt.scatter(X[:,0], X[:,1], label='data', alpha=0.5)
+plt.scatter(X_flow[:,0], X_flow[:,1], color='firebrick', label='flow', alpha=0.5)
+plt.legend()
+plt.show()
+```
+
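+
+If you would like to inspect the learned density itself rather than just samples from it, one possible approach (ours, not part of the original tutorial) is to evaluate `dist_y.log_prob` on a regular grid, reusing the `dist_y` trained above:
+
+```python
+import numpy as np
+
+# Evaluate the learned log-density on a 200 x 200 grid (assumes dist_y from above)
+xs = np.linspace(-2.5, 2.5, 200)
+grid = np.stack(np.meshgrid(xs, xs), axis=-1).reshape(-1, 2)
+with torch.no_grad():
+    log_p = dist_y.log_prob(torch.tensor(grid, dtype=torch.float)).reshape(200, 200)
+
+plt.contourf(xs, xs, log_p.exp().numpy(), levels=50)
+plt.title(r'Learned density $q(x_1,x_2)$')
+plt.xlabel(r'$x_1$')
+plt.ylabel(r'$x_2$')
+plt.show()
+```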

+ +

+ +```python +plt.subplot(1, 2, 1) +sns.distplot(X[:,0], hist=False, kde=True, + bins=None, + hist_kws={'edgecolor':'black'}, + kde_kws={'linewidth': 2}, + label='data') +sns.distplot(X_flow[:,0], hist=False, kde=True, + bins=None, color='firebrick', + hist_kws={'edgecolor':'black'}, + kde_kws={'linewidth': 2}, + label='flow') +plt.title(r'$p(x_1)$') +plt.subplot(1, 2, 2) +sns.distplot(X[:,1], hist=False, kde=True, + bins=None, + hist_kws={'edgecolor':'black'}, + kde_kws={'linewidth': 2}, + label='data') +sns.distplot(X_flow[:,1], hist=False, kde=True, + bins=None, color='firebrick', + hist_kws={'edgecolor':'black'}, + kde_kws={'linewidth': 2}, + label='flow') +plt.title(r'$p(x_2)$') +plt.show() +``` + +

+ +

+ +We see from the output that this normalizing flow has successfully learnt both the univariate marginals *and* the bivariate distribution. diff --git a/website/docs/users/parameters.md b/website/docs/users/parameters.md new file mode 100644 index 00000000..e3373624 --- /dev/null +++ b/website/docs/users/parameters.md @@ -0,0 +1,11 @@ +--- +id: parameters +title: Parameters +sidebar_label: Parameters +--- + +:::caution + +This document is under construction! + +::: diff --git a/website/docs/users/serialization.md b/website/docs/users/serialization.md new file mode 100644 index 00000000..04653b30 --- /dev/null +++ b/website/docs/users/serialization.md @@ -0,0 +1,9 @@ +--- +id: serialization +title: Serialization +sidebar_label: Serialization +--- + +:::info +Serialization of `flowtorch.distributions.TransformedDistribution` objects is likely to work but has not yet been tested. We expect this to be completed for our `v0.2` release and until then this page will serve as a placeholder. +::: diff --git a/website/docs/users/shapes.mdx b/website/docs/users/shapes.mdx new file mode 100644 index 00000000..3f611306 --- /dev/null +++ b/website/docs/users/shapes.mdx @@ -0,0 +1,106 @@ +--- +id: shapes +title: Shapes +sidebar_label: Shapes +--- + +One of the advantages of using FlowTorch is that we have carefully thought out how shape information is propagated from the base distribution through the sequence of bijective transforms. Before we explain how shapes are handled in FlowTorch, let us revisit the shape conventions shared across PyTorch and TensorFlow. + +## Shape Conventions +FlowTorch shares the shape conventions of PyTorch's [`torch.distributions.Distribution`](https://pytorch.org/docs/stable/distributions.html#distribution) and TensorFlow's [`tfp.distributions.Distribution`](https://www.tensorflow.org/probability/api_docs/python/tfp/distributions/Distribution) for representing random distributions. In these conventions, the shape of a tensor sampled from a random distribution is divided into three parts: the *sample shape*, the *batch shape*, and the *event shape*. + +As described in the [TensorFlow documentation](https://www.tensorflow.org/probability/examples/Understanding_TensorFlow_Distributions_Shapes#basics), + +* Event shape describes the shape of a single draw from the distribution, which may or may not be dependent across dimensions. +* Batch shape describes independent, not identically distributed draws, that is, a "batch" of distributions. +* Sample shape describes independent, identically distributed draws of batches from the distribution family. + +## Examples +This is best illustrated with some simple examples. Let's begin with a standard normal distribution: + +```python +import torch +import torch.distributions as dist +d = dist.Normal(loc=0, scale=1) +sample_shape = torch.Size([]) + +assert d.event_shape == torch.Size([]) +assert d.batch_shape == torch.Size([]) +assert d.sample(sample_shape).shape == torch.Size([]) +``` + +In this example, we have a single scalar normal distribution from which we draw a scalar sample. Since it is a scalar distribution, the `event_shape == torch.Size([])`. Since it is a single distribution, `batch_shape == torch.Size([])`. And we draw a scalar sample since `sample_shape == torch.Size([])`. 
+
+Note that *the event shape and batch shape are properties of the distribution itself*, whereas the sample shape depends on the size argument passed to [`Distribution.sample`](https://pytorch.org/docs/stable/distributions.html#torch.distributions.distribution.Distribution.sample) or [`Distribution.rsample`](https://pytorch.org/docs/stable/distributions.html#torch.distributions.distribution.Distribution.rsample). Also, the shape of `d.sample(sample_shape)` is the concatenation of the `sample_shape`, `batch_shape`, and `event_shape`, in that order.
+
+Let's look at another example:
+
+```python
+d = dist.Normal(loc=torch.zeros(1), scale=torch.ones(1))
+sample_shape = torch.Size([2])
+
+assert d.event_shape == torch.Size([])
+assert d.batch_shape == torch.Size([1])
+assert d.sample(sample_shape).shape == torch.Size([2, 1])
+```
+
+In this case, `event_shape = torch.Size([])` since we have a scalar distribution, but `batch_shape = torch.Size([1])` since we have a tensor of parameters of that shape defining the distribution. Also, `sample_shape = torch.Size([2])` so that `d.sample(sample_shape).shape = torch.Size([2, 1])`.
+
+A further example:
+
+```python
+d = dist.Normal(loc=torch.zeros(2, 5), scale=torch.ones(2, 5))
+sample_shape = torch.Size([3, 4])
+
+assert d.event_shape == torch.Size([])
+assert d.batch_shape == torch.Size([2, 5])
+assert d.sample(sample_shape).shape == torch.Size([3, 4, 2, 5])
+```
+
+We see that batch shapes, sample shapes (and event shapes) can have an arbitrary number of dimensions and are not restricted to being vectors.
+
+Is the event shape always `torch.Size([])`? This is not true for *multivariate* distributions, that is, distributions over vectors, matrices, and higher-order tensors that can have dependencies across their dimensions. For example:
+
+```python
+d = dist.MultivariateNormal(loc=torch.zeros(2, 5), covariance_matrix=torch.eye(5))
+sample_shape = torch.Size([3, 4])
+
+assert d.event_shape == torch.Size([5])
+assert d.batch_shape == torch.Size([2])
+assert d.sample(sample_shape).shape == torch.Size([3, 4, 2, 5])
+```
+
+Note that the `covariance_matrix` tensor will be broadcast across `loc`. *Whereas the previous example defined a matrix batch of scalar normal distributions, this example defines a vector batch of multivariate normal distributions.* This is an important distinction!
+
+See [this page](https://ericmjl.github.io/blog/2019/5/29/reasoning-about-shapes-and-probability-distributions/) for further explanation on shape conventions.
+
+## Non-conditional Transformed Distributions
+How do shapes work for transformed distributions that do not condition on a context variable, that is, distributions of the form $p_\theta(\mathbf{x})$? The sample shape depends strictly on the input to `.sample` or `.rsample` and so we restrict our attention to the batch and event shapes.
+
+Returning to the diagram on the [intro page](/users), suppose the base distribution is $p_0$, and the distribution after applying the initial bijection, $f_1$, is $p_1$. Denote by $z_0$ a sample from the base distribution and $z_1=f_1(z_0)$. We make a few observations:
+
+Firstly, since $f_1$ is a bijection, $z_0$ must have the same number of dimensions as $z_1$. In our shape terminology, the sum of the event shape of the base distribution must be the same as the sum of the event shape of the transformed one.
+
+Secondly, the batch shape is preserved from the base distribution to the transformed one.
*By convention, we assume that a single bijection, $f_1$, is applied to a batch of base distributions, $\{p_{0,i}\}$, to produce a batch of the same shape of transformed distributions, $\{p_{1,i}\}$.*
+
+Thirdly, the event shape of the base distribution must be compatible with the domain of the bijection. For instance, if the base distribution has event shape `torch.Size([])` and is a scalar, it does not make sense to apply a bijection on matrices with, e.g., $\text{Dom}[f_1]\subseteq \mathbb{R}^{n\times m}$.
+
+Given a base distribution, `base`, and a non-conditional bijector, `bijector`, the pseudo-code to calculate the batch and event shape of the transformed distribution, `flow`, looks like this:
+
+```python
+# Input event shape must have at least as many dimensions as the bijector operates over
+assert len(base.event_shape) >= bijector.domain.event_dim
+
+flow.batch_shape = base.batch_shape
+flow.event_shape = bijector.forward_shape(base.event_shape)
+
+# bijector.forward_shape and bijector.codomain.event_dim must be consistent
+assert len(flow.event_shape) >= bijector.codomain.event_dim
+
+# bijectors preserve dimensions
+assert sum(flow.event_shape) == sum(base.event_shape)
+```
+
+The `bijector` class defines the number of dimensions that it operates over in `bijector.domain.event_dim` and `bijector.codomain.event_dim`, and has a method `bijector.forward_shape` that specifies how the event shape of the input relates to that of the output. (In most cases, this will be the identity function.)
+
+This information is sufficient to construct the batch and event shapes of the transformed distribution from the base. For a Normalizing Flow that is the composition of multiple bijections, we apply this logic in succession, using the transformed distribution of the previous step as the base distribution of the next.
\ No newline at end of file
diff --git a/website/docs/users/start.mdx b/website/docs/users/start.mdx
new file mode 100644
index 00000000..1f8180bf
--- /dev/null
+++ b/website/docs/users/start.mdx
@@ -0,0 +1,136 @@
+---
+id: start
+title: Your First Flow
+sidebar_label: Your First Flow
+---
+
+## The Task
+Let's begin training our first Normalizing Flow with a simple example! The target distribution that we intend to learn is,
+$$
+\begin{aligned}
+    Y' &\sim \mathcal{N}\left(\mu=\begin{bmatrix}
+    5 \\
+    5
+\end{bmatrix}, \Sigma=\begin{bmatrix}
+    0.5 & 0 \\
+    0 & 0.5
+\end{bmatrix} \right)
+\end{aligned},
+$$
+that is, an affine transformation of a standard multivariate normal distribution. The base distribution is,
+$$
+\begin{aligned}
+    X &\sim \mathcal{N}\left(\mu=\begin{bmatrix}
+    0 \\
+    0
+\end{bmatrix}, \Sigma=\begin{bmatrix}
+    1 & 0 \\
+    0 & 1
+\end{bmatrix} \right)
+\end{aligned},
+$$
+that is, standard normal noise (which is typical for Normalizing Flows). The task is to learn some bijection $g_\theta$ so that
+$$
+\begin{aligned}
+    Y &\triangleq g_\theta(X) \\
+    &\sim Y'
+\end{aligned}
+$$
+approximately holds. We will define our Normalizing Flow, $g_\theta$, by a single affine transformation,
+$$
+\begin{aligned}
+    g_\theta(\mathbf{x}) &\triangleq \begin{bmatrix}
+    \mu_1 \\
+    \mu_2(x_1)
+\end{bmatrix} + \begin{bmatrix}
+    \sigma_1 \\
+    \sigma_2(x_1)
+\end{bmatrix}\otimes\begin{bmatrix}
+    x_1 \\
+    x_2
+\end{bmatrix}.
+\end{aligned}
+$$
+In this notation, $\mathbf{x}=(x_1,x_2)^T$, $\otimes$ denotes element-wise multiplication, and the parameters are the scalars $\mu_1,\sigma_1$ and the parameters of the neural networks $\mu_2(\cdot)$ and $\sigma_2(\cdot)$.
(Think of the NNs as very simple shallow feedforward nets in this example.) This is an example of [Inverse Autoregressive Flow](/dev/bibliography#kingma2016improving). + +There are several metrics we could use to train $Y$ to be close in distribution to $Y'$. First, let us denote the target distribution of $Y'$ by $p(\cdot)$ and the learnable distribution of the normalizing flow, $Y$, as $q_\theta(\cdot)$ (in the following sections, we will explain how to calculate $q_\theta$ from $g_\theta$). Let's use the forward [KL-divergence](https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence), +$$ +\begin{aligned} +\text{KL}\{p\ ||\ q_\theta\} &\triangleq \mathbb{E}_{p(\cdot)}\left[\log\frac{p(Y')}{q_\theta(Y')}\right] \\ +&= -\mathbb{E}_{p(\cdot)}\left[\log q_\theta(Y')\right] + C, +\end{aligned} +$$ +where C is a constant that does not depend on $\theta$. In practice, we draw a finite sample, $\{y_1,\ldots,y_M\}$, from $p$ and optimize a [Monte Carlo estimate](https://en.wikipedia.org/wiki/Monte_Carlo_integration) of the KL-divergence with stochastic gradient descent so that the loss is, +$$ +\begin{aligned} + \mathcal{L}(\theta) &= -\frac{1}{M}\sum^M_{m=1}\log(q_\theta(y_m)) +\end{aligned} +$$ + +*So, to summarize, the task at hand is to learn how to transform standard bivariate normal noise into another bivariate normal distribution using an affine transformation, and we will do so by matching distributions with the KL-divergence metric.* + +## Implementation +First, we import the relevant libraries: +```python +import torch +import flowtorch.bijectors as bij +import flowtorch.distributions as dist +``` +The base and target distributions are defined using standard PyTorch: +```python +base_dist = torch.distributions.Independent( + torch.distributions.Normal(torch.zeros(2), torch.ones(2)), + 1 +) +target_dist = torch.distributions.Independent( + torch.distributions.Normal(torch.zeros(2)+5, torch.ones(2)*0.5), + 1 +) +``` +Note the use of [`torch.distributions.Independent`](https://pytorch.org/docs/stable/distributions.html#independent) so that our base and target distributions are *vector valued*. + +We can visualize samples from the base and target: +
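+
+The tutorial's plotting code for these figures is not shown; a minimal sketch along the following lines, reusing `base_dist` and `target_dist` from above, would produce a similar comparison:
+
+```python
+import matplotlib.pyplot as plt
+
+# Scatter plots of samples from the base and target distributions (illustrative sketch)
+x = base_dist.sample((1000,)).numpy()
+y = target_dist.sample((1000,)).numpy()
+plt.scatter(x[:, 0], x[:, 1], alpha=0.5, label='base')
+plt.scatter(y[:, 0], y[:, 1], color='firebrick', alpha=0.5, label='target')
+plt.legend()
+plt.show()
+```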

+ +

+
+A Normalizing Flow is created in two steps. First, we create a "plan" for the flow as a `flowtorch.bijectors.Bijector` object,
+```python
+# Lazily instantiated flow
+bijectors = bij.AffineAutoregressive()
+```
+This plan is then made concrete by combining it with the base distribution, which provides the input shape, and constructing a `flowtorch.distributions.Flow` object, an extension of `torch.distributions.Distribution`:
+```python
+# Instantiate transformed distribution and parameters
+flow = dist.Flow(base_dist, bijectors)
+```
+At this point, we have an object, `flow`, for the distribution, $q_\theta(\cdot)$, that follows the standard PyTorch interface. Therefore, it can be trained with the following code, which will be familiar to readers who have used `torch.distributions` before:
+```python
+# Training loop
+opt = torch.optim.Adam(flow.parameters(), lr=5e-3)
+for idx in range(3001):
+    opt.zero_grad()
+
+    # Minimize KL(p || q)
+    y = target_dist.sample((1000,))
+    loss = -flow.log_prob(y).mean()
+
+    if idx % 500 == 0:
+        print('epoch', idx, 'loss', loss)
+
+    loss.backward()
+    opt.step()
+```
+Note how we obtain the learnable parameters of the normalizing flow from the `flow` object, which is a `torch.nn.Module`. Visualizing samples after learning, we see that we have been successful in matching the target distribution:
+
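+
+As with the earlier figure, the exact plotting code is not shown; a hedged sketch reusing `flow` and `target_dist` from above might look like this:
+
+```python
+import matplotlib.pyplot as plt
+
+# Compare samples from the trained flow against the target (illustrative sketch)
+y_flow = flow.sample((1000,)).detach().numpy()
+y_target = target_dist.sample((1000,)).numpy()
+plt.scatter(y_target[:, 0], y_target[:, 1], alpha=0.5, label='target')
+plt.scatter(y_flow[:, 0], y_flow[:, 1], color='firebrick', alpha=0.5, label='flow')
+plt.legend()
+plt.show()
+```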

+ +

+Congratulations on training your first flow!
+
+## Discussion
+
+This simple example illustrates a few important points of FlowTorch's design:
+
+Firstly, `Bijector` objects are agnostic to their shape. A `Bijector` object specifies *how the shape is changed* by the forward and inverse operations, and then calculates the exact shapes when it obtains knowledge of the base distribution, that is, when `flow = dist.Flow(base_dist, bijectors)` is run. Any neural networks or other parametrized functions, which also require this shape information, are not instantiated until the same moment. In this sense, a `Bijector` can be thought of as a lazy plan for creating a normalizing flow. The advantage of doing things this way is that the shape information can be "type checked" and does not need to be specified in multiple locations (ensuring these quantities are consistent).
+
+Secondly, all objects are designed to have sensible defaults. We do not need to define the conditioning network for `bijectors.AffineAutoregressive`; it will use a [MADE network](/dev/bibliography#germain2015made) with reasonable hyperparameters and defer initialization until it later receives shape information. Thirdly, there is compatibility, insofar as possible, with standard PyTorch interfaces such as `torch.distributions`.
diff --git a/website/docs/users/structure.md b/website/docs/users/structure.md
new file mode 100644
index 00000000..135c4ef4
--- /dev/null
+++ b/website/docs/users/structure.md
@@ -0,0 +1,56 @@
+---
+id: structure
+title: Structured Representations
+sidebar_label: Structured Representations
+---
+
+## Bayesian Networks
+The *structure* of a distribution refers to the set of independence relationships that hold for the distribution. Suppose we have a distribution over variables, $\{x_1,x_2,\ldots,x_N\}$. At one extreme, the variables are fully *independent* and the distribution can be written as,
+$$
+p(\mathbf{x}) = \prod^N_{n=1}p(x_n).
+$$
+At the other extreme, the variables are fully *dependent* and the distribution can be written as,
+$$
+p(\mathbf{x}) = \prod^N_{n=1}p(x_n\mid x_1,x_2,\ldots,x_{n-1}).
+$$
+In between these two extremes, a distribution will have a factorization with factors that condition on some but not all of the previous variables under a given ordering.
+
+The field of *Probabilistic Graphical Models* studies graphical representations that express these structural relationships within distributions, as well as inference algorithms that operate directly on the graphical structures. For instance, we say that the fully independent distribution factors over the following *directed acyclic graph* (DAG), also known as a *Bayesian network (BN) structure*,
+
+[insert graph svg]
+
+And the fully dependent distribution factors over the fully connected BN structure,
+
+[insert graph svg]
+
+Therefore, full independence corresponds to zero edges, full dependence corresponds to the maximum number of edges in a DAG (that is, $N\ \text{choose}\ 2$), and it seems reasonable that distributions between these two extremes factor according to some graph with an intermediate number of edges. For instance, a graph over $x_1,\ldots,x_7$ might factor according to this graph,
+
+[insert graph svg]
+
+For Bayesian networks, that is, *directed* graphical models (there are other formalisms for *undirected*, *bidirected*, and graphs with mixed directionality), the semantics of the graphical structure are that ... It can be shown that a graph factors according to a BN structure if and only if ...
+
+So in a sense, the BN structure and a distribution's factorization are equivalent, and both express the conditional independence relationships that hold in the distribution.
+
+## Faithfulness and Minimality
+One important point to note is that the BN structure over which a distribution factors may fail to express some of the independence relationships that hold in the distribution - it must not, however, express independence relationships that *do not* hold in the distribution. For instance, any distribution factors according to $\prod^N_{n=1}p(x_n\mid x_1,x_2,\ldots,x_{n-1})$, by the chain rule of probability. So, the fully connected DAG is a valid BN structure for the fully *independent* distribution.
+
+* The absence of edges expresses conditional independence relationships that hold in a distribution, whereas the presence of edges is non-informative.
+
+* Definition of I-map, minimal I-map, and faithful
+
+* Non-uniqueness of the minimal I-map. Also, minimal I-maps can have varying numbers of edges!
+
+## Structure of Normalizing Flows
+The dependency structure of normalizing flows is not something that has been considered in the literature, save for a few papers (for example, ?). Typically, they take a fully independent base distribution as input.
+
+However, it can be advantageous to represent some structure in the distribution and use this as an inductive prior for learning. [Cite my work!] showed ...
+
+## Abstractions for Expressing Structure
+
+:::info
+
+Keeping this discussion in mind, we are developing an abstraction for expressing structure in a normalizing flow for the `v0.2` release. This abstraction is likely to belong to both `Params` and `Bijector`s, and, analogously to the `.forward_shape` and `.backward_shape` methods, informs the `TransformedDistribution` class how the dependency structure is affected by each layer of the normalizing flow.
+
+There will likely be two methods exposed to the user on `TransformedDistribution`: `.factorization` and `.topological_order`. The first, `.factorization`, might return a dictionary from variable indices to the parents of that variable in a minimal I-map. Another possibility is for `.factorization` to take a variable index as input and return the array of parent indices (in which case, perhaps it should be called `.parents` and perhaps there should be a `.children` too?). This may be better if calculating and returning the whole object is an expensive operation. The second, `.topological_order`, returns an array of indices in topological ordering, possibly only calculating this lazily the first time it is requested.
+
+:::
diff --git a/website/docs/users/torchscript.md b/website/docs/users/torchscript.md
new file mode 100644
index 00000000..6ea1dc03
--- /dev/null
+++ b/website/docs/users/torchscript.md
@@ -0,0 +1,9 @@
+---
+id: torchscript
+title: TorchScript Support
+sidebar_label: TorchScript
+---
+
+:::info
+FlowTorch bijectors, conditioning networks, and transformed distributions are likely to work with [TorchScript](https://pytorch.org/docs/stable/jit.html) constructions but have not yet been fully tested. TorchScript support is expected for our `v0.2` release, and until then this page will serve as a placeholder.
+:::
diff --git a/website/docs/users/transformed_distributions.md b/website/docs/users/transformed_distributions.md
new file mode 100644
index 00000000..928d93ad
--- /dev/null
+++ b/website/docs/users/transformed_distributions.md
@@ -0,0 +1,11 @@
+---
+id: transformed_distributions
+title: Transformed Distributions
+sidebar_label: Transformed Distributions
+---
+
+:::caution
+
+This document is under construction!
+
+:::
diff --git a/website/docs/users/univariate.mdx b/website/docs/users/univariate.mdx
new file mode 100644
index 00000000..378e35b7
--- /dev/null
+++ b/website/docs/users/univariate.mdx
@@ -0,0 +1,254 @@
+---
+id: univariate
+title: Univariate Bijections
+sidebar_label: Univariate Bijections
+---
+## Background
+[Normalizing Flows](/dev/bibliography#surveys) are a family of methods for constructing flexible distributions. As mentioned in [the introduction](/users), Normalizing Flows can be seen as a modern take on the [change of variables method for random distributions](https://en.wikipedia.org/wiki/Probability_density_function#Function_of_random_variables_and_change_of_variables_in_the_probability_density_function), and this is most apparent for univariate bijections. Thus, in this first section we restrict our attention to representing univariate distributions with bijections.
+
+The basic idea is that a simple source of noise, for example a variable with a standard normal distribution, $X\sim\mathcal{N}(0,1)$, is passed through a bijective (i.e., invertible) function, $g(\cdot)$, to produce a more complex transformed variable $Y=g(X)$. For such a random variable, we typically want to perform two operations: sampling and scoring. Sampling $Y$ is trivial. First, we sample $X=x$, then calculate $y=g(x)$. Scoring $Y$, or rather, evaluating the log-density $\log(p_Y(y))$, is more involved. How does the density of $Y$ relate to the density of $X$? We can use the substitution rule of integral calculus to answer this. Suppose we want to evaluate the expectation of some function of $X$. Then,
+
+$$
+\begin{aligned}
+\mathbb{E}_{p_X(\cdot)}\left[f(X)\right] &= \int_{\text{supp}(X)}f(x)p_X(x)dx\\
+    &= \int_{\text{supp}(Y)}f(g^{-1}(y))p_X(g^{-1}(y))\left|\frac{dx}{dy}\right|dy \\
+    &= \mathbb{E}_{p_Y(\cdot)}\left[f(g^{-1}(Y))\right],
+\end{aligned}
+$$
+
+where $\text{supp}(X)$ denotes the support of $X$, which in this case is $(-\infty,\infty)$. Crucially, we used the fact that $g$ is bijective to apply the substitution rule in going from the first to the second line. Equating the last two lines we get,
+
+$$
+\begin{aligned}
+    \log(p_Y(y)) &= \log(p_X(g^{-1}(y)))+\log\left(\left|\frac{dx}{dy}\right|\right)\\
+        &= \log(p_X(g^{-1}(y)))-\log\left(\left|\frac{dy}{dx}\right|\right).
+\end{aligned}
+$$
+
+Intuitively, this equation says that the log-density of $Y$ is equal to the log-density at the corresponding point in $X$ plus a term that corrects for the change in volume of an infinitesimally small interval around $Y$ caused by the transformation.
+
+If $g$ is cleverly constructed (and we will see several examples shortly), we can produce distributions that are more complex than standard normal noise and yet have easy sampling and computationally tractable scoring. Moreover, we can compose such bijective transformations to produce even more complex distributions.
By an inductive argument, if we have $L$ transforms $g_{(0)}, g_{(1)},\ldots,g_{(L-1)}$, then the log-density of the transformed variable $Y=(g_{(0)}\circ g_{(1)}\circ\cdots\circ g_{(L-1)})(X)$ is
+
+$$
+\begin{aligned}
+    \log(p_Y(y)) &= \log\left(p_X\left(\left(g_{(L-1)}^{-1}\circ\cdots\circ g_{(0)}^{-1}\right)\left(y\right)\right)\right)+\sum^{L-1}_{l=0}\log\left(\left|\frac{dg^{-1}_{(l)}(y_{(l)})}{dy'}\right|\right),
+\end{aligned}
+$$
+
+where we've defined $y_{(0)}=x$, $y_{(L-1)}=y$ for convenience of notation. In the [following section](/users/multivariate), we will see how to generalize this method to multivariate $X$.
+
+## Fixed Univariate `Bijector`s
+[FlowTorch](https://flowtorch.ai) contains classes for representing *fixed* univariate bijective transformations. These are particularly useful for restricting the range of transformed distributions, for example to lie on the unit hypercube. (In the following sections, we will explore how to represent learnable bijectors.)
+
+Let us begin by showing how to represent and manipulate a simple transformed distribution,
+
+$$
+\begin{aligned}
+    X &\sim \mathcal{N}(0,1)\\
+    Y &= \exp(X).
+\end{aligned}
+$$
+
+You may have recognized that this is, by definition, $Y\sim\text{LogNormal}(0,1)$.
+
+We begin by importing the relevant libraries:
+
+```python
+import torch
+import flowtorch.bijectors as bij
+import flowtorch.distributions as dist
+import matplotlib.pyplot as plt
+import seaborn as sns
+```
+A variety of bijective transformations live in the `flowtorch.bijectors` module, and the classes to define transformed distributions live in `flowtorch.distributions`. We first create the base distribution of $X$ and the class encapsulating the transform $\exp(\cdot)$:
+```python
+dist_x = torch.distributions.Independent(
+    torch.distributions.Normal(torch.zeros(1), torch.ones(1)),
+    1
+)
+bijector = bij.Exp()
+```
+The class `bij.Exp` derives from `bij.Fixed` and defines the forward, inverse, and log-absolute-derivative operations for this transform,
+
+$$
+\begin{aligned}
+    g(x) &= \exp(x)\\
+    g^{-1}(y) &= \log(y)\\
+    \log\left(\left|\frac{dg}{dx}\right|\right) &= x.
+\end{aligned}
+$$
+
+In general, a bijector class defines these three operations, from which it is sufficient to perform sampling and scoring. *We should think of a bijector as a plan to construct a normalizing flow rather than the normalizing flow itself* - it requires being instantiated with a concrete base distribution supplying the relevant shape information,
+```python
+dist_y = dist.Flow(dist_x, bijector)
+```
+This statement returns the object `dist_y` of type `flowtorch.distributions.Flow`, which has an interface compatible with `torch.distributions.Distribution`. We are able to sample and score from the `dist_y` object using its methods `.sample`, `.rsample`, and `.log_prob`.
+
+Now, we plot samples from both the base and transformed distributions to verify that we have produced the log-normal distribution:
+```python
+plt.subplot(1, 2, 1)
+plt.hist(dist_x.sample([1000]).numpy(), bins=50)
+plt.title('Standard Normal')
+plt.subplot(1, 2, 2)
+plt.hist(dist_y.sample([1000]).numpy(), bins=50)
+plt.title('Standard Log-Normal')
+plt.show()
+```
+
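+
+As a quick sanity check of the `.log_prob` method mentioned above (this snippet is ours and is not part of the original tutorial), the scores from `dist_y` should agree with PyTorch's built-in `LogNormal` density:
+
+```python
+# dist_y should behave like LogNormal(0, 1); compare log-densities on a few samples
+y = dist_y.sample([5])
+reference = torch.distributions.LogNormal(0.0, 1.0).log_prob(y).sum(-1)
+print(dist_y.log_prob(y))
+print(reference)  # the two tensors should match up to numerical precision
+```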

+ +

+
+Our example uses a single transform. However, we can compose transforms to produce more expressive distributions. For instance, if we apply an affine transformation, we can produce the general log-normal distribution,
+
+$$
+\begin{aligned}
+    X &\sim \mathcal{N}(0,1)\\
+    Y &= \exp(\mu+\sigma X),
+\end{aligned}
+$$
+
+or rather, $Y\sim\text{LogNormal}(\mu,\sigma^2)$. In FlowTorch this is accomplished, e.g., for $\mu=3, \sigma=0.5$, as follows:
+```python
+bijectors = bij.Compose([
+    bij.AffineFixed(loc=3, scale=0.5),
+    bij.Exp()])
+dist_y = dist.Flow(dist_x, bijectors)
+
+plt.subplot(1, 2, 1)
+plt.hist(dist_x.sample([1000]).numpy(), bins=50)
+plt.title('Standard Normal')
+plt.subplot(1, 2, 2)
+plt.hist(dist_y.sample([1000]).numpy(), bins=50)
+plt.title('Log-Normal')
+plt.show()
+```
+
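+
+To convince yourself that the composed flow behaves as claimed, a small check (ours, not from the original tutorial) is to compare its scores against `LogNormal(3, 0.5)` directly:
+
+```python
+# The composed flow should match LogNormal(loc=3, scale=0.5), since Y = exp(3 + 0.5 X)
+y = dist_y.sample([5])
+print(dist_y.log_prob(y))
+print(torch.distributions.LogNormal(3.0, 0.5).log_prob(y).sum(-1))
+```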

+ +

+
+The class `bij.Compose` combines multiple `Bijector`s with [function composition](https://en.wikipedia.org/wiki/Function_composition) to produce a single *plan* for a Normalizing Flow, which is then instantiated in the regular way. For the forward operation, transformations are applied in the order of the list. In this case, first `AffineFixed` is applied to the base distribution and then `Exp`.
+
+## Learnable Univariate `Bijector`s
+Having introduced the interface for bijections and transformed distributions, we now show how to represent *learnable* transforms and use them for density estimation. Our dataset in this section and the next will comprise samples along two concentric circles. Examining the joint and marginal distributions:
+```python
+import numpy as np
+from sklearn import datasets
+from sklearn.preprocessing import StandardScaler
+
+n_samples = 1000
+X, y = datasets.make_circles(n_samples=n_samples, factor=0.5, noise=0.05)
+X = StandardScaler().fit_transform(X)
+
+plt.title(r'Samples from $p(x_1,x_2)$')
+plt.xlabel(r'$x_1$')
+plt.ylabel(r'$x_2$')
+plt.scatter(X[:,0], X[:,1], alpha=0.5)
+plt.show()
+```
+

+ +

+ +```python +plt.subplot(1, 2, 1) +sns.distplot(X[:,0], hist=False, kde=True, + bins=None, + hist_kws={'edgecolor':'black'}, + kde_kws={'linewidth': 2}) +plt.title(r'$p(x_1)$') +plt.subplot(1, 2, 2) +sns.distplot(X[:,1], hist=False, kde=True, + bins=None, + hist_kws={'edgecolor':'black'}, + kde_kws={'linewidth': 2}) +plt.title(r'$p(x_2)$') +plt.show() +``` + +

+ +

+
+We will learn the marginals of the above distribution using a learnable transform, `bij.Spline`, defined on a two-dimensional input:
+
+```python
+dist_x = torch.distributions.Independent(
+    torch.distributions.Normal(torch.zeros(2), torch.ones(2)),
+    1
+)
+bijector = bij.Spline()
+dist_y = dist.Flow(dist_x, bijector)
+```
+
+`bij.Spline` passes each dimension of its input through a separate monotonically increasing function known as a spline. At a high level, a spline is a parametrizable curve for which we can define specific points, known as knots, that it passes through, along with the derivatives at those knots. The knots and their derivatives are parameters that can be learnt, e.g., through stochastic gradient descent on a maximum likelihood objective, as we now demonstrate:
+
+```python
+dataset = torch.tensor(X, dtype=torch.float)
+steps = 1001  # so that the final printout at step 1000 is included
+optimizer = torch.optim.Adam(dist_y.parameters(), lr=1e-2)
+for step in range(steps):
+    optimizer.zero_grad()
+    loss = -dist_y.log_prob(dataset).mean()
+    loss.backward()
+    optimizer.step()
+
+    if step % 200 == 0:
+        print('step: {}, loss: {}'.format(step, loss.item()))
+```
+
+```
+step: 0, loss: 2.682476758956909
+step: 200, loss: 1.278384804725647
+step: 400, loss: 1.2647961378097534
+step: 600, loss: 1.2601449489593506
+step: 800, loss: 1.2561875581741333
+step: 1000, loss: 1.2545257806777954
+```
+
+Plotting samples drawn from the transformed distribution after learning:
+```python
+X_flow = dist_y.sample(torch.Size([1000,])).detach().numpy()
+plt.title(r'Joint Distribution')
+plt.xlabel(r'$x_1$')
+plt.ylabel(r'$x_2$')
+plt.scatter(X[:,0], X[:,1], label='data', alpha=0.5)
+plt.scatter(X_flow[:,0], X_flow[:,1], color='firebrick', label='flow', alpha=0.5)
+plt.legend()
+plt.show()
+```
+

+ +

+ +```python +plt.subplot(1, 2, 1) +sns.distplot(X[:,0], hist=False, kde=True, + bins=None, + hist_kws={'edgecolor':'black'}, + kde_kws={'linewidth': 2}, + label='data') +sns.distplot(X_flow[:,0], hist=False, kde=True, + bins=None, color='firebrick', + hist_kws={'edgecolor':'black'}, + kde_kws={'linewidth': 2}, + label='flow') +plt.title(r'$p(x_1)$') +plt.subplot(1, 2, 2) +sns.distplot(X[:,1], hist=False, kde=True, + bins=None, + hist_kws={'edgecolor':'black'}, + kde_kws={'linewidth': 2}, + label='data') +sns.distplot(X_flow[:,1], hist=False, kde=True, + bins=None, color='firebrick', + hist_kws={'edgecolor':'black'}, + kde_kws={'linewidth': 2}, + label='flow') +plt.title(r'$p(x_2)$') +plt.show() +``` + +
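+
+For a more quantitative check of the marginal fit (our addition, not part of the original tutorial), one could compare the empirical marginals of the data and the flow samples with a two-sample Kolmogorov-Smirnov statistic, assuming SciPy is available:
+
+```python
+from scipy import stats
+
+# Two-sample KS statistics for each marginal (smaller means the samples are more similar)
+for j in range(2):
+    ks = stats.ks_2samp(X[:, j], X_flow[:, j])
+    print('x_{}: KS statistic = {:.3f}'.format(j + 1, ks.statistic))
+```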

+ +

+ +As we can see, we have learnt close approximations to the marginal distributions, $p(x_1),p(x_2)$. *It would have been challenging to fit the irregularly shaped marginals with standard methods, for example, a mixture of normal distributions*. As expected, since there is a dependency between the two dimensions, we do not learn a good representation of the joint, $p(x_1,x_2)$. In the next section, we explain how to learn multivariate distributions whose dimensions are not independent. diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js new file mode 100644 index 00000000..d63e612f --- /dev/null +++ b/website/docusaurus.config.js @@ -0,0 +1,169 @@ +const math = require('remark-math'); +const katex = require('rehype-katex'); + +module.exports = { + title: 'FlowTorch', + tagline: 'Easily learn and sample complex probability distributions with PyTorch', + url: 'https://flowtorch.ai', + baseUrl: '/', + onBrokenLinks: 'throw', + onBrokenMarkdownLinks: 'warn', + favicon: 'img/favicon.png', + organizationName: 'facebookincubator', + projectName: 'flowtorch', + stylesheets: [ + { + href: 'https://cdn.jsdelivr.net/npm/katex@0.12.0/dist/katex.min.css', + type: 'text/css', + integrity: + 'sha384-AfEj0r4/OFrOo5t7NnNe46zW/tFgW6x/bCJG8FqQCEo3+Aro6EYUG4+cU+KJWu/X', + crossorigin: 'anonymous', + }, + ], + baseUrlIssueBanner: true, + themeConfig: { + announcementBar: { + id: 'supportus', + content: + '⭐️ If you like FlowTorch, give it a star on GitHub! ⭐️', + }, + prism: { + theme: require("prism-react-renderer/themes/github"), + darkTheme: require("prism-react-renderer/themes/dracula"), + }, + navbar: { + title: 'FlowTorch', + logo: { + alt: 'FlowTorch Logo', + src: 'img/logo.svg', + }, + items: [ + { + to: 'users', + activeBasePath: 'users', + label: 'Users', + position: 'left', + }, + { + to: 'dev', + activeBasePath: 'dev', + label: 'Developers', + position: 'left', + }, + { + to: 'api', + activeBasePath: 'api', + label: 'Reference', + position: 'left', + }, + { + href: 'https://github.com/facebookincubator/flowtorch/discussions', + label: 'Discussions', + position: 'right', + }, + { + href: 'https://github.com/facebookincubator/flowtorch/releases', + label: 'Releases', + position: 'right', + }, + { + href: 'https://github.com/facebookincubator/flowtorch', + label: 'GitHub', + position: 'right', + }, + ], + }, + footer: { + style: 'dark', + links: [ + { + title: 'Docs', + items: [ + { + label: 'Users Guide', + to: 'users', + }, + { + label: 'Developers Guide', + to: 'dev', + }, + { + label: 'API Reference', + to: 'api', + }, + { + label: 'Roadmap', + href: 'https://github.com/facebookincubator/flowtorch/projects', + }, + ], + }, + { + title: 'Community', + items: [ + { + label: 'Raise an issue', + href: 'https://github.com/facebookincubator/flowtorch/issues/new/choose', + }, + { + label: 'Ask for help', + href: 'https://github.com/facebookincubator/flowtorch/discussions/new', + }, + { + label: 'Give us feedback', + href: 'https://github.com/facebookincubator/flowtorch/discussions/categories/feedback', + }, + { + label: 'Fork the repo', + href: 'https://github.com/facebookincubator/flowtorch/fork', + }, + ], + }, + { + title: 'Legal', + items: [ + { + label: 'MIT Open Source License', + href: 'https://github.com/facebookincubator/flowtorch/blob/main/LICENSE.txt', + }, + { + label: 'Code of Conduct', + href: 'https://www.contributor-covenant.org/version/1/4/code-of-conduct/', + }, + // Please do not remove the privacy and terms, it's a legal requirement. 
+ { + label: 'Privacy', + href: 'https://opensource.facebook.com/legal/privacy/', + target: '_blank', + rel: 'noreferrer noopener', + }, + { + label: 'Terms', + href: 'https://opensource.facebook.com/legal/terms/', + target: '_blank', + rel: 'noreferrer noopener', + }, + ], + }, + ], + copyright: `Copyright © ${new Date().getFullYear()} Facebook, Inc. and its affiliates. All Rights Reserved.`, + }, + }, + presets: [ + [ + '@docusaurus/preset-classic', + { + docs: { + sidebarPath: require.resolve('./sidebars.js'), + editUrl: 'https://github.com/facebookincubator/flowtorch/edit/main/website/', + routeBasePath: '/', + remarkPlugins: [math], + rehypePlugins: [katex], + }, + blog: false, + theme: { + customCss: require.resolve('./src/css/custom.css'), + }, + }, + ], + ], +}; diff --git a/website/flowtorch-ai.png b/website/flowtorch-ai.png new file mode 100644 index 00000000..ce3dadab Binary files /dev/null and b/website/flowtorch-ai.png differ diff --git a/website/package.json b/website/package.json new file mode 100644 index 00000000..0ac06e12 --- /dev/null +++ b/website/package.json @@ -0,0 +1,45 @@ +{ + "name": "flowtorch", + "version": "0.0.0", + "private": true, + "scripts": { + "docusaurus": "docusaurus", + "start": "docusaurus start", + "build": "docusaurus build", + "swizzle": "docusaurus swizzle", + "deploy": "docusaurus deploy", + "serve": "docusaurus serve", + "clear": "docusaurus clear" + }, + "dependencies": { + "@docusaurus/core": "2.0.0-alpha.70", + "@docusaurus/preset-classic": "2.0.0-alpha.70", + "@fortawesome/fontawesome-free": "^5.15.1", + "@fortawesome/fontawesome-svg-core": "^1.2.32", + "@fortawesome/free-brands-svg-icons": "^5.15.1", + "@fortawesome/free-regular-svg-icons": "^5.15.1", + "@fortawesome/free-solid-svg-icons": "^5.15.1", + "@fortawesome/react-fontawesome": "^0.1.14", + "@mdx-js/react": "^1.6.21", + "clsx": "^1.1.1", + "node-fetch": "^2.6.1", + "node-gyp": "^7.1.2", + "react": "^16.8.4", + "react-dom": "^16.8.4", + "react-icons": "^4.2.0", + "rehype-katex": "^4.0.0", + "remark-math": "^3.0.1" + }, + "browserslist": { + "production": [ + ">0.5%", + "not dead", + "not op_mini all" + ], + "development": [ + "last 1 chrome version", + "last 1 firefox version", + "last 1 safari version" + ] + } +} diff --git a/website/sidebars.js b/website/sidebars.js new file mode 100644 index 00000000..8a16e5e1 --- /dev/null +++ b/website/sidebars.js @@ -0,0 +1,15 @@ +var apiSideBar = require('./api.sidebar.js'); + +module.exports = { + usersSidebar: { + "Getting Started": ['users/introduction', 'users/installation', 'users/start'], + "Tutorials": ['users/univariate', 'users/multivariate'], + "Basic Concepts": ['users/shapes'], + }, + devsSidebar: { + "General": ['dev/contributing', 'dev/releases', 'dev/about'], + "Extending the Library": ['dev/overview', 'dev/ops', 'dev/docs', 'dev/tests'], + "Resources": ['dev/bibliography'], + }, + apiSidebar: apiSideBar, +}; diff --git a/website/src/css/custom.css b/website/src/css/custom.css new file mode 100644 index 00000000..32259d4a --- /dev/null +++ b/website/src/css/custom.css @@ -0,0 +1,528 @@ +/* stylelint-disable docusaurus/copyright-header */ +/** + * Any CSS included here will be global. The classic template + * bundles Infima by default. Infima is a CSS framework designed to + * work well for content-centric websites. + */ + +/* You can override the default Infima variables here. 
*/ +:root { + --ifm-background-color: #fff; + --ifm-background-surface-color: #f2f6fa; + --ifm-color-primary: #ff6344; + --ifm-color-primary-dark: #ff4824; + --ifm-color-primary-darker: #ff3b14; + --ifm-color-primary-darkest: #e22500; + --ifm-color-primary-light: #ff7e64; + --ifm-color-primary-lighter: #ff8b74; + --ifm-color-primary-lightest: #ffb4a5; + --ifm-card-background-color: var(--ifm-background-color); + --ifm-footer-background-color: var(--ifm-background-surface-color); + --ifm-hero-background-color: rgb(36, 37, 38); /*var(--ifm-background-surface-color); /* rgb(43, 49, 55);*/ + --ifm-hero-text-color: white; /* rgb(68, 73, 80);*/ + --ifm-navbar-background-color: white; + --color-button-primary: rgb(255, 99, 68); + --color-button-primary-hover: rgb(255, 81, 46); + --color-button-secondary: rgb(255, 186, 0); + --color-button-secondary-hover: rgb(233, 169, 0); + --ifm-footer-link-hover-color: var(--ifm-navbar-link-hover-color) +} + +div[class^='announcementBarContent'] { + background-color: var(--ifm-background-surface-color); + color: var(--ifm-font-color-base); +} + +html[data-theme="dark"] { + --ifm-color-primary: #ffcf5d; + --ifm-color-primary-dark: #ffc53a; + --ifm-color-primary-darker: #ffc029; + --ifm-color-primary-darkest: #f4ab00; + --ifm-color-primary-light: #ffd980; + --ifm-color-primary-lighter: #ffde91; + --ifm-color-primary-lightest: #ffeec5; + --ifm-navbar-background-color: rgb(36, 37, 38); + --ifm-hero-text-color: white; +} + +.hero__subtitle { + font-size: 3.75rem; + text-align: left; +} + +.hero__subtitle .hero__primary { + color:var(--color-button-primary); + text-shadow: 1px 1px 2px var(--color-button-primary-hover); +} + +.hero__subtitle .hero__secondary { + color: var(--color-button-secondary); + text-shadow: 1px 1px 2px var(--color-button-secondary-hover); +} + +.hero__github_button { + border: none; +} + +.hero__buttons .button--primary { + background-color: var(--color-button-primary); + border-color: var(--color-button-primary); +} + +.hero__buttons .button--primary:hover { + background-color: var(--color-button-primary-hover); + border-color: var(--color-button-primary-hover); +} + +html[data-theme="dark"] .hero__buttons .button--primary { + background-color: var(--color-button-primary); + border-color: var(--color-button-primary); +} + +html[data-theme="dark"] .hero__buttons .button--primary:hover { + background-color: var(--color-button-primary-hover); + border-color: var(--color-button-primary-hover); +} + +#features .col { + padding-left: 2rem; + padding-right: 2rem; +} + +.hero__img { + float: right; + max-width: 200px; + padding-left: 40px; + padding-right: 6rem; + padding-top: 1.1rem; +} + +.hero__buttons { + padding-top: 2rem; +} + +.hero__buttons .button { + margin-right: 3rem; +} + +.banner_src-theme-Hero- { + text-align: left; +} + +.hero__buttons .button--lg { + --ifm-button-size-multiplier: 1.5; +} + +.docusaurus-highlight-code-line { + display: block; + margin: 0 calc(-1 * var(--ifm-pre-padding)); + padding: 0 var(--ifm-pre-padding); + background-color: rgb(72, 77, 91); +} + +html[data-theme="dark"] .docusaurus-highlight-code-line { + background-color: rgba(0, 0, 0, 0.3); +} + +/*.navbar__items { + vertical-align: middle; +}*/ + +a { + transition: none; +} + +.navbar__item.navbar__link[href*="users"] { + display: flex; + padding: 0px 12px; + padding-right: 12px; +} +.navbar__item.navbar__link[href*="users"]:before { + mask: url('/img/user-astronaut-solid.svg'); + mask-size: cover; + display: inline-block; + content: ''; + background-color: 
black; + width: 0.875rem; + height: 1rem; + margin-top: 0.3rem; + margin-right: 0.5rem; +} + +.navbar__title:hover { + /*color: red;*/ + text-align: center; + -webkit-animation: glow 1s ease-in-out infinite alternate; + -moz-animation: glow 1s ease-in-out infinite alternate; + animation: glow 1s ease-in-out infinite alternate; +} + +@keyframes glow { + from { + text-shadow: 0 0 10px #fff, 0 0 20px #fff, 0 0 30px #e60073, 0 0 40px #e60073, 0 0 50px #e60073, 0 0 60px #e60073, 0 0 70px #e60073; + } + to { + text-shadow: 0 0 20px #fff, 0 0 30px #ff4da6, 0 0 40px #ff4da6, 0 0 50px #ff4da6, 0 0 60px #ff4da6, 0 0 70px #ff4da6, 0 0 80px #ff4da6; + } +} + +html[data-theme="dark"] .navbar__item.navbar__link[href*="users"]:before { + background-color: white; +} + +html[data-theme="dark"] .navbar__item.navbar__link[href*="users"]:hover:before { + background-color: var(--ifm-navbar-link-hover-color); +} + +.navbar__item.navbar__link[href*="users"]:hover:before { + background-color: var(--ifm-navbar-link-hover-color); +} + +.navbar__item.navbar__link[href*="dev"] { + display: flex; + padding: 0px 12px; +} +.navbar__item.navbar__link[href*="dev"]:before { + mask: url("/img/hat-wizard-solid.svg"); + mask-size: cover; + display: inline-block; + content: ''; + background-color: black; + width: 1rem; + height: 1rem; + margin-top: 0.3rem; + margin-right: 0.5rem; +} + +.navbar__item.navbar__link[href*="dev"]:hover:before { + background-color: var(--ifm-navbar-link-hover-color); +} + +html[data-theme="dark"] .navbar__item.navbar__link[href*="dev"]:before { + background-color: white; +} + +html[data-theme="dark"] .navbar__item.navbar__link[href*="dev"]:hover:before { + background-color: var(--ifm-navbar-link-hover-color); +} + +.navbar__item.navbar__link[href*="api"] { + display: flex; + padding: 0px 12px; +} +.navbar__item.navbar__link[href*="api"]:before { + mask: url("/img/book-solid.svg"); + mask-size: cover; + display: inline-block; + content: ''; + background-color: black; + width: 0.875rem; + height: 1rem; + margin-top: 0.3rem; + margin-right: 0.5rem; +} + +.navbar__item.navbar__link[href*="api"]:hover:before { + background-color: var(--ifm-navbar-link-hover-color); +} + +html[data-theme="dark"] .navbar__item.navbar__link[href*="api"]:before { + background-color: white; +} + +html[data-theme="dark"] .navbar__item.navbar__link[href*="api"]:hover:before { + background-color: var(--ifm-navbar-link-hover-color); +} + +.footer__link-item:before { + opacity:0; +} + +.footer__link-item:hover:before { + opacity:1; +} + +.footer__link-item:hover:before { + animation: fadeIn ease 0.25s; + -webkit-animation: fadeIn ease 0.25s; + -moz-animation: fadeIn ease 0.25s; + -o-animation: fadeIn ease 0.25s; + -ms-animation: fadeIn ease 0.25s; +} + +@keyframes fadeIn { + 0% {opacity:0;} + 100% {opacity:1;} +} + +.footer__link-item { + margin-left: -1.75rem; +} + +.footer__link-item[href*="api"] { + display: flex; +} + +.footer__link-item[href*="api"]:before { + mask: url("/img/book-solid.svg"); + mask-size: cover; + display: inline-block; + content: ''; + background-color: white; + width: 0.875rem; + height: 1rem; + margin-top: 0.5rem; + margin-right: 0.6875rem; + margin-left: 0.1875rem; +} + + +.footer__link-item[href*="dev"] { + display: flex; +} + +.footer__link-item[href*="dev"]:before { + mask: url("/img/hat-wizard-solid.svg"); + mask-size: cover; + display: inline-block; + content: ''; + background-color: white; + width: 1rem; + height: 1rem; + margin-top: 0.5rem; + margin-left: 0.125rem; + margin-right: 0.625rem; +} + 
+.footer__link-item[href*="users"] { + display: flex; +} + +.footer__link-item[href*="users"]:before { + mask: url("/img/user-astronaut-solid.svg"); + mask-size: cover; + display: inline-block; + content: ''; + background-color: white; + width: 0.875rem; + height: 1rem; + margin-top: 0.5rem; + margin-right: 0.6875rem; + margin-left: 0.1875rem; +} + +.footer__link-item[href*="projects"] { + display: flex; +} + +.footer__link-item[href*="projects"]:before { + mask: url("/img/tasks-solid.svg"); + mask-size: cover; + display: inline-block; + content: ''; + background-color: white; + width: 0.875rem; + height: 1rem; + margin-top: 0.5rem; + margin-right: 0.6875rem; + margin-left: 0.1875rem; +} + +.footer__link-item[href*="fork"] { + display: flex; +} + +.footer__link-item[href*="fork"]:before { + mask: url("/img/code-branch-solid.svg"); + mask-size: cover; + display: inline-block; + content: ''; + background-color: white; + width: 0.75rem; + height: 1rem; + margin-top: 0.5rem; + margin-right: 0.875rem; + margin-left: 0.1875rem; +} + +.footer__link-item[href*="choose"] { + display: flex; +} + +.footer__link-item[href*="choose"]:before { + mask: url("/img/hand-sparkles-solid.svg"); + mask-size: cover; + display: inline-block; + content: ''; + background-color: white; + width: 1.25rem; + height: 1rem; + margin-top: 0.5rem; + margin-right: 0.5rem; +} + +.footer__link-item[href*="discussions"] { + display: flex; +} + +.footer__link-item[href*="discussions"]:before { + mask: url("/img/comments-solid.svg"); + mask-size: cover; + display: inline-block; + content: ''; + background-color: white; + width: 1.125rem; + height: 1rem; + margin-top: 0.5rem; + margin-right: 0.5625rem; + margin-left: 0.0625rem; +} + +.footer__link-item[href*="feedback"] { + display: flex; +} + +.footer__link-item[href*="feedback"]:before { + mask: url("/img/smile-solid.svg"); + mask-size: cover; + display: inline-block; + content: ''; + background-color: white; + width: 0.99rem; + height: 1rem; + margin-top: 0.5rem; + margin-right: 0.63rem; + margin-left: 0.13rem; +} + +.footer__link-item[href*="LICENSE.txt"] { + display: flex; +} + +.footer__link-item[href*="LICENSE.txt"]:before { + mask: url("/img/scroll-solid.svg"); + mask-size: cover; + display: inline-block; + content: ''; + background-color: white; + width: 1.25rem; + height: 1rem; + margin-top: 0.5rem; + margin-right: 0.5rem; + /*margin-left: 0.1875rem;*/ +} + +.footer__link-item[href*="code-of-conduct/"] { + display: flex; +} + +.footer__link-item[href*="code-of-conduct/"]:before { + mask: url("/img/handshake-solid.svg"); + mask-size: cover; + display: inline-block; + content: ''; + background-color: white; + width: 1.25rem; + height: 1rem; + margin-top: 0.5rem; + margin-right: 0.5rem; + /*margin-left: 0.1875rem;*/ +} + +.footer__link-item[href*="legal/terms"]:before { + mask: url("/img/scroll-solid.svg"); + mask-size: cover; + display: inline-block; + content: ''; + background-color: white; + width: 1.25rem; + height: 1rem; + margin-top: 0.5rem; + margin-right: 0.5rem; + /*margin-left: 0.1875rem;*/ +} + +.footer__link-item[href*="legal/privacy"]:before { + mask: url("/img/handshake-solid.svg"); + mask-size: cover; + display: inline-block; + content: ''; + background-color: white; + width: 1.25rem; + height: 1rem; + margin-top: 0.5rem; + margin-right: 0.5rem; + /*margin-left: 0.1875rem;*/ +} + +.button[href*="user"] { + display: flex; +} + +.button[href*="user"]:before { + mask: url("/img/play-circle-solid.svg"); + mask-size: cover; + display: inline-block; + content: ''; 
+ background-color: white; + width: 1.0rem; + height: 1rem; + margin-top: 0.5rem; + margin-right: 0.5rem; +} + +.button[href*="dev"] { + display: flex; +} + +.button[href*="dev"]:before { + mask: url("/img/tools-solid.svg"); + mask-size: cover; + display: inline-block; + content: ''; + background-color: white; + width: 1.0rem; + height: 1rem; + margin-top: 0.5rem; + margin-right: 0.5rem; +} + +html[data-theme="dark"] .button[href*="user"]:before { + background-color: black; +} + +html[data-theme="dark"] .button[href*="dev"]:before { + background-color: black; +} + +.footer__link-item:hover:before { + background-color: var(--ifm-footer-link-hover-color); +} + +.navbar__item.navbar__link--active[href*="user"]:before { + background-color: var(--ifm-navbar-link-hover-color); +} + +.navbar__item.navbar__link--active[href*="dev"]:before { + background-color: var(--ifm-navbar-link-hover-color); +} + +.navbar__item.navbar__link--active[href*="api"]:before { + background-color: var(--ifm-navbar-link-hover-color); +} + +/*.tooltip { + position: relative; + display: inline-block; +} + +.tooltip .tooltiptext::after { + content: " "; + position: absolute; + bottom: 100%; + left: 50%; + margin-left: -5px; + border-width: 5px; + border-style: solid; + border-color: transparent transparent black transparent; +}*/ diff --git a/website/src/pages/index.js b/website/src/pages/index.js new file mode 100644 index 00000000..d482e301 --- /dev/null +++ b/website/src/pages/index.js @@ -0,0 +1,35 @@ +import React from 'react'; +import clsx from 'clsx'; +import Layout from '@theme/Layout'; +import Link from '@docusaurus/Link'; +import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; +import useBaseUrl from '@docusaurus/useBaseUrl'; +import styles from './styles.module.css'; + +// Prism (Rust) +import Prism from "prism-react-renderer/prism"; +(typeof global !== "undefined" ? global : window).Prism = Prism; +require("prismjs/components/prism-rust"); + +// Our theme +import Examples from "@theme/Examples"; +import Features from "@theme/Features"; +import Hero from "@theme/Hero"; + +function Home() { + const context = useDocusaurusContext(); + const {siteConfig = {}} = context; + return ( + + +
+ + +
+
+ ); +} + +export default Home; diff --git a/website/src/pages/styles.module.css b/website/src/pages/styles.module.css new file mode 100644 index 00000000..53ddcd63 --- /dev/null +++ b/website/src/pages/styles.module.css @@ -0,0 +1,42 @@ +/* stylelint-disable docusaurus/copyright-header */ + +/** + * CSS files with the .module.css suffix will be treated as CSS modules + * and scoped locally. + */ + +.heroBanner { + padding: 4rem 0; + text-align: center; + position: relative; + overflow: hidden; +} + +@media screen and (max-width: 966px) { + .heroBanner { + padding: 2rem; + } +} + +.heroImg { + height: 200px; + margin: 10px 0; +} + +.buttons { + display: flex; + align-items: center; + justify-content: center; +} + +.features { + display: flex; + align-items: center; + padding: 2rem 0; + width: 100%; +} + +.featureImage { + height: 200px; + width: 200px; +} diff --git a/website/src/theme/CodeSnippet/index.js b/website/src/theme/CodeSnippet/index.js new file mode 100644 index 00000000..a5081c7d --- /dev/null +++ b/website/src/theme/CodeSnippet/index.js @@ -0,0 +1,57 @@ +import React, { useEffect, useState } from "react"; +import Highlight, { defaultProps } from "prism-react-renderer"; +import useDocusaurusContext from "@docusaurus/useDocusaurusContext"; +import useThemeContext from "@theme/hooks/useThemeContext"; + +import styles from "./styles.module.css"; + +function CodeSnippet(props) { + const { + siteConfig: { + themeConfig: { prism = {} }, + }, + } = useDocusaurusContext(); + + const [mounted, setMounted] = useState(false); + // The Prism theme on SSR is always the default theme but the site theme + // can be in a different mode. React hydration doesn't update DOM styles + // that come from SSR. Hence force a re-render after mounting to apply the + // current relevant styles. There will be a flash seen of the original + // styles seen using this current approach but that's probably ok. Fixing + // the flash will require changing the theming approach and is not worth it + // at this point. + useEffect(() => { + setMounted(true); + }, []); + + const { isDarkTheme } = useThemeContext(); + const lightModeTheme = prism.theme; + const darkModeTheme = prism.darkTheme || lightModeTheme; + const prismTheme = isDarkTheme ? darkModeTheme : lightModeTheme; + + const { language = "python", code } = props; + + return ( + + {({ className, style, tokens, getLineProps, getTokenProps }) => ( +
+          {tokens.map((line, i) => (
+            
+ {line.map((token, key) => ( + + ))} +
+ ))} +
+ )} +
+ ); +} + +export default CodeSnippet; diff --git a/website/src/theme/CodeSnippet/styles.module.css b/website/src/theme/CodeSnippet/styles.module.css new file mode 100644 index 00000000..e88b5dbb --- /dev/null +++ b/website/src/theme/CodeSnippet/styles.module.css @@ -0,0 +1,3 @@ +.code { + font-size: 10pt; +} diff --git a/website/src/theme/Examples/index.js b/website/src/theme/Examples/index.js new file mode 100644 index 00000000..ce533ad1 --- /dev/null +++ b/website/src/theme/Examples/index.js @@ -0,0 +1,63 @@ +import React from "react"; +import CodeSnippet from "@theme/CodeSnippet"; +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; + +import Headline from "@theme/Headline"; +import snippets from "./snippets"; +import styles from "./styles.module.css"; + +function renderTabs() { + return ( + { + return { label: props.label, value: props.label }; + })} + className={styles.tabs} + > + {snippets.map((props, idx) => ( + + + + ))} + + ); +} + +function Examples() { + return ( + <> + {snippets && snippets.length && ( +
+
+
+
+ + {renderTabs()} +
+
+
+ + + + + + + + + + +
+
+
+
+
+ )} + + ); +} + +export default Examples; diff --git a/website/src/theme/Examples/snippets.js b/website/src/theme/Examples/snippets.js new file mode 100644 index 00000000..2083f6f9 --- /dev/null +++ b/website/src/theme/Examples/snippets.js @@ -0,0 +1,43 @@ +const snippets = [ + { + label: "Bivariate Normal", + code: +`import torch +import flowtorch.bijectors as bij +import flowtorch.distributions as dist +import flowtorch.parameters as params + +# Lazily instantiated flow plus base and target distributions +params = params.DenseAutoregressive(hidden_dims=(32,)) +bijectors = bij.AffineAutoregressive(params=params) +base_dist = torch.distributions.Independent( + torch.distributions.Normal(torch.zeros(2), torch.ones(2)), + 1 +) +target_dist = torch.distributions.Independent( + torch.distributions.Normal(torch.zeros(2)+5, torch.ones(2)*0.5), + 1 +) + +# Instantiate transformed distribution and parameters +flow = dist.Flow(base_dist, bijectors) + +# Training loop +opt = torch.optim.Adam(flow.parameters(), lr=5e-3) +frame = 0 +for idx in range(3001): + opt.zero_grad() + + # Minimize KL(p || q) + y = target_dist.sample((1000,)) + loss = -flow.log_prob(y).mean() + + if idx % 500 == 0: + print('epoch', idx, 'loss', loss) + + loss.backward() + opt.step()`, + }, +]; + +export default snippets; diff --git a/website/src/theme/Examples/styles.module.css b/website/src/theme/Examples/styles.module.css new file mode 100644 index 00000000..5229caf5 --- /dev/null +++ b/website/src/theme/Examples/styles.module.css @@ -0,0 +1,105 @@ +/*.examples { + border-top: 1px solid var(--ifm-color-emphasis-300); +}*/ + +.examples { + margin-bottom: 3.5rem; +} + +.animation_svg { + margin: auto; + padding-top: 8rem; + width: 30rem; + height: 38rem; +} + +.animation > * { + opacity: 0; + animation-duration: 7s; + animation-iteration-count: infinite; + animation-timing-function: steps(1); +} + +.example_container { display: flex; justify-content: center; height: 721px;} + +html[data-theme="dark"] .example_container svg { + filter: invert(100%); +} + +@keyframes animation-1 { + 0% { + opacity: 1; + } + 16.66666% { + opacity: 0; + } +} + +.animation > :nth-child(1) { + animation-name: animation-1; +} + +@keyframes animation-2 { + 16.66666% { + opacity: 1; + } + 33.33333% { + opacity: 0; + } +} + +.animation > :nth-child(2) { + animation-name: animation-2; +} + +@keyframes animation-3 { + 33.33333% { + opacity: 1; + } + 50% { + opacity: 0; + } +} + +.animation > :nth-child(3) { + animation-name: animation-3; +} + +@keyframes animation-4 { + 50% { + opacity: 1; + } + 66.66667% { + opacity: 0; + } +} + +.animation > :nth-child(4) { + animation-name: animation-4; +} + +@keyframes animation-5 { + 66.66667% { + opacity: 1; + } + 83.33333% { + opacity: 0; + } +} + +.animation > :nth-child(5) { + animation-name: animation-5; +} + +@keyframes animation-6 { + 83.33333% { + opacity: 1; + } + 100% { + opacity: 1; + } +} + +.animation > :nth-child(6) { + animation-name: animation-6; +} diff --git a/website/src/theme/Features/index.js b/website/src/theme/Features/index.js new file mode 100644 index 00000000..98ee6519 --- /dev/null +++ b/website/src/theme/Features/index.js @@ -0,0 +1,85 @@ +import React from "react"; +import clsx from "clsx"; +import { FaMeteor, FaDumbbell, FaHandsHelping, FaCubes, FaIndustry } from "react-icons/fa"; + +import styles from "./styles.module.css"; + +const size = 24; +const data = [ + { + icon: , + title: <>Simple but powerful, + description: ( + <> + Design, train, and sample from complex probability 
distributions using only a few lines of code. Advanced features such as conditionality, caching, and structured representations are planned for future releases. + + ), + }, + { + icon: , + title: <>Community focused, + description: ( + <> + We help you be a successful user or contributor through detailed user, developer, and API guides. Educational tutorials and research benchmarks are planned for the future. We welcome your feedback! + + ), + }, + { + icon: , + title: <>Modular and extendable, + description: ( + <> + Combine multiple bijections to form complex normalizing flows, and mix-and-match conditioning networks with bijections. + FlowTorch has a well-defined interface so you can easily create your own components! + + ), + }, + { + icon: , + title: <>Production ready, + description: ( + <> + Proven code migrated from Pyro, with improved unit testing and continuous integration. + And it is easy to add standard unit tests to components you write yourself! + + ), + }, +]; + +function Feature({ icon, title, description }) { + return ( +
+
+
+ {icon &&
{icon}
} +

{title}

+
+

{description}

+
+
+ ); +} + +function Features() { + return ( + <> + {data && data.length && ( +
+
+
+
+
+ {data.map((props, idx) => ( + + ))} +
+
+
+
+
+ )} + + ); +} + +export default Features; diff --git a/website/src/theme/Features/styles.module.css b/website/src/theme/Features/styles.module.css new file mode 100644 index 00000000..78aab439 --- /dev/null +++ b/website/src/theme/Features/styles.module.css @@ -0,0 +1,40 @@ +.features { + display: flex; + align-items: center; + width: 100%; + padding-bottom: 1rem !important; + padding-top: 6rem; + background-color: var(--ifm-background-color); +} + +.features .feature p { + margin-bottom: 0; +} + +.features .feature:not(:last-child) { + margin-bottom: 3rem; +} + +/* @media screen and (min-width: 576px) { + margin-bottom: 3rem; + padding-right: 5rem; + }*/ + +.features .feature .header { + display: flex; + flex-direction: row; + align-items: center; + margin-bottom: 1rem; +} + +.features .feature .header .icon { + display: flex; + align-items: center; + margin-right: 1rem; + color: var(--ifm-color-primary); +} + +.features .feature .header .title { + font-size: 1.25rem; + margin-bottom: 0; +} diff --git a/website/src/theme/Headline/index.js b/website/src/theme/Headline/index.js new file mode 100644 index 00000000..7887a90c --- /dev/null +++ b/website/src/theme/Headline/index.js @@ -0,0 +1,33 @@ +import React from "react"; +import { PropTypes } from "prop-types"; + +import styles from "./styles.module.css"; + +function Headline(props) { + const { category, title, subtitle, offset } = props; + + return ( +
+
+
+ {category && {category}} + {title &&

{title}

} + {subtitle &&

{subtitle}

} +
+
+
+ ); +} + +Headline.propTypes = { + category: PropTypes.string, + title: PropTypes.string, + subtitle: PropTypes.string, + offset: PropTypes.number, +}; + +Headline.defaultProps = { + offset: 0, +}; + +export default Headline; diff --git a/website/src/theme/Headline/styles.module.css b/website/src/theme/Headline/styles.module.css new file mode 100644 index 00000000..f2e3bddd --- /dev/null +++ b/website/src/theme/Headline/styles.module.css @@ -0,0 +1,28 @@ +.headline { + margin-top: 2rem; +} + +.category { + display: inline-flex; + align-items: center; + margin-bottom: 1rem; + font-weight: bold; + text-transform: uppercase; + color: var(--ifm-color-primary-light); +} + +.title { + max-width: 500px; + font-size: 2rem; + line-height: initial; + word-spacing: -0.25rem; + + @media screen and (min-width: 576px) { + font-size: 2.4rem; + } +} + +.subtitle { + margin-top: 2rem; + color: var(--ifm-color-emphasis-600); +} diff --git a/website/src/theme/Hero/index.js b/website/src/theme/Hero/index.js new file mode 100644 index 00000000..6e456743 --- /dev/null +++ b/website/src/theme/Hero/index.js @@ -0,0 +1,62 @@ +import React from "react"; +import clsx from "clsx"; +import Link from "@docusaurus/Link"; +import useBaseUrl from "@docusaurus/useBaseUrl"; +import useDocusaurusContext from "@docusaurus/useDocusaurusContext"; + +import styles from "./styles.module.css"; + +function Hero() { + const context = useDocusaurusContext(); + const { siteConfig = {} } = context; + + return ( +
+
+
+ +

+ + Easily learn and sample complex probability distributions with PyTorch +

+ +
+
+
+ + Get Started + +
+
+ + Contribute + + +