From c57b27da44ac2f11e89483e91a7fa9c189cd2f24 Mon Sep 17 00:00:00 2001 From: Matt Jones Date: Wed, 31 Jan 2024 10:37:16 -0900 Subject: [PATCH 01/14] Initial pyproject.toml package configuration file. --- pyproject.toml | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..1413902 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,37 @@ +[tool.poetry] +name = "pdgstaging" +version = "0.9.1" +description = "Geospatial data tiling workflow" +authors = [ + "Robyn Thiessen-Bock ", + "Juliet Cohen ", + "Lauren Walker ", + "Matthew B. Jones ", +] +license = 'Apache Software License 2.0' +readme = "README.md" +repository = 'https://github.com/PermafrostDiscoveryGateway/viz-staging' +classifiers=[ + 'Development Status :: 2 - Pre-Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: Apache Software License', + 'Natural Language :: English', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.9', +] + +[tool.poetry.dependencies] +python = ">=3.9" +numpy = ">=1.2" +pandas = ">=1.4, < 2.0" +shapely = ">= 2, < 3.0" +geopandas = ">= 0.12.2, < 1.0" +morecantile = ">= 3.1, < 4.0" +Rtree = ">= 0.9, < 1.0" +filelock = ">= 3.6, < 4.0" +coloraide = ">= 0.10, < 1" +colormaps = "== 0.4.0" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" From c175601a67f20b0d93a393c679881953f1a3b794 Mon Sep 17 00:00:00 2001 From: Matt Jones Date: Wed, 31 Jan 2024 11:28:48 -0900 Subject: [PATCH 02/14] Basic pytest framework setup. 
--- .gitignore | 5 ++++- .vscode/settings.json | 7 +++++++ pyproject.toml | 3 +++ setup.py | 37 ------------------------------------- tests/test_loaddata.py | 12 ++++++++++++ 5 files changed, 26 insertions(+), 38 deletions(-) create mode 100644 .vscode/settings.json delete mode 100644 setup.py create mode 100644 tests/test_loaddata.py diff --git a/.gitignore b/.gitignore index bbeca65..c5697d4 100644 --- a/.gitignore +++ b/.gitignore @@ -129,4 +129,7 @@ dmypy.json .pyre/ # macOS -.DS_Store \ No newline at end of file +.DS_Store + +# Poetry +poetry.lock diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..9b38853 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "python.testing.pytestArgs": [ + "tests" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 1413902..60d2432 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,9 @@ filelock = ">= 3.6, < 4.0" coloraide = ">= 0.10, < 1" colormaps = "== 0.4.0" +[tool.poetry.group.dev.dependencies] +pytest = ">=7" + [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" diff --git a/setup.py b/setup.py deleted file mode 100644 index 02a9381..0000000 --- a/setup.py +++ /dev/null @@ -1,37 +0,0 @@ -from setuptools import setup - -with open('README.md', 'r') as fh: - long_description = fh.read() - -setup( - author='Robyn Thiessen-Bock, Juliet Cohen', - author_email='thiessenbock@nceas.ucsb.edu, jcohen@nceas.ucsb.edu', - name='pdgstaging', - version='0.9.0', - description='PDG Visualization staging pipeline', - long_description=long_description, - long_description_content_type='text/markdown', - url='https://github.com/PermafrostDiscoveryGateway/viz-staging', - packages=['pdgstaging'], - install_requires=[ - 'numpy >= 1.2, < 2.0', - 'pandas >= 1.4, < 2.0', - 'shapely >= 2, < 3.0', - 'geopandas >= 0.12.2, < 1.0', - 
'morecantile >= 3.1, < 4.0', - 'Rtree >= 0.9, < 1.0', - 'filelock >= 3.6, < 4.0', - 'coloraide >= 0.10, < 1', - 'colormaps == 0.4.0' - ], - python_requires='>=3.9, <4.0', - classifiers=[ - 'Development Status :: 2 - Pre-Alpha', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: Apache Software License', - 'Natural Language :: English', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.9', - ], - license='Apache Software License 2.0', -) diff --git a/tests/test_loaddata.py b/tests/test_loaddata.py new file mode 100644 index 0000000..986b9e2 --- /dev/null +++ b/tests/test_loaddata.py @@ -0,0 +1,12 @@ +from pandas import DataFrame + +def test_init(): + """Initialize tests and show they are working. + """ + assert 1 == 1 + +def test_load_data(): + """Load example testing data for staging tests. + """ + df = DataFrame( dict( x=[1,2,3], y=[4,5,6] ) ) + assert df is not None From b06853ad3aacb5272b998e4b3b214becab710af2 Mon Sep 17 00:00:00 2001 From: Matt Jones Date: Wed, 31 Jan 2024 11:32:18 -0900 Subject: [PATCH 03/14] Add dev section to README --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index d0ef8d5..e869716 100644 --- a/README.md +++ b/README.md @@ -78,3 +78,10 @@ The staging process will also output a summary CSV file with one row for each ti - If the deduplication method specified in the configuration is `footprints`, the footprint file(s) are provided with a structure that follows the [docs](https://github.com/PermafrostDiscoveryGateway/viz-staging/blob/main/docs/footprints.md). - In order for logging to work properly, the node running the script that uses this package has a `/tmp` directory so the `log.log` file can populate there. +## Development + +Build and test using poetry and pytest. 
+ +- To build, run `poetry build` +- To test, run `pytest` from the root of the package directory +- VS Code configuration is setup to configure tests as well From 7b6d78a7c6d8b8e71bc8bf5d2f3ef7f53cf0f23a Mon Sep 17 00:00:00 2001 From: Juliet Cohen Date: Wed, 31 Jan 2024 13:36:49 -0800 Subject: [PATCH 04/14] Updated README with citation, added python3.10 as an option for setup, and other details similar to those in metacatui README --- README.md | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index d0ef8d5..9a37949 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,27 @@ -# PDG Staging +# Viz-staging: vector staging for the Permafrost Discovery Gateway visualization pipeline -Divides vector files into tiled vector files according to a specified [OGC Two Dimensional Tile Matrix Set](http://docs.opengeospatial.org/is/17-083r2/17-083r2.html) in preparation for processing into other formats in the PDG workflow. +- **Author**: +- **DOI**: doi:10.18739/______ +- **License**: [Apache 2](https://opensource.org/license/apache-2-0/) +- [Package source code on GitHub](https://github.com/PermafrostDiscoveryGateway/viz-staging) +- [Submit bugs and feature requests](https://github.com/PermafrostDiscoveryGateway/viz-staging/issues/new) + +The `pdgstaging` package divides vector files into tiled vector files according to a specified [OGC Two Dimensional Tile Matrix Set](http://docs.opengeospatial.org/is/17-083r2/17-083r2.html) in preparation for processing into other formats in the Permafrost Discovery Gateway (PDG) workflow. ![PDG staging summary](docs/images/staging_tldr.png) +# Citation + +Cite this software as: + +> Robyn Thiessen-Bock, Juliet Cohen, Matt Jones, Kastan Day, Lauren Walker. 2023. Viz-staging: vector staging for the Permafrost Discovery Gateway visualization pipeline (version 0.9.1). Arctic Data Center. 
doi: 10.18739/______ + ## Install -Requires Python version `3.9` and `libspatialindex` or `libspatialindex-dev` +Requires Python version `3.9` or `3.10` and `libspatialindex` or `libspatialindex-dev` 1. Follow the instructions to install [`libspatialindex`](https://libspatialindex.org/en/latest/) or [`libspatialindex-dev`](https://packages.ubuntu.com/bionic/libspatialindex-dev) -2. Make sure that Python version 3.9 is installed (try `which python3.9`). +2. Make sure that Python version 3.9 or 3.10 is installed (try `which python3.9`). 3. Install `pdgstaging` from GitHub repo using pip: `pip install git+https://github.com/PermafrostDiscoveryGateway/viz-staging.git` ## Usage @@ -20,6 +32,7 @@ Requires Python version `3.9` and `libspatialindex` or `libspatialindex-dev` - run: `python -m pdgstaging -c '/path/to/config.json'` **In python:** + ```python import pdgstaging stager = pdgstaging.TileStager('/path/to/config.json') @@ -29,6 +42,8 @@ stager.stage_all() stager.stage('path/to/input/file.shp') ``` +See more example code in [`PermafrostDiscoveryGateway/viz-info/helpful-code`](https://github.com/PermafrostDiscoveryGateway/viz-info/tree/main/helpful-code) + ## Vector file staging for the PDG tiling pipeline This repository contains code that prepares vector data (e.g. shapefiles, geopackages) for subsequent steps in the [PDG](https://permafrost.arcticdata.io/) tiling pipeline (such as [viz-3dtiles](https://github.com/PermafrostDiscoveryGateway/viz-3dtiles) and [viz-raster](https://github.com/PermafrostDiscoveryGateway/viz-raster)). The staging step creates output vector files that conform to a specified [OGC Two Dimensional Tile Matrix Set](http://docs.opengeospatial.org/is/17-083r2/17-083r2.html) ("TMS"). 
Specifically, for each input file, the staging process: @@ -78,3 +93,22 @@ The staging process will also output a summary CSV file with one row for each ti - If the deduplication method specified in the configuration is `footprints`, the footprint file(s) are provided with a structure that follows the [docs](https://github.com/PermafrostDiscoveryGateway/viz-staging/blob/main/docs/footprints.md). - In order for logging to work properly, the node running the script that uses this package has a `/tmp` directory so the `log.log` file can populate there. +# License + +``` +Copyright [2013] [Regents of the University of California] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +``` + + From defb0a1736e8be0bfbcf7fe9a4b47df5ac792b12 Mon Sep 17 00:00:00 2001 From: Matt Jones Date: Fri, 2 Feb 2024 08:21:42 -0900 Subject: [PATCH 05/14] Added CONTRIBUTING doc --- CONTRIBUTING.md | 206 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..87a0412 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,206 @@ +# Contributing + +:tada: First off, thanks for contributing! 
:tada: + +- [Types of contributions](#types-of-contributions) +- [Pull Requests](#pull-requests) +- [Development Workflow](#development-workflow) +- [Release process](#release-process) +- [Testing](#testing) +- [Code style](#code-style) +- [Contributor license agreement](#contributor-license-agreement) + +## Types of contributions + +We welcome all types of contributions, including bug fixes, feature enhancements, +bug reports, documentation, graphics, and many others. You might consider contributing by: + +- Report a bug or request a new feature in our [issue tracker](https://github.com/PermafrostDiscoveryGateway/viz-staging/issues) +- Fix a bug and contribute the code with a Pull Request +- Write or edit some documentation +- Sharing helpful tips or FAQ-type answers to users or future contributors +- Create screenshots or tutorials of features of MetacatUI +- Answer questions on DataONE Discussions +- ... + +This is an open source project, and we welcome full +participation in the project. Contributions are reviewed and suggestions are +made to increase the value of this software to the community. We strive to +incorporate code, documentation, and other useful contributions quickly and +efficiently while maintaining a high-quality software product. + +## Pull Requests +We use the pull-request model for contributions. See [GitHub's help on pull-requests](https://help.github.com/articles/about-pull-requests/). 
+ +In short: + +- add an [issue](https://github.com/PermafrostDiscoveryGateway/viz-staging/issues) describing your planned changes, or add a comment to an existing issue; +- on GitHub, fork the [repository](https://github.com/PermafrostDiscoveryGateway/viz-staging) +- on your computer, clone your forked copy of the repository +- base your work on the `develop` branch and commit your changes +- push your branch to your forked repository, and submit a pull-request +- our team will be notified of your Pull Request and will review your changes +- our team may request changes before we will approve the Pull Request, or we will make them for you +- once the code is reviewed, our team will merge in your changes to `develop` for the next planned release + +## Development Workflow + +Development is managed through the git repository at https://github.com/PermafrostDiscoveryGateway/viz-staging. The repository is organized into several branches, each with a specific purpose. + +**main**. The `main` branch represents the stable branch that is constantly maintained with the current release. It should generally be safe to install and use the `main` branch the same way as binary releases. The version number in all configuration files and the README on the `main` branch follows [semantic versioning](https://semver.org/) and should always be set to the current stable release, for example `2.8.5`. + +**develop**. Development takes place on a single branch for integrated development and testing of the set of features +targeting the next release. Commits should only be pushed to this branch once they are ready to be deployed to +production immediately after being pushed. This keeps the `develop` branch in a state of readiness for the next release. +Any unreleased code changes on the `develop` branch represent changes that have been tested and staged for the next +release. +The tip of the `develop` branch always represents the set of features that are awaiting the next release. 
The develop +branch represents the opportunity to integrate changes from multiple features for integrated testing before release. + +Version numbers on the `develop` branch represent either the planned next release number (e.g., `2.9.0`), or the planned next release number with a `beta` designator or release candidate `rc` designator appended as appropriate. For example, `2.8.6-beta1` or `2.9.0-rc1`. + +**feature**. To isolate development on a specific set of capabilities, especially if it may be disruptive to other +developers working on the `develop` branch, feature branches should be created. + +Feature branches are named as `feature-` + `{issue}` + `-{short-description}`, with `{issue}` being the GitHub issue number related to that new feature. e.g. `feature-23-refactor-storage`. + +All `feature-*` branches should be frequently merged with changes from `develop` to +ensure that the branch stays up to date with other features that have +been tested and are awaiting release. Thus, each `feature-*` branch can be tested on its own before it is merged with other features on develop, and afterwards as well. Once a feature is complete and ready for full integration testing, it is generally merged into the `develop` branch after review through a pull request. + +**bugfix**. A final branch type is the `bugfix` branch, which works the same as a feature branch, but fixes bugs rather than adding new functionality. Sometimes it is hard to distinguish features from bug fixes, so some repositories may choose to use `feature` branches for both types of change. Bugfix branches are named similarly, following the pattern: `bugfix-` + `{issue}` + `-{short-description}`, with `{issue}` being the GitHub issue number related to that bug. e.g. `bugfix-83-fix-name-display`. 
+ +### Development flow overview + +```mermaid +%%{init: { 'theme': 'base', + 'gitGraph': { + 'rotateCommitLabel': false, + 'showCommitLabel': false + }, + 'themeVariables': { + 'commitLabelColor': '#ffffffff', + 'commitLabelBackground': '#000000' + } +}}%% +gitGraph + commit id: "1" tag: "v1.0.0" + branch develop + checkout develop + commit id: "2" + branch feature-A + commit id: "3" + commit id: "4" + checkout develop + merge feature-A id: "5" + commit id: "6" + commit id: "7" + branch feature-B + commit id: "8" + commit id: "9" + checkout develop + merge feature-B id: "10" type: NORMAL + checkout main + merge develop id: "11" tag: "v1.1.0" +``` + +## Release process + +1. Our release process starts with integration testing in a `develop` branch. Once all +changes that are desired in a release are merged into the `develop` branch, we run +the full set of tests on a clean checkout of the `develop` branch. +2. After testing, the `develop` branch is merged to main, and the `main` branch is tagged with +the new version number (e.g. `2.11.2`). At this point, the tip of the `main` branch will +reflect the new release and the `develop` branch can be fast-forwarded to sync with `main` to +start work on the next release. +3. Releases can be downloaded from the [GitHub releases page](https://github.com/PermafrostDiscoveryGateway/viz-staging/releases). + +## Testing + +**Unit and integration tests**. We strive for a full suite of `pytest` tests in the `tests` subdirectory. +Any new code developed should include a robust set of tests for each public +method, as well as integration tests from new feature sets. Tests should fully +exercise the feature to ensure that it responds correctly to both good data inputs +and various classes of corrupt or bad data. All tests should pass before submitting a PR +or merging to `develop`. + +Tests are automatically run via GitHub Actions. 
Check the root `README.md` file +for the GitHub Actions status badge and make sure it says "Passing". + +## Code style + +Code should be written to professional standards to enable clean, well-documented, +readable, and maintainable software. While there has been significant variability +in the coding styles applied historically, new contributions should strive for +clean code formatting. We generally follow PEP8 guidelines for Python code formatting, +typically enforced through the `black` code formatting package. + +## Contributor license agreement + +In order to clarify the intellectual property license +granted with Contributions from any person or entity, you agree to +a Contributor License Agreement ("CLA") with the Regents of the University of +California (hereafter, the "Regents"). + +1. Definitions. + "You" (or "Your") shall mean the copyright owner or legal entity + authorized by the copyright owner that is making this Agreement + with the Regents. For legal entities, the entity making a + Contribution and all other entities that control, are controlled + by, or are under common control with that entity are considered to + be a single Contributor. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + "Contribution" shall mean any original work of authorship, + including any modifications or additions to an existing work, that + is intentionally submitted by You to the Regents for inclusion + in, or documentation of, any of the products owned or managed by + the Regents (the "Work"). 
For the purposes of this definition, + "submitted" means any form of electronic, verbal, or written + communication sent to the Regents or its representatives, + including but not limited to communication on electronic mailing + lists, source code control systems, and issue tracking systems that + are managed by, or on behalf of, the Regents for the purpose of + discussing and improving the Work, but excluding communication that + is conspicuously marked or otherwise designated in writing by You + as "Not a Contribution." +2. Grant of Copyright License. Subject to the terms and conditions of + this Agreement, You hereby grant to the Regents and to + recipients of software distributed by the Regents a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare derivative works of, + publicly display, publicly perform, sublicense, and distribute Your + Contributions and such derivative works. +3. Grant of Patent License. Subject to the terms and conditions of + this Agreement, You hereby grant to the Regents and to + recipients of software distributed by the Regents a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have + made, use, offer to sell, sell, import, and otherwise transfer the + Work, where such license applies only to those patent claims + licensable by You that are necessarily infringed by Your + Contribution(s) alone or by combination of Your Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If any + entity institutes patent litigation against You or any other entity + (including a cross-claim or counterclaim in a lawsuit) alleging + that your Contribution, or the Work to which you have contributed, + constitutes direct or contributory patent infringement, then any + patent licenses granted to that entity under this Agreement for + that Contribution or Work shall terminate as of the date such + litigation is filed. +4. You represent that you are legally entitled to grant the above + license. If your employer(s) has rights to intellectual property + that you create that includes your Contributions, you represent + that you have received permission to make Contributions on behalf + of that employer, that your employer has waived such rights for + your Contributions to the Regents, or that your employer has + executed a separate Corporate CLA with the Regents. +5. You represent that each of Your Contributions is Your original + creation (see section 7 for submissions on behalf of others). You + represent that Your Contribution submissions include complete + details of any third-party license or other restriction (including, + but not limited to, related patents and trademarks) of which you + are personally aware and which are associated with any part of Your + Contributions. From 50449ab3480d74b63be66254c818f4c95b758311 Mon Sep 17 00:00:00 2001 From: Matt Jones Date: Fri, 2 Feb 2024 11:46:06 -0900 Subject: [PATCH 06/14] Add missing dependency on pydantic. Without this, the import for ConfigManager fails. 
--- pyproject.toml | 1 + tests/test_loaddata.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 60d2432..1189348 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ Rtree = ">= 0.9, < 1.0" filelock = ">= 3.6, < 4.0" coloraide = ">= 0.10, < 1" colormaps = "== 0.4.0" +pydantic = "1.10.9" [tool.poetry.group.dev.dependencies] pytest = ">=7" diff --git a/tests/test_loaddata.py b/tests/test_loaddata.py index 986b9e2..da2a213 100644 --- a/tests/test_loaddata.py +++ b/tests/test_loaddata.py @@ -1,10 +1,11 @@ from pandas import DataFrame +from pdgstaging import ConfigManager def test_init(): """Initialize tests and show they are working. """ assert 1 == 1 - + def test_load_data(): """Load example testing data for staging tests. """ From cd6fbadb31f419d680e7a948c448170036de0780 Mon Sep 17 00:00:00 2001 From: Juliet Cohen Date: Wed, 7 Feb 2024 15:46:48 -0800 Subject: [PATCH 07/14] added authors and release DOI of version 0.9.1 to README --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9a37949..8476756 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Viz-staging: vector staging for the Permafrost Discovery Gateway visualization pipeline -- **Author**: -- **DOI**: doi:10.18739/______ +- **Authors**: Robyn Thiessen-Bock ; Juliet Cohen ; Matthew B. Jones ; Kastan Day ; Lauren Walker +- **DOI**: 10.18739/A2RV0D26C - **License**: [Apache 2](https://opensource.org/license/apache-2-0/) - [Package source code on GitHub](https://github.com/PermafrostDiscoveryGateway/viz-staging) - [Submit bugs and feature requests](https://github.com/PermafrostDiscoveryGateway/viz-staging/issues/new) @@ -14,7 +14,7 @@ The `pdgstaging` package divides vector files into tiled vector files according Cite this software as: -> Robyn Thiessen-Bock, Juliet Cohen, Matt Jones, Kastan Day, Lauren Walker. 2023. 
Viz-staging: vector staging for the Permafrost Discovery Gateway visualization pipeline (version 0.9.1). Arctic Data Center. doi: 10.18739/______ +> Robyn Thiessen-Bock, Juliet Cohen, Matt Jones, Kastan Day, Lauren Walker. 2023. Viz-staging: vector staging for the Permafrost Discovery Gateway visualization pipeline (version 0.9.1). Arctic Data Center. doi: 10.18739/A2RV0D26C ## Install From 17b04434ff4dcf841c3312aca1c06cb865ecec56 Mon Sep 17 00:00:00 2001 From: Juliet Cohen Date: Wed, 7 Feb 2024 15:57:33 -0800 Subject: [PATCH 08/14] added hyperlink to EZID page for DOI and changed size of section headers in README --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8476756..9d0b3de 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Viz-staging: vector staging for the Permafrost Discovery Gateway visualization pipeline - **Authors**: Robyn Thiessen-Bock ; Juliet Cohen ; Matthew B. Jones ; Kastan Day ; Lauren Walker -- **DOI**: 10.18739/A2RV0D26C +- **DOI**: [10.18739/A2RV0D26C](https://ezid.cdlib.org/id/doi:10.18739/A2RV0D26C) - **License**: [Apache 2](https://opensource.org/license/apache-2-0/) - [Package source code on GitHub](https://github.com/PermafrostDiscoveryGateway/viz-staging) - [Submit bugs and feature requests](https://github.com/PermafrostDiscoveryGateway/viz-staging/issues/new) @@ -10,7 +10,7 @@ The `pdgstaging` package divides vector files into tiled vector files according ![PDG staging summary](docs/images/staging_tldr.png) -# Citation +## Citation Cite this software as: @@ -93,7 +93,7 @@ The staging process will also output a summary CSV file with one row for each ti - If the deduplication method specified in the configuration is `footprints`, the footprint file(s) are provided with a structure that follows the [docs](https://github.com/PermafrostDiscoveryGateway/viz-staging/blob/main/docs/footprints.md). 
- In order for logging to work properly, the node running the script that uses this package has a `/tmp` directory so the `log.log` file can populate there. -# License +## License ``` Copyright [2013] [Regents of the University of California] From 0710a153d7aa8905a8336319abdf46fd87a97557 Mon Sep 17 00:00:00 2001 From: Juliet Cohen Date: Thu, 8 Feb 2024 08:43:44 -0800 Subject: [PATCH 09/14] added development section written by Matt --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 9d0b3de..bf25d4f 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,14 @@ The staging process will also output a summary CSV file with one row for each ti - If the deduplication method specified in the configuration is `footprints`, the footprint file(s) are provided with a structure that follows the [docs](https://github.com/PermafrostDiscoveryGateway/viz-staging/blob/main/docs/footprints.md). - In order for logging to work properly, the node running the script that uses this package has a `/tmp` directory so the `log.log` file can populate there. +## Development + +Build and test using poetry and pytest. + +- To build, run `poetry build` +- To test, run `pytest` from the root of the package directory +- VS Code configuration is setup to configure tests as well + ## License ``` From 334fffac95d5679c9fc1255b6c796524325b2466 Mon Sep 17 00:00:00 2001 From: Juliet Cohen Date: Thu, 8 Feb 2024 09:36:34 -0800 Subject: [PATCH 10/14] updated title in README header and citation to Matt's preference --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index bf25d4f..0ddda3e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Viz-staging: vector staging for the Permafrost Discovery Gateway visualization pipeline +# Viz-staging: vector data tiling for geospatial visualization - **Authors**: Robyn Thiessen-Bock ; Juliet Cohen ; Matthew B. 
Jones ; Kastan Day ; Lauren Walker - **DOI**: [10.18739/A2RV0D26C](https://ezid.cdlib.org/id/doi:10.18739/A2RV0D26C) @@ -14,7 +14,7 @@ The `pdgstaging` package divides vector files into tiled vector files according Cite this software as: -> Robyn Thiessen-Bock, Juliet Cohen, Matt Jones, Kastan Day, Lauren Walker. 2023. Viz-staging: vector staging for the Permafrost Discovery Gateway visualization pipeline (version 0.9.1). Arctic Data Center. doi: 10.18739/A2RV0D26C +> Robyn Thiessen-Bock, Juliet Cohen, Matt Jones, Kastan Day, Lauren Walker. 2023. Viz-staging: vector data tiling for geospatial visualization (version 0.9.1). Arctic Data Center. doi: 10.18739/A2RV0D26C ## Install From 6fefe8b83f23ec6f88fdd6c22e9ee203e784e740 Mon Sep 17 00:00:00 2001 From: Jeanette Clark Date: Tue, 19 Mar 2024 15:53:19 -0700 Subject: [PATCH 11/14] remove upper bound on pandas dependency --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1189348..85bcac2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ classifiers=[ [tool.poetry.dependencies] python = ">=3.9" numpy = ">=1.2" -pandas = ">=1.4, < 2.0" +pandas = ">=1.4" shapely = ">= 2, < 3.0" geopandas = ">= 0.12.2, < 1.0" morecantile = ">= 3.1, < 4.0" From 9f85a87517ccc7d1fa2ff2709594edc3a87238ee Mon Sep 17 00:00:00 2001 From: Juliet Cohen Date: Tue, 28 May 2024 14:29:21 -0700 Subject: [PATCH 12/14] removed lines from contributing doc that are more relevant to metacatui and dataone and less relevant to PDG packages --- CONTRIBUTING.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 87a0412..5f9078e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -19,8 +19,6 @@ bug reports, documentation, graphics, and many others. 
You might consider contr - Fix a bug and contribute the code with a Pull Request - Write or edit some documentation - Sharing helpful tips or FAQ-type answers to users or future contributors -- Create screenshots or tutorials of features of MetacatUI -- Answer questions on DataONE Discussions - ... This is an open source project, and we welcome full From bead90ddb3d55f84361a0e39e38e83f268a2dd70 Mon Sep 17 00:00:00 2001 From: Juliet Cohen Date: Tue, 28 May 2024 14:38:04 -0700 Subject: [PATCH 13/14] added version restrictions to dependencies, updated version of package for next release --- pyproject.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 85bcac2..5400ce1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pdgstaging" -version = "0.9.1" +version = "0.9.2" description = "Geospatial data tiling workflow" authors = [ "Robyn Thiessen-Bock ", @@ -21,9 +21,9 @@ classifiers=[ ] [tool.poetry.dependencies] -python = ">=3.9" -numpy = ">=1.2" -pandas = ">=1.4" +python = ">= 3.9, < 4.0" +numpy = ">= 1.2, < 2.0" +pandas = ">= 1.4, < 2.0" shapely = ">= 2, < 3.0" geopandas = ">= 0.12.2, < 1.0" morecantile = ">= 3.1, < 4.0" From 55751dff059c92bc090c4c2933c63d50841685dc Mon Sep 17 00:00:00 2001 From: Juliet Cohen Date: Tue, 28 May 2024 14:57:16 -0700 Subject: [PATCH 14/14] removed old comments that were specific to release 0.9.1, changed name of variable that was specific to IWP dataset within clip to footprint step --- pdgstaging/ConfigManager.py | 8 +++----- pdgstaging/Deduplicator.py | 6 +++--- pdgstaging/TileStager.py | 19 +++++++++---------- 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/pdgstaging/ConfigManager.py b/pdgstaging/ConfigManager.py index 212c59f..1193910 100644 --- a/pdgstaging/ConfigManager.py +++ b/pdgstaging/ConfigManager.py @@ -275,9 +275,7 @@ class ConfigManager(): the intersecting polygons to be considered a duplicate. 
If False, then the overlap_tolerance proportion must be True for only one of the intersecting polygons to be considered - a duplicate. Default is True. Note that with release 0.9.0, - the 'neighbor' method has been not been thoroughly tested - and should not be applied to input data. + a duplicate. Default is True. - deduplicate_centroid_tolerance : float, optional For the 'neighbor' deduplication method only. The maximum distance between the centroids of two polygons to be @@ -299,8 +297,8 @@ class ConfigManager(): - deduplicate_clip_to_footprint : bool, optional For the 'footprints' deduplication method only. If True, then polygons that fall outside the bounds of the - associated footprint will be removed. Default is True for - release version 0.9.0, but will be false for future releases. + associated footprint will be removed. Default is True for this + release, but will be false for future releases. - deduplicate_clip_method: str, optional For the 'footprints' deduplication method only, when deduplicate_clip_to_footprint is True. The method to use to diff --git a/pdgstaging/Deduplicator.py b/pdgstaging/Deduplicator.py index 21080e4..2ffa8d4 100644 --- a/pdgstaging/Deduplicator.py +++ b/pdgstaging/Deduplicator.py @@ -478,8 +478,8 @@ def deduplicate_by_footprint( return_intersections : bool, optional If true, the polygons that represent the intersections between footprints will be returned. Default is False. Not currently available - in this release 0.9.0. return_intersections is to be integrated again - in future releases. + in this release. return_intersections is to be integrated again + in a future release. prop_duplicated : str, optional Defaults to "staging_duplicated". The column name / property to use to flag duplicates. @@ -494,7 +494,7 @@ def deduplicate_by_footprint( `intersections` represents the polygon area where the footprints overlap. 
It has not been integrated into the function again since the deduplication approach changed from returning a dictionary to returning a labeled GDF. - This will be integrated again in releases after 0.9.0. + This will be integrated again in a future release. """ logger.info(f"Executing footprint deduplication.") diff --git a/pdgstaging/TileStager.py b/pdgstaging/TileStager.py index 669a281..a95cd87 100644 --- a/pdgstaging/TileStager.py +++ b/pdgstaging/TileStager.py @@ -130,8 +130,7 @@ def stage(self, path): if (gdf is not None) and (len(gdf) > 0): gdf = self.simplify_geoms(gdf) - # clip to footprint before CRS of IWP data is transformed - # to EPSG:4326 + # If clipping to footprint, do so before CRS is transformed gdf = self.clip_to_footprint(gdf, path) gdf = self.set_crs(gdf) self.grid = self.make_tms_grid(gdf) @@ -177,19 +176,19 @@ def clip_to_footprint(self, gdf, path): fp = self.get_data(fp_path) logger.info(f' Checking CRSs of polygons and footprint.') - iwp_crs = gdf.crs + data_crs = gdf.crs fp_crs = fp.crs - if iwp_crs == fp_crs: - logger.info(f" CRSs match. They are both {iwp_crs}.") + if data_crs == fp_crs: + logger.info(f" CRSs match. They are both {data_crs}.") else: - logger.info(f" CRSs do not match.\n IWP's CRS is {iwp_crs}." + logger.info(f" CRSs do not match.\n Data's CRS is {data_crs}." f" Footprint's CRS is {fp_crs}.") # transform the footprint to the CRS of the polygon data - fp.to_crs(iwp_crs, inplace = True) + fp.to_crs(data_crs, inplace = True) # check again fp_crs_transformed = fp.crs - if iwp_crs == fp_crs_transformed: + if data_crs == fp_crs_transformed: logger.info("Footprint CRS has been transformed to CRS of polygons.") else: logger.error("Failed to transform footprint CRS to CRS of polygons.") @@ -505,8 +504,8 @@ def save_tiles(self, gdf=None): # no polygons will be labeled as duplicates or not. 
# If deduplicating by footprint: # neither file has been clipped to footprint - logger.info(f"Tile exists but dedup is not set to occur, so just " - f"appending the polygons.") + logger.info(f"Tile exists but dedup is not set to occur, so" + f" appending polygons.") # Append to existing tile mode = 'a'