diff --git a/.github/workflows/build-pipeline.yml b/.github/workflows/build-pipeline.yml
new file mode 100644
index 0000000..e996ea3
--- /dev/null
+++ b/.github/workflows/build-pipeline.yml
@@ -0,0 +1,190 @@
+# This is the main build pipeline that verifies and publishes the software
+name: Build
+
+# Controls when the workflow will run
+on:
+  # Triggers the workflow on push events
+  push:
+    branches: [ develop, release/**, main, feature/**, issue/**, issues/** ]
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+env:
+  POETRY_VERSION: "1.3.2"
+  PYTHON_VERSION: "3.10"
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+
+jobs:
+  # The first job in the workflow confirms that the software's own tests pass.
+  run_tests:
+    uses: ./.github/workflows/run_tests.yml
+
+  # The second job in the workflow verifies and publishes the software.
+  build:
+    needs: run_tests
+    runs-on: ubuntu-latest
+    steps:
+      # Checks out your repository under $GITHUB_WORKSPACE, so your job can access it
+      - name: Retrieve repository
+        uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+      - name: Install Poetry
+        uses: abatilo/actions-poetry@v3.0.0
+        with:
+          poetry-version: ${{ env.POETRY_VERSION }}
+      - name: Get version
+        id: get-version
+        run: |
+          echo "current_version=$(poetry version | awk '{print $2}')" >> $GITHUB_OUTPUT
+          echo "pyproject_name=$(poetry version | awk '{print $1}')" >> $GITHUB_ENV
+
+      # Bumps the version, based on which branch is the target.
+      - name: Bump pre-alpha version
+        # If triggered by push to a feature, issue, or dependabot branch
+        if: |
+          startsWith(github.ref, 'refs/heads/issue') ||
+          startsWith(github.ref, 'refs/heads/dependabot/') ||
+          startsWith(github.ref, 'refs/heads/feature/')
+        run: |
+          new_ver="${{ steps.get-version.outputs.current_version }}+$(git rev-parse --short ${GITHUB_SHA})"
+          poetry version $new_ver
+          echo "software_version=$(poetry version | awk '{print $2}')" >> $GITHUB_ENV
+      - name: Bump alpha version
+        # If triggered by push to the develop branch
+        if: ${{ github.ref == 'refs/heads/develop' }}
+        run: |
+          poetry version prerelease
+          echo "software_version=$(poetry version | awk '{print $2}')" >> $GITHUB_ENV
+          echo "venue=sit" >> $GITHUB_ENV
+      - name: Bump rc version
+        # If triggered by push to a release branch
+        if: ${{ startsWith(github.ref, 'refs/heads/release/') }}
+        env:
+          # True if the version already has an 'rc' pre-release identifier
+          BUMP_RC: ${{ contains(steps.get-version.outputs.current_version, 'rc') }}
+        run: |
+          if [ "$BUMP_RC" = true ]; then
+            poetry version prerelease
+          else
+            poetry version ${GITHUB_REF#refs/heads/release/}rc1
+          fi
+          echo "software_version=$(poetry version | awk '{print $2}')" >> $GITHUB_ENV
+          echo "venue=uat" >> $GITHUB_ENV
+      - name: Release version
+        # If triggered by push to the main branch
+        if: ${{ startsWith(github.ref, 'refs/heads/main') }}
+        env:
+          CURRENT_VERSION: ${{ steps.get-version.outputs.current_version }}
+        # Remove rc* from the end of the version string.
+        # The ${string%%substring} syntax below deletes the longest match of $substring from the back of $string.
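+        # For example, with CURRENT_VERSION="1.0.0rc3", ${CURRENT_VERSION%%rc*} expands to "1.0.0".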
+        run: |
+          poetry version ${CURRENT_VERSION%%rc*}
+          echo "software_version=$(poetry version | awk '{print $2}')" >> $GITHUB_ENV
+          echo "venue=ops" >> $GITHUB_ENV
+
+      - name: Run Snyk as a blocking step
+        uses: snyk/actions/python-3.10@master
+        env:
+          SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }}
+        with:
+          command: test
+          args: >
+            --org=${{ secrets.SNYK_ORG_ID }}
+            --project-name=${{ github.repository }}
+            --severity-threshold=high
+            --fail-on=all
+      - name: Run Snyk on Python
+        uses: snyk/actions/python-3.10@master
+        env:
+          SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }}
+        with:
+          command: monitor
+          args: >
+            --org=${{ secrets.SNYK_ORG_ID }}
+            --project-name=${{ github.repository }}
+
+      - name: Commit Version Bump
+        # If building `develop`, a `release` branch, or `main`,
+        # then commit the version bump back to the repo.
+        if: |
+          github.ref == 'refs/heads/develop' ||
+          github.ref == 'refs/heads/main' ||
+          startsWith(github.ref, 'refs/heads/release')
+        run: |
+          git config --global user.name 'stitchee bot'
+          git config --global user.email 'stitchee@noreply.github.com'
+          git commit -am "/version ${{ env.software_version }}"
+          git push
+
+      # Builds and pushes the package to the Python Package Index (PyPI)
+      - name: Build Python Artifact
+        run: |
+          poetry build
+      - uses: actions/upload-artifact@v4
+        with:
+          name: python-artifact
+          path: dist/*
+      - name: Publish to test.pypi.org
+        id: pypi-test-publish
+        if: |
+          github.ref == 'refs/heads/develop' ||
+          startsWith(github.ref, 'refs/heads/release')
+        env:
+          POETRY_PYPI_TOKEN_TESTPYPI: ${{ secrets.PYPI_TOKEN_TESTPYPI }}
+        run: |
+          poetry config repositories.testpypi https://test.pypi.org/legacy/
+          poetry publish -r testpypi
+      - name: Publish to pypi.org
+        if: ${{ github.ref == 'refs/heads/main' }}
+        id: pypi-publish
+        env:
+          POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_TOKEN_PYPI }}
+        run: |
+          poetry publish
+
+      # Builds and pushes a Docker image
+      - name: Log in to the Container registry
+        if: ${{ !startsWith(github.ref, 'refs/heads/feature/') }}
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Extract metadata (tags, labels) for Docker
+        if: ${{ !startsWith(github.ref, 'refs/heads/feature/') }}
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: |
+            type=raw,pattern={{version}},value=${{ env.software_version }}
+            type=raw,value=${{ env.venue }}
+      - name: Build and push Docker image
+        if: ${{ !startsWith(github.ref, 'refs/heads/feature/') }}
+        id: docker-push
+        uses: docker/build-push-action@v5
+        with:
+          context: .
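+          # SOURCE (below) is the pip requirement string for the package version installed into the image.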
+ file: Dockerfile + build-args: | + SOURCE=${{env.pyproject_name}}[harmony]==${{ env.software_version }} + push: true + pull: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + + - name: Push Tag + if: | + github.ref == 'refs/heads/develop' || + github.ref == 'refs/heads/main' || + startsWith(github.ref, 'refs/heads/release') + run: | + git config user.name "${GITHUB_ACTOR}" + git config user.email "${GITHUB_ACTOR}@users.noreply.github.com" + git tag -a "${{ env.software_version }}" -m "Version ${{ env.software_version }}" + git push origin "${{ env.software_version }}" diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml deleted file mode 100644 index 7539bb6..0000000 --- a/.github/workflows/push.yml +++ /dev/null @@ -1,164 +0,0 @@ -name: Lint and Test - -# Controls when the workflow will run -on: - # Triggers the workflow on push events - push: - branches: [ develop, release/**, main, feature/**, issue/**, issues/** ] - - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -env: - POETRY_VERSION: "1.3.2" - PYTHON_VERSION: "3.10" - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} - -jobs: - run_tests: - uses: ./.github/workflows/run_tests.yml - - bump_version: - needs: run_tests - runs-on: ubuntu-20.04 - - steps: - - name: Retrieve repository - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ env.PYTHON_VERSION }} - - - name: Install Poetry - uses: abatilo/actions-poetry@v3.0.0 - with: - poetry-version: ${{ env.POETRY_VERSION }} - - - name: Get version - id: get-version - run: | - echo "current_version=$(poetry version | awk '{print $2}')" >> $GITHUB_OUTPUT - echo "pyproject_name=$(poetry version | awk '{print $1}')" >> $GITHUB_ENV - - - name: Bump pre-alpha version - # If triggered by push to a feature branch - if: | - ${{ startsWith(github.ref, 'refs/heads/issue') }} || - ${{ startsWith(github.ref, 'refs/heads/dependabot/') }} || - ${{ startsWith(github.ref, 'refs/heads/feature/') }} - run: | - new_ver="${{ steps.get-version.outputs.current_version }}+$(git rev-parse --short ${GITHUB_SHA})" - poetry version $new_ver - echo "software_version=$(poetry version | awk '{print $2}')" >> $GITHUB_ENV - - - name: Bump alpha version - # If triggered by push to the develop branch - if: ${{ github.ref == 'refs/heads/develop' }} - run: | - poetry version prerelease - echo "software_version=$(poetry version | awk '{print $2}')" >> $GITHUB_ENV - echo "venue=sit" >> $GITHUB_ENV - - - name: Bump rc version - # If triggered by push to a release branch - if: ${{ startsWith(github.ref, 'refs/heads/release/') }} - env: - # True if the version already has a 'rc' pre-release identifier - BUMP_RC: ${{ contains(steps.get-version.outputs.current_version, 'rc') }} - run: | - if [ "$BUMP_RC" = true ]; then - poetry version prerelease - else - poetry version ${GITHUB_REF#refs/heads/release/}rc1 - fi - echo "software_version=$(poetry version | awk '{print $2}')" >> $GITHUB_ENV - echo "venue=uat" >> $GITHUB_ENV - - - name: Release version - # If triggered by push to the main branch - if: ${{ startsWith(github.ref, 'refs/heads/main') }} - env: - CURRENT_VERSION: ${{ steps.get-version.outputs.current_version }} - # True if the version already has a 'rc' pre-release identifier - BUMP_RC: ${{ contains(steps.get-version.outputs.current_version, 'rc') }} - # True if the version already has an 'alpha' pre-release identifier - BUMP_A: ${{ 
contains(steps.get-version.outputs.current_version, 'a') }} - # True if the version already has a 'beta' pre-release identifier - BUMP_B: ${{ contains(steps.get-version.outputs.current_version, 'b') }} - # Remove rc* from the end of version string - # The ${string%%substring} syntax below deletes the longest match of $substring from back of $string. - run: | - if [ "$BUMP_RC" = true ]; then - poetry version ${CURRENT_VERSION%%rc*} - elif [ "$BUMP_B" = true ]; then - poetry version ${CURRENT_VERSION%%b*} - elif [ "$BUMP_A" = true ]; then - poetry version ${CURRENT_VERSION%%a*} - fi - echo "software_version=$(poetry version | awk '{print $2}')" >> $GITHUB_ENV - echo "venue=ops" >> $GITHUB_ENV - - - name: Log in to the Container registry - if: ${{ !startsWith(github.ref, 'refs/heads/feature/') }} - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Extract metadata (tags, labels) for Docker - if: ${{ !startsWith(github.ref, 'refs/heads/feature/') }} - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - tags: | - type=raw,pattern={{version}},value=${{ env.software_version }} - type=raw,value=${{ env.venue }} - -# - name: Wait for package -## if: ${{ !startsWith(github.ref, 'refs/heads/feature') }} -# if: ${{ startsWith(github.ref, 'refs/heads/feature/') }} -# run: | -# pip install tenacity -# ${GITHUB_WORKSPACE}/.github/workflows/wait-for-pypi.py ${{env.pyproject_name}}[harmony]==${{ env.software_version }} - - - name: Build and push Docker image - if: ${{ !startsWith(github.ref, 'refs/heads/feature/') }} - id: docker-push - uses: docker/build-push-action@v5 - with: - context: . - file: Dockerfile - build-args: | - SOURCE=${{env.pyproject_name}}[harmony]==${{ env.software_version }} - push: true - pull: true - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - -# - name: Commit Version Bump -# # If building develop, a release branch, or main then we commit the version bump back to the repo -# if: | -# github.ref == 'refs/heads/develop' || -# github.ref == 'refs/heads/main' || -# startsWith(github.ref, 'refs/heads/release') -# run: | -# git config --global user.name 'stitchee bot' -# git config --global user.email 'stitchee@noreply.github.com' -# git commit -am "/version ${{ env.software_version }}" -# git push -# -# - name: Push Tag -# if: | -# github.ref == 'refs/heads/develop' || -# github.ref == 'refs/heads/main' || -# startsWith(github.ref, 'refs/heads/release') -# run: | -# git config user.name "${GITHUB_ACTOR}" -# git config user.email "${GITHUB_ACTOR}@users.noreply.github.com" -# git tag -a "${{ env.software_version }}" -m "Version ${{ env.software_version }}" -# git push origin "${{ env.software_version }}" diff --git a/.github/workflows/release-created.yml b/.github/workflows/release-created.yml new file mode 100644 index 0000000..226b347 --- /dev/null +++ b/.github/workflows/release-created.yml @@ -0,0 +1,38 @@ +name: Release Branch Created + +# Run whenever a ref is created https://docs.github.com/en/actions/reference/events-that-trigger-workflows#create +on: + create + +jobs: + # First job in the workflow builds and verifies the software artifacts + bump: + name: Bump minor version on develop + # The type of runner that the job will run on + runs-on: ubuntu-latest + # Only run if ref created was a release branch + if: + ${{ startsWith(github.ref, 'refs/heads/release/') }} + steps: + # Checks-out the 
develop branch
+      - uses: actions/checkout@v4
+        with:
+          ref: 'refs/heads/develop'
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+      - name: Install Poetry
+        uses: abatilo/actions-poetry@v3.0.0
+        with:
+          poetry-version: 1.3.2
+      - name: Bump minor version
+        run: |
+          poetry version ${GITHUB_REF#refs/heads/release/}
+          poetry version preminor
+          echo "software_version=$(poetry version | awk '{print $2}')" >> $GITHUB_ENV
+      - name: Commit Version Bump
+        run: |
+          git config --global user.name 'stitchee bot'
+          git config --global user.email 'stitchee@noreply.github.com'
+          git commit -am "/version ${{ env.software_version }}"
+          git push
diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
index bd44def..52366f7 100644
--- a/.github/workflows/run_tests.yml
+++ b/.github/workflows/run_tests.yml
@@ -38,34 +38,13 @@ jobs:
         run: |
           poetry run ruff concatenator

-      - name: Run tests with coverage
-        run: |
-          poetry run coverage run -m pytest tests/test_group_handling.py >& test_results.txt
-      # TODO: expand tests to include full concatenation runs, i.e., don't just run test_group_handling.py
-
-      - name: Generate coverage report
-        if: ${{ always() }}
-        run: |
-          poetry run coverage report -m >& coverage_report.txt
-          poetry run coverage html --dir htmlcov
-
-      - name: Archive test results
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: test result
-          path: test_results.txt
-
-      - name: Archive code coverage report (plain text)
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: code coverage report (plain text)
-          path: coverage_report.txt
+      - name: Run tests and collect coverage
+        run: poetry run pytest --cov=concatenator tests/unit/test_dataset_and_group_handling.py
+      # TODO: expand tests to include full concatenation runs, i.e., not only test_dataset_and_group_handling.py

-      - name: Archive code coverage report (HTML)
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
+      - name: Upload coverage reports to Codecov
+        uses: codecov/codecov-action@v4.0.1
         with:
-          name: code coverage report (HTML)
-          path: htmlcov/*
+          token: ${{ secrets.CODECOV_TOKEN }}
+          slug: nasa/stitchee
+          verbose: true
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index aaf53a5..b0f78ff 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,7 +1,7 @@
 ---
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.5.0
+    rev: v4.6.0
     hooks:
       - id: trailing-whitespace
         exclude: tests(/\w*)*/functional/t/trailing_whitespaces.py|tests/pyreverse/data/.*.html|doc/data/messages/t/trailing-whitespace/bad.py
@@ -15,7 +15,7 @@ repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: 'v0.3.3'
+    rev: 'v0.3.5'
     hooks:
       - id: ruff
         args: [ "--fix" ]
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e90099d..55104da 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-## [Unreleased] +## [1.0.0] ### Added - [Pull #1](https://github.com/danielfromearth/stitchee/pull/1): An initial GitHub Actions workflow @@ -23,7 +23,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [Issue #44](https://github.com/danielfromearth/stitchee/issues/44): Concatenation dimension CLI argument is required but isn't listed as such in the help message - [Issue #81](https://github.com/danielfromearth/stitchee/issues/81): Remove `nco` related code - [Pull #129](https://github.com/danielfromearth/stitchee/pull/129): Sort according to extend dimension + - [Pull #152](https://github.com/danielfromearth/stitchee/pull/152): Consider empty a netCDF with only singleton null-values + - [Pull #157](https://github.com/danielfromearth/stitchee/pull/157): Update CI pipeline + - [Pull #158](https://github.com/danielfromearth/stitchee/pull/158): Add pypi publishing steps to CI pipeline ### Deprecated ### Removed ### Fixed -- [PR #4](https://github.com/danielfromearth/stitchee/pull/4): Error with TEMPO ozone profile data because of duplicated dimension names +- [Pull #4](https://github.com/danielfromearth/stitchee/pull/4): Error with TEMPO ozone profile data because of duplicated dimension names +- [Pull #133](https://github.com/danielfromearth/stitchee/pull/133): Fix conflicting dimensions on record dimension sorting diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index 7dfce40..2be4b06 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,9 @@ Mypy checked + + Code coverage +

[//]: # (Using deprecated `align="center"` for the logo image and badges above, because of https://stackoverflow.com/a/62383408)
@@ -83,3 +86,6 @@ For example:
 ```shell
 poetry run stitchee /path/to/netcdf/directory/ -o /path/to/output.nc
 ```
+
+---
+This package has NASA Software Release Authorization (SRA) # LAR-20433-1
diff --git a/concatenator/group_handling.py b/concatenator/dataset_and_group_handling.py
similarity index 84%
rename from concatenator/group_handling.py
rename to concatenator/dataset_and_group_handling.py
index b051c16..fe05308 100644
--- a/concatenator/group_handling.py
+++ b/concatenator/dataset_and_group_handling.py
@@ -1,10 +1,12 @@
 """
-group_handling.py
+dataset_and_group_handling.py

 Functions for converting multidimensional data structures
 between a group hierarchy and a flat structure
 """
+from __future__ import annotations
+
 import re

 import netCDF4 as nc
@@ -313,3 +315,57 @@ def _get_dimension_size(dataset: nc.Dataset, dim_name: str) -> int:
     if dim_size is None:
         print(f"Dimension {dim_name} not found when searching for sizes!")
     return dim_size
+
+
+def validate_workable_files(files_to_concat, logger) -> tuple[list[str], int]:
+    """Remove files from the list that are not openable as netCDF or that are empty."""
+    workable_files = []
+    for file in files_to_concat:
+        try:
+            with nc.Dataset(file, "r") as dataset:
+                is_empty = _is_file_empty(dataset)
+                if is_empty is False:
+                    workable_files.append(file)
+        except OSError:
+            logger.debug("Error opening <%s> as a netCDF dataset. Skipping.", file)
+
+    number_of_workable_files = len(workable_files)
+
+    return workable_files, number_of_workable_files
+
+
+def _is_file_empty(parent_group: nc.Dataset | nc.Group) -> bool:
+    """Check whether a netCDF dataset is empty.
+
+    Tests whether all variable arrays are empty.
+    As soon as a variable is detected with both (i) an array size not equal to zero and
+    (ii) not all null/fill values, the granule is considered non-empty.
+
+    Returns
+    -------
+    False if the dataset is considered non-empty; True otherwise (dataset is indeed empty).
+    """
+    for var_name, var in parent_group.variables.items():
+        if var.size != 0:
+            if "_FillValue" in var.ncattrs():
+                fill_or_null = getattr(var, "_FillValue")
+            else:
+                fill_or_null = np.nan
+
+            # This checks three ways that the variable's array might be considered empty.
+            # If none of the ways is true,
+            # a non-empty variable has been found and False is returned.
+            # If one of the ways is true, we consider the variable empty,
+            # and continue checking other variables.
+            empty_way_1 = False
+            if np.ma.isMaskedArray(var[:]):
+                empty_way_1 = var[:].mask.all()
+            empty_way_2 = np.all(var[:].data == fill_or_null)
+            empty_way_3 = np.all(np.isnan(var[:].data))
+
+            if not (empty_way_1 or empty_way_2 or empty_way_3):
+                return False  # Found a non-empty variable.
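+
+    # The file counts as empty only if every nested child group is empty as well.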
+    for child_group in parent_group.groups.values():
+        if not _is_file_empty(child_group):
+            return False
+    return True
diff --git a/concatenator/stitchee.py b/concatenator/stitchee.py
index 89c7090..feb68de 100644
--- a/concatenator/stitchee.py
+++ b/concatenator/stitchee.py
@@ -13,12 +13,13 @@ import xarray as xr

 from concatenator import GROUP_DELIM
-from concatenator.dimension_cleanup import remove_duplicate_dims
-from concatenator.file_ops import add_label_to_path
-from concatenator.group_handling import (
+from concatenator.dataset_and_group_handling import (
     flatten_grouped_dataset,
     regroup_flattened_dataset,
+    validate_workable_files,
 )
+from concatenator.dimension_cleanup import remove_duplicate_dims
+from concatenator.file_ops import add_label_to_path

 default_logger = logging.getLogger(__name__)

@@ -65,7 +66,7 @@ def stitchee(
     benchmark_log = {"flattening": 0.0, "concatenating": 0.0, "reconstructing_groups": 0.0}

     # Proceed to concatenate only files that are workable (can be opened and are not empty).
-    input_files, num_input_files = _validate_workable_files(files_to_concat, logger)
+    input_files, num_input_files = validate_workable_files(files_to_concat, logger)

     # Exit cleanly if no workable netCDF files found.
     if num_input_files < 1:
@@ -120,7 +121,10 @@ def stitchee(
         xrdataset_list.append(xrds)

     # Reorder the xarray datasets according to the concat dim values.
-    xrdataset_list = [x for _, x in sorted(zip(concat_dim_order, xrdataset_list))]
+    xrdataset_list = [
+        dataset
+        for _, dataset in sorted(zip(concat_dim_order, xrdataset_list), key=lambda x: x[0])
+    ]

     # Flattened files are concatenated together (Using XARRAY).
     start_time = time.time()
@@ -193,32 +197,3 @@ def stitchee(
             raise err

     return output_file
-
-
-def _validate_workable_files(files_to_concat, logger) -> tuple[list[str], int]:
-    """Remove files from list that are not open-able as netCDF or that are empty."""
-    workable_files = []
-    for file in files_to_concat:
-        try:
-            with nc.Dataset(file, "r") as dataset:
-                is_empty = _is_file_empty(dataset)
-                if is_empty is False:
-                    workable_files.append(file)
-        except OSError:
-            logger.debug("Error opening <%s> as a netCDF dataset.
Skipping.", file) - - number_of_workable_files = len(workable_files) - - return workable_files, number_of_workable_files - - -def _is_file_empty(parent_group: nc.Dataset | nc.Group) -> bool: - """ - Function to test if a all variable size in a dataset is 0 - """ - for var in parent_group.variables.values(): - if var.size != 0: - return False - for child_group in parent_group.groups.values(): - return _is_file_empty(child_group) - return True diff --git a/poetry.lock b/poetry.lock index 3d0de62..b3d225a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -48,17 +48,17 @@ uvloop = ["uvloop (>=0.15.2)"] [[package]] name = "boto3" -version = "1.34.65" +version = "1.34.81" description = "The AWS SDK for Python" optional = false -python-versions = ">= 3.8" +python-versions = ">=3.8" files = [ - {file = "boto3-1.34.65-py3-none-any.whl", hash = "sha256:b611de58ab28940a36c77d7ef9823427ebf25d5ee8277b802f9979b14e780534"}, - {file = "boto3-1.34.65.tar.gz", hash = "sha256:db97f9c29f1806cf9020679be0dd5ffa2aff2670e28e0e2046f98b979be498a4"}, + {file = "boto3-1.34.81-py3-none-any.whl", hash = "sha256:18224d206a8a775bcaa562d22ed3d07854934699190e12b52fcde87aac76a80e"}, + {file = "boto3-1.34.81.tar.gz", hash = "sha256:004dad209d37b3d2df88f41da13b7ad702a751904a335fac095897ff7a19f82b"}, ] [package.dependencies] -botocore = ">=1.34.65,<1.35.0" +botocore = ">=1.34.81,<1.35.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -67,13 +67,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.34.65" +version = "1.34.81" description = "Low-level, data-driven core of boto 3." optional = false -python-versions = ">= 3.8" +python-versions = ">=3.8" files = [ - {file = "botocore-1.34.65-py3-none-any.whl", hash = "sha256:3b0012d7293880c0a4883883047e93f2888d7317b5e9e8a982a991b90d951f3e"}, - {file = "botocore-1.34.65.tar.gz", hash = "sha256:399a1b1937f7957f0ee2e0df351462b86d44986b795ced980c11eb768b0e61c5"}, + {file = "botocore-1.34.81-py3-none-any.whl", hash = "sha256:85f6fd7c5715eeef7a236c50947de00f57d72e7439daed1125491014b70fab01"}, + {file = "botocore-1.34.81.tar.gz", hash = "sha256:f79bf122566cc1f09d71cc9ac9fcf52d47ba48b761cbc3f064017b36a3c40eb8"}, ] [package.dependencies] @@ -391,18 +391,21 @@ files = [ {file = "coverage-7.4.4.tar.gz", hash = "sha256:c901df83d097649e257e803be22592aedfd5182f07b3cc87d640bbb9afd50f49"}, ] +[package.dependencies] +tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} + [package.extras] toml = ["tomli"] [[package]] name = "dask" -version = "2024.3.1" +version = "2024.4.1" description = "Parallel PyData with Task Scheduling" optional = false python-versions = ">=3.9" files = [ - {file = "dask-2024.3.1-py3-none-any.whl", hash = "sha256:1ac260b8716b1a9fc144c0d7f958336812cfc3ef542a3742c9ae02387189b32b"}, - {file = "dask-2024.3.1.tar.gz", hash = "sha256:78bee2ffd735514e572adaa669fc2a437ec256aecb6bec036a1f5b8dd36b2e60"}, + {file = "dask-2024.4.1-py3-none-any.whl", hash = "sha256:cac5d28b9de7a7cfde46d6fbd8fa81f5654980d010b44d1dbe04dd13b5b63126"}, + {file = "dask-2024.4.1.tar.gz", hash = "sha256:6cd8eb03ddc8dc08d6ca5b167b8de559872bc51cc2b6587d0e9dc754ab19cdf0"}, ] [package.dependencies] @@ -420,7 +423,7 @@ array = ["numpy (>=1.21)"] complete = ["dask[array,dataframe,diagnostics,distributed]", "lz4 (>=4.3.2)", "pyarrow (>=7.0)", "pyarrow-hotfix"] dataframe = ["dask-expr (>=1.0,<1.1)", "dask[array]", "pandas (>=1.3)"] diagnostics = ["bokeh (>=2.4.2)", "jinja2 (>=2.10.3)"] -distributed = ["distributed 
(==2024.3.1)"] +distributed = ["distributed (==2024.4.1)"] test = ["pandas[test]", "pre-commit", "pytest", "pytest-cov", "pytest-rerunfailures", "pytest-timeout", "pytest-xdist"] [[package]] @@ -519,13 +522,13 @@ files = [ [[package]] name = "importlib-metadata" -version = "7.0.2" +version = "7.1.0" description = "Read metadata from Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "importlib_metadata-7.0.2-py3-none-any.whl", hash = "sha256:f4bc4c0c070c490abf4ce96d715f68e95923320370efb66143df00199bb6c100"}, - {file = "importlib_metadata-7.0.2.tar.gz", hash = "sha256:198f568f3230878cb1b44fbd7975f87906c22336dba2e4a7f05278c281fbd792"}, + {file = "importlib_metadata-7.1.0-py3-none-any.whl", hash = "sha256:30962b96c0c223483ed6cc7280e7f0199feb01a0e40cfae4d4450fc6fab1f570"}, + {file = "importlib_metadata-7.1.0.tar.gz", hash = "sha256:b78938b926ee8d5f020fc4772d487045805a55ddbad2ecf21c6d60938dc7fcd2"}, ] [package.dependencies] @@ -534,7 +537,7 @@ zipp = ">=0.5" [package.extras] docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] perf = ["ipython"] -testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"] +testing = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"] [[package]] name = "iniconfig" @@ -861,13 +864,13 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "pycparser" -version = "2.21" +version = "2.22" description = "C parser in Python" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +python-versions = ">=3.8" files = [ - {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, - {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, + {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, + {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, ] [[package]] @@ -935,6 +938,24 @@ tomli = {version = ">=1", markers = "python_version < \"3.11\""} [package.extras] testing = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +[[package]] +name = "pytest-cov" +version = "5.0.0" +description = "Pytest plugin for measuring coverage." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-cov-5.0.0.tar.gz", hash = "sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857"}, + {file = "pytest_cov-5.0.0-py3-none-any.whl", hash = "sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652"}, +] + +[package.dependencies] +coverage = {version = ">=5.2.1", extras = ["toml"]} +pytest = ">=4.6" + +[package.extras] +testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -1054,28 +1075,28 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "ruff" -version = "0.3.3" +version = "0.3.7" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.3.3-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:973a0e388b7bc2e9148c7f9be8b8c6ae7471b9be37e1cc732f8f44a6f6d7720d"}, - {file = "ruff-0.3.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:cfa60d23269d6e2031129b053fdb4e5a7b0637fc6c9c0586737b962b2f834493"}, - {file = "ruff-0.3.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1eca7ff7a47043cf6ce5c7f45f603b09121a7cc047447744b029d1b719278eb5"}, - {file = "ruff-0.3.3-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e7d3f6762217c1da954de24b4a1a70515630d29f71e268ec5000afe81377642d"}, - {file = "ruff-0.3.3-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b24c19e8598916d9c6f5a5437671f55ee93c212a2c4c569605dc3842b6820386"}, - {file = "ruff-0.3.3-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:5a6cbf216b69c7090f0fe4669501a27326c34e119068c1494f35aaf4cc683778"}, - {file = "ruff-0.3.3-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:352e95ead6964974b234e16ba8a66dad102ec7bf8ac064a23f95371d8b198aab"}, - {file = "ruff-0.3.3-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d6ab88c81c4040a817aa432484e838aaddf8bfd7ca70e4e615482757acb64f8"}, - {file = "ruff-0.3.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79bca3a03a759cc773fca69e0bdeac8abd1c13c31b798d5bb3c9da4a03144a9f"}, - {file = "ruff-0.3.3-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:2700a804d5336bcffe063fd789ca2c7b02b552d2e323a336700abb8ae9e6a3f8"}, - {file = "ruff-0.3.3-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:fd66469f1a18fdb9d32e22b79f486223052ddf057dc56dea0caaf1a47bdfaf4e"}, - {file = "ruff-0.3.3-py3-none-musllinux_1_2_i686.whl", hash = "sha256:45817af234605525cdf6317005923bf532514e1ea3d9270acf61ca2440691376"}, - {file = "ruff-0.3.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:0da458989ce0159555ef224d5b7c24d3d2e4bf4c300b85467b08c3261c6bc6a8"}, - {file = "ruff-0.3.3-py3-none-win32.whl", hash = "sha256:f2831ec6a580a97f1ea82ea1eda0401c3cdf512cf2045fa3c85e8ef109e87de0"}, - {file = "ruff-0.3.3-py3-none-win_amd64.whl", hash = "sha256:be90bcae57c24d9f9d023b12d627e958eb55f595428bafcb7fec0791ad25ddfc"}, - {file = "ruff-0.3.3-py3-none-win_arm64.whl", hash = "sha256:0171aab5fecdc54383993389710a3d1227f2da124d76a2784a7098e818f92d61"}, - {file = "ruff-0.3.3.tar.gz", hash = "sha256:38671be06f57a2f8aba957d9f701ea889aa5736be806f18c0cd03d6ff0cbca8d"}, + {file = "ruff-0.3.7-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:0e8377cccb2f07abd25e84fc5b2cbe48eeb0fea9f1719cad7caedb061d70e5ce"}, + {file = 
"ruff-0.3.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:15a4d1cc1e64e556fa0d67bfd388fed416b7f3b26d5d1c3e7d192c897e39ba4b"}, + {file = "ruff-0.3.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d28bdf3d7dc71dd46929fafeec98ba89b7c3550c3f0978e36389b5631b793663"}, + {file = "ruff-0.3.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:379b67d4f49774ba679593b232dcd90d9e10f04d96e3c8ce4a28037ae473f7bb"}, + {file = "ruff-0.3.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c060aea8ad5ef21cdfbbe05475ab5104ce7827b639a78dd55383a6e9895b7c51"}, + {file = "ruff-0.3.7-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:ebf8f615dde968272d70502c083ebf963b6781aacd3079081e03b32adfe4d58a"}, + {file = "ruff-0.3.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d48098bd8f5c38897b03604f5428901b65e3c97d40b3952e38637b5404b739a2"}, + {file = "ruff-0.3.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:da8a4fda219bf9024692b1bc68c9cff4b80507879ada8769dc7e985755d662ea"}, + {file = "ruff-0.3.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c44e0149f1d8b48c4d5c33d88c677a4aa22fd09b1683d6a7ff55b816b5d074f"}, + {file = "ruff-0.3.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:3050ec0af72b709a62ecc2aca941b9cd479a7bf2b36cc4562f0033d688e44fa1"}, + {file = "ruff-0.3.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:a29cc38e4c1ab00da18a3f6777f8b50099d73326981bb7d182e54a9a21bb4ff7"}, + {file = "ruff-0.3.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:5b15cc59c19edca917f51b1956637db47e200b0fc5e6e1878233d3a938384b0b"}, + {file = "ruff-0.3.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:e491045781b1e38b72c91247cf4634f040f8d0cb3e6d3d64d38dcf43616650b4"}, + {file = "ruff-0.3.7-py3-none-win32.whl", hash = "sha256:bc931de87593d64fad3a22e201e55ad76271f1d5bfc44e1a1887edd0903c7d9f"}, + {file = "ruff-0.3.7-py3-none-win_amd64.whl", hash = "sha256:5ef0e501e1e39f35e03c2acb1d1238c595b8bb36cf7a170e7c1df1b73da00e74"}, + {file = "ruff-0.3.7-py3-none-win_arm64.whl", hash = "sha256:789e144f6dc7019d1f92a812891c645274ed08af6037d11fc65fcbc183b7d59f"}, + {file = "ruff-0.3.7.tar.gz", hash = "sha256:d5c1aebee5162c2226784800ae031f660c350e7a3402c4d1f8ea4e97e232e3ba"}, ] [[package]] @@ -1130,13 +1151,13 @@ files = [ [[package]] name = "typing-extensions" -version = "4.10.0" +version = "4.11.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.10.0-py3-none-any.whl", hash = "sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475"}, - {file = "typing_extensions-4.10.0.tar.gz", hash = "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb"}, + {file = "typing_extensions-4.11.0-py3-none-any.whl", hash = "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a"}, + {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"}, ] [[package]] @@ -1168,13 +1189,13 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] name = "xarray" -version = "2024.2.0" +version = "2024.3.0" description = "N-D labeled arrays and datasets in Python" optional = false python-versions = ">=3.9" files = [ - {file = "xarray-2024.2.0-py3-none-any.whl", hash = "sha256:a31a9b37e39bd5aeb098070a75d6dd4d59019eb339d735b86108b9e0cb391f94"}, - {file = "xarray-2024.2.0.tar.gz", hash = 
"sha256:a105f02791082c888ebe2622090beaff2e7b68571488d62fe6afdab35b4b717f"}, + {file = "xarray-2024.3.0-py3-none-any.whl", hash = "sha256:ca2bc4da2bf2e7879e15862a7a7c3fc76ad19f6a08931d030220cef39a29118d"}, + {file = "xarray-2024.3.0.tar.gz", hash = "sha256:5c1db19efdde61db7faedad8fc944f4e29698fb6fbd578d352668b63598bd1d8"}, ] [package.dependencies] @@ -1208,4 +1229,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "45f3f1ff77ebde64e9018898030958d42cd2cc5c985ebe40863651155ba625a0" +content-hash = "4aded716a2462ce682e2dc8228895f785620feeae5cd79cf417dcbe433898274" diff --git a/pyproject.toml b/pyproject.toml index d18f861..6e1719d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "stitchee" -version = "0.1.0" +version = "1.0.0rc3" description = "NetCDF4 Along-existing-dimension Concatenation Service" authors = ["Daniel Kaufman "] readme = "README.md" @@ -14,18 +14,17 @@ packages = [ [tool.poetry.dependencies] python = "^3.10" netcdf4 = "^1.6.5" -xarray = "^2024.2.0" -dask = "^2024.2.1" +xarray = "^2024.3.0" +dask = "^2024.4.1" harmony-service-lib = "^1.0.25" [tool.poetry.group.dev.dependencies] pytest = "^8.1.1" mypy = "^1.9.0" black = "^24.2.0" -ruff = "^0.3.2" +ruff = "^0.3.7" coverage = "^7.4.4" - -[tool.poetry.group.extras.dependencies] +pytest-cov = "^5.0.0" [tool.poetry.scripts] stitchee_harmony = 'concatenator.harmony.cli:main' diff --git a/tests/conftest.py b/tests/conftest.py index c4bd1e3..232b69e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,6 +5,7 @@ from pathlib import Path import netCDF4 as nc +import numpy as np import pytest @@ -54,6 +55,38 @@ def temp_output_dir(tmpdir_factory) -> Path: return Path(tmpdir_factory.mktemp("tmp-")) +@pytest.fixture(scope="function") +def toy_empty_dataset(temp_toy_data_dir): + """Creates groups, dimensions, variables; and uses chosen step values in an open dataset""" + + filepath = temp_toy_data_dir / "test_empty_dataset.nc" + + f = nc.Dataset(filename=filepath, mode="w") + + grp1 = f.createGroup("Group1") + + # Root-level Dimensions/Variables + f.createDimension("step", 1) + f.createDimension("track", 1) + f.createVariable("step", "f4", ("step",), fill_value=False) + f.createVariable("track", "f4", ("track",), fill_value=False) + f.createVariable("var0", "f4", ("step", "track")) + + # + f["step"][:] = [np.nan] + f["track"][:] = [np.nan] + f["var0"][:] = [np.nan] + + # Group 1 Dimensions/Variables + grp1.createVariable("var1", "f8", ("step", "track")) + # + grp1["var1"][:] = [np.nan] + + f.close() + + return filepath + + def add_to_ds_3dims_3vars_4coords_1group_with_step_values(open_ds: nc.Dataset, step_values: list): """Creates groups, dimensions, variables; and uses chosen step values in an open dataset""" grp1 = open_ds.createGroup("Group1") diff --git a/tests/data/unit-test-data/singleton_null_variables-TEMPO_NO2_L2_V01_20240123T231358Z_S013G03_product_vertical_column_total.nc4 b/tests/data/unit-test-data/singleton_null_variables-TEMPO_NO2_L2_V01_20240123T231358Z_S013G03_product_vertical_column_total.nc4 new file mode 100644 index 0000000..7c2abc9 Binary files /dev/null and b/tests/data/unit-test-data/singleton_null_variables-TEMPO_NO2_L2_V01_20240123T231358Z_S013G03_product_vertical_column_total.nc4 differ diff --git a/tests/test_concat.py b/tests/test_concat.py index b8f7e7a..0b54f84 100644 --- a/tests/test_concat.py +++ b/tests/test_concat.py @@ -7,6 +7,7 @@ import netCDF4 as nc 
import pytest +from concatenator.dataset_and_group_handling import GROUP_DELIM from concatenator.stitchee import stitchee from . import data_for_tests_dir @@ -46,10 +47,21 @@ def run_verification_with_stitchee( # Verify that the length of the record dimension in the concatenated file equals # the sum of the lengths across the input files - length_sum = 0 + original_files_length_sum = 0 for file in prepared_input_files: - length_sum += len(nc.Dataset(file).variables[record_dim_name]) - assert length_sum == len(merged_dataset.variables[record_dim_name]) + # length_sum += len(nc.Dataset(file).variables[record_dim_name]) + with nc.Dataset(file) as ncds: + try: + original_files_length_sum += ncds.dimensions[record_dim_name].size + except KeyError: + original_files_length_sum += ncds.dimensions[GROUP_DELIM + record_dim_name].size + + try: + merged_file_length = merged_dataset.dimensions[record_dim_name].size + except KeyError: + merged_file_length = merged_dataset.dimensions[GROUP_DELIM + record_dim_name].size + + assert original_files_length_sum == merged_file_length return merged_dataset @@ -88,6 +100,14 @@ def test_tempo_no2_concat_with_stitchee(self, temp_output_dir): concat_method="xarray-concat", ) + def test_tempo_no2_subsetter_output_concat_with_stitchee(self, temp_output_dir): + self.run_verification_with_stitchee( + input_dir=data_for_tests_dir / "tempo/no2_subsetted", + output_dir=temp_output_dir, + output_name="tempo_no2_stitcheed.nc", + concat_method="xarray-concat", + ) + def test_tempo_hcho_concat_with_stitchee(self, temp_output_dir): self.run_verification_with_stitchee( input_dir=data_for_tests_dir / "tempo/hcho", diff --git a/tests/test_group_handling.py b/tests/test_group_handling.py deleted file mode 100644 index 0515910..0000000 --- a/tests/test_group_handling.py +++ /dev/null @@ -1,28 +0,0 @@ -"""Tests for manipulating netCDF groups.""" - -# pylint: disable=C0116, C0301 - -from concatenator.attribute_handling import (_flatten_coordinate_attribute, - regroup_coordinate_attribute) - - -def test_coordinate_attribute_flattening(): - # Case with groups present and double spaces. - assert _flatten_coordinate_attribute( - "Time_and_Position/time Time_and_Position/instrument_fov_latitude Time_and_Position/instrument_fov_longitude" - ) == '__Time_and_Position__time __Time_and_Position__instrument_fov_latitude __Time_and_Position__instrument_fov_longitude' - - # Case with NO groups present and single spaces. - assert _flatten_coordinate_attribute( - "time longitude latitude ozone_profile_pressure ozone_profile_altitude" - ) == "__time __longitude __latitude __ozone_profile_pressure __ozone_profile_altitude" - - -def test_coordinate_attribute_regrouping(): - # Case with groups present and double spaces. - assert regroup_coordinate_attribute( - '__Time_and_Position__time __Time_and_Position__instrument_fov_latitude __Time_and_Position__instrument_fov_longitude') == "Time_and_Position/time Time_and_Position/instrument_fov_latitude Time_and_Position/instrument_fov_longitude" - - # Case with NO groups present and single spaces. 
- assert regroup_coordinate_attribute( - "__time __longitude __latitude __ozone_profile_pressure __ozone_profile_altitude") == "time longitude latitude ozone_profile_pressure ozone_profile_altitude" diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/test_dataset_and_group_handling.py b/tests/unit/test_dataset_and_group_handling.py new file mode 100644 index 0000000..03678a9 --- /dev/null +++ b/tests/unit/test_dataset_and_group_handling.py @@ -0,0 +1,72 @@ +"""Tests for manipulating netCDF groups.""" + +# pylint: disable=C0116, C0301 + +import netCDF4 as nc + +from concatenator.attribute_handling import ( + _flatten_coordinate_attribute, + regroup_coordinate_attribute, +) +from concatenator.dataset_and_group_handling import _is_file_empty + +from .. import data_for_tests_dir + + +def test_dataset_with_singleton_null_values_is_identified_as_empty(): + """Ensure that a dataset with only null arrays with 1-length dimensions is identified as empty.""" + singleton_null_values_file = ( + data_for_tests_dir + / "unit-test-data" + / "singleton_null_variables-TEMPO_NO2_L2_V01_20240123T231358Z_S013G03_product_vertical_column_total.nc4" + ) + with nc.Dataset(singleton_null_values_file) as ds: + assert _is_file_empty(ds) + + +def test_toy_dataset_with_singleton_null_values_is_identified_as_empty(toy_empty_dataset): + """Ensure that a dataset with only null arrays with 1-length dimensions is identified as empty.""" + with nc.Dataset(toy_empty_dataset) as ds: + assert _is_file_empty(ds) + + +def test_dataset_with_values_is_identified_as_not_empty(ds_3dims_3vars_4coords_1group_part1): + """Ensure that a dataset with non-null arrays is identified as NOT empty.""" + with nc.Dataset(ds_3dims_3vars_4coords_1group_part1) as ds: + assert _is_file_empty(ds) is False + + +def test_coordinate_attribute_flattening(): + # Case with groups present and double spaces. + assert ( + _flatten_coordinate_attribute( + "Time_and_Position/time Time_and_Position/instrument_fov_latitude Time_and_Position/instrument_fov_longitude" + ) + == "__Time_and_Position__time __Time_and_Position__instrument_fov_latitude __Time_and_Position__instrument_fov_longitude" + ) + + # Case with NO groups present and single spaces. + assert ( + _flatten_coordinate_attribute( + "time longitude latitude ozone_profile_pressure ozone_profile_altitude" + ) + == "__time __longitude __latitude __ozone_profile_pressure __ozone_profile_altitude" + ) + + +def test_coordinate_attribute_regrouping(): + # Case with groups present and double spaces. + assert ( + regroup_coordinate_attribute( + "__Time_and_Position__time __Time_and_Position__instrument_fov_latitude __Time_and_Position__instrument_fov_longitude" + ) + == "Time_and_Position/time Time_and_Position/instrument_fov_latitude Time_and_Position/instrument_fov_longitude" + ) + + # Case with NO groups present and single spaces. + assert ( + regroup_coordinate_attribute( + "__time __longitude __latitude __ozone_profile_pressure __ozone_profile_altitude" + ) + == "time longitude latitude ozone_profile_pressure ozone_profile_altitude" + )
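
As a usage note: a minimal sketch of how the new emptiness check behaves, assuming only what this change set adds (`_is_file_empty` and `validate_workable_files` in `concatenator.dataset_and_group_handling`); the file names below are hypothetical.

```python
# Sketch: an all-NaN file is classified as empty, a file with real values as workable.
import logging

import netCDF4 as nc
import numpy as np

from concatenator.dataset_and_group_handling import (
    _is_file_empty,
    validate_workable_files,
)

# Create one all-NaN (empty) file and one file with real values.
for path, values in [("empty.nc", [np.nan, np.nan]), ("full.nc", [1.0, 2.0])]:
    with nc.Dataset(path, "w") as ds:
        ds.createDimension("step", 2)
        var = ds.createVariable("step", "f4", ("step",), fill_value=False)
        var[:] = values

with nc.Dataset("empty.nc") as ds:
    assert _is_file_empty(ds)  # every value is NaN, so the file counts as empty

with nc.Dataset("full.nc") as ds:
    assert not _is_file_empty(ds)  # real values present, so the file is workable

# validate_workable_files() drops empty and unreadable files before concatenation;
# "missing.nc" does not exist, so opening it raises OSError and it is skipped.
workable, count = validate_workable_files(
    ["empty.nc", "full.nc", "missing.nc"], logging.getLogger(__name__)
)
assert workable == ["full.nc"] and count == 1
```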