From 2c8375402e360e3868b55f67ac1763a8ed63f1bb Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Wed, 25 Sep 2024 21:22:05 -0600 Subject: [PATCH] DAS-2214 Add NSIDC IceSAT2 Regression Tests (#92) Co-authored-by: Amy Steiker <47193922+asteiker@users.noreply.github.com> --- .gitattributes | 1 + .github/workflows/build-all-images.yml | 15 +- .github/workflows/build-target-image.yml | 17 +- CHANGELOG.md | 17 +- README.md | 29 +- script/test-in-bamboo.sh | 2 +- test/.flake8 | 4 - test/Dockerfile | 10 + test/Makefile | 54 ++- .../NSIDC-ICESAT2_Regression.ipynb | 435 ++++++++++++++++++ .../ancillary/Iceland_sliver.zip | Bin 0 -> 1136 bytes .../Ross_Sea_positive_lon_only.geojson | 52 +++ .../ancillary/SriLanka_simple.kml | 24 + .../ancillary/Tasmania_sliver.geojson | 36 ++ test/nsidc-icesat2/environment.yaml | 13 + .../ATL03_subset_bounding_box_reference.h5 | 3 + ...TL04_subset_by_temporal_range_reference.h5 | 3 + .../ATL06_subset_by_shapefile_reference.h5 | 3 + .../ATL07_subset_bounding_box_reference.h5 | 3 + .../ATL08_subset_bounding_box_reference.h5 | 3 + .../ATL08_subset_by_shapefile_reference.h5 | 3 + .../ATL09_subset_by_shapefile_reference.h5 | 3 + .../ATL10_subset_bounding_box_reference.h5 | 3 + .../ATL10_subset_by_shapefile_reference.h5 | 3 + .../ATL12_subset_bounding_box_reference.h5 | 3 + .../ATL13_subset_bounding_box_reference.h5 | 3 + test/nsidc-icesat2/version.txt | 1 + test/run_notebooks.sh | 2 +- test/shared_utils/README.md | 52 +++ test/shared_utils/utilities.py | 148 ++++++ 30 files changed, 910 insertions(+), 35 deletions(-) create mode 100644 .gitattributes delete mode 100644 test/.flake8 create mode 100644 test/nsidc-icesat2/NSIDC-ICESAT2_Regression.ipynb create mode 100644 test/nsidc-icesat2/ancillary/Iceland_sliver.zip create mode 100644 test/nsidc-icesat2/ancillary/Ross_Sea_positive_lon_only.geojson create mode 100644 test/nsidc-icesat2/ancillary/SriLanka_simple.kml create mode 100644 test/nsidc-icesat2/ancillary/Tasmania_sliver.geojson create mode 
100644 test/nsidc-icesat2/environment.yaml create mode 100644 test/nsidc-icesat2/reference_files/ATL03_subset_bounding_box_reference.h5 create mode 100644 test/nsidc-icesat2/reference_files/ATL04_subset_by_temporal_range_reference.h5 create mode 100644 test/nsidc-icesat2/reference_files/ATL06_subset_by_shapefile_reference.h5 create mode 100644 test/nsidc-icesat2/reference_files/ATL07_subset_bounding_box_reference.h5 create mode 100644 test/nsidc-icesat2/reference_files/ATL08_subset_bounding_box_reference.h5 create mode 100644 test/nsidc-icesat2/reference_files/ATL08_subset_by_shapefile_reference.h5 create mode 100644 test/nsidc-icesat2/reference_files/ATL09_subset_by_shapefile_reference.h5 create mode 100644 test/nsidc-icesat2/reference_files/ATL10_subset_bounding_box_reference.h5 create mode 100644 test/nsidc-icesat2/reference_files/ATL10_subset_by_shapefile_reference.h5 create mode 100644 test/nsidc-icesat2/reference_files/ATL12_subset_bounding_box_reference.h5 create mode 100644 test/nsidc-icesat2/reference_files/ATL13_subset_bounding_box_reference.h5 create mode 100644 test/nsidc-icesat2/version.txt create mode 100644 test/shared_utils/README.md create mode 100644 test/shared_utils/utilities.py diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..560869c --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +test/nsidc-icesat2/reference_files/*.h5 filter=lfs diff=lfs merge=lfs -text diff --git a/.github/workflows/build-all-images.yml b/.github/workflows/build-all-images.yml index 5119c61..b9e5638 100644 --- a/.github/workflows/build-all-images.yml +++ b/.github/workflows/build-all-images.yml @@ -29,6 +29,11 @@ jobs: - image: "n2z" notebook: "N2Z_Regression.ipynb" + - + image: "nsidc-icesat2" + notebook: "NSIDC-ICESAT2_Regression.ipynb" + shared-utils: "true" + lfs: "true" - image: "regridder" notebook: "Regridder_Regression.ipynb" @@ -41,14 +46,16 @@ jobs: - image: "variable-subsetter" notebook: "VariableSubsetter_Regression.ipynb" - - - 
image: "geoloco" + - + image: "geoloco" notebook: "Geoloco_Regression.ipynb" - - - image: "net2cog" + - + image: "net2cog" notebook: "net2cog_Regression.ipynb" uses: ./.github/workflows/build-target-image.yml with: image-short-name: ${{ matrix.targets.image }} notebook-name: ${{ matrix.targets.notebook }} + shared-utils: ${{ matrix.targets.shared-utils || 'false' }} + lfs: ${{ matrix.targets.lfs || '' }} diff --git a/.github/workflows/build-target-image.yml b/.github/workflows/build-target-image.yml index 259f8b8..cc26ea7 100644 --- a/.github/workflows/build-target-image.yml +++ b/.github/workflows/build-target-image.yml @@ -13,6 +13,12 @@ on: notebook-name: required: true type: string + shared-utils: + required: true + type: string + lfs: + required: true + type: string env: REGISTRY: ghcr.io @@ -22,10 +28,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout regression test repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 + with: + lfs: ${{inputs.lfs}} - name: Did this image's version change? - uses: dorny/paths-filter@v2 + uses: dorny/paths-filter@v3 id: changes with: filters: | @@ -54,13 +62,12 @@ jobs: tags: | type=semver,pattern={{version}},value=${{ env.semantic_version }} - # https://github.com/docker/build-push-action/issues/761#issuecomment-1383822381 - name: Set up Docker Buildx if: steps.changes.outputs.src == 'true' uses: docker/setup-buildx-action@v2 with: driver-opts: | - image=moby/buildkit:v0.10.6 + image=moby/buildkit:latest - name: Build and Push Docker image uses: docker/build-push-action@v3 @@ -69,6 +76,8 @@ jobs: build-args: | notebook=${{inputs.notebook-name}} sub_dir=${{inputs.image-short-name}} + shared_utils=${{inputs.shared-utils}} + file: ./test/Dockerfile context: ./test push: true diff --git a/CHANGELOG.md b/CHANGELOG.md index d351010..455475f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,14 +5,23 @@ versioning. 
Rather than a static releases, this repository contains of a number of regression tests that are each semi-independent. This CHANGELOG file should be used to document pull requests to this repository. -## 2024-08-30 +## 2024-09-24 ([#92](https://github.com/nasa/harmony-regression-tests/pull/92)) + +- Adds NSIDC ICESat2 Regression test suite. + +- Adds `shared_utils` functionality. This directory contains routines that are commonly used in regression tests and limits code duplication. To include the `shared_utils` directory in your docker container, update the `Makefile` to add a shared_utils build arg. E.g. `--build-arg shared_utils=true` and update the `.github/workflows/build-all-images.yml` to add a `shared-utils` key of "true" (see the nsidc-icesat2-image target in each file) + +- Adds Git LFS functionality. Large files can be configured to use [Git LFS](https://git-lfs.com/). This PR configures the NSIDC reference files `test/nsidc-icesat2/reference_files/*.h5`. + + +## 2024-08-30 ([#94](https://github.com/nasa/harmony-regression-tests/pull/94)) Add regression test for net2cog ## 2024-08-05 ([#86](https://github.com/nasa/harmony-regression-tests/pull/86)) -Adds this file to capture changes to the repository. +- Adds this file to capture changes to the repository. -Adds pre-commit.ci behavior to the repository. This setup ensures consistent code style, catches common errors, and maintains file hygiene across the project. +- Adds pre-commit.ci behavior to the repository. This setup ensures consistent code style, catches common errors, and maintains file hygiene across the project. -Updates the base image for all regression tests to `mambaorg/micromamba:1.5.8-jammy` +- Updates the base image for all regression tests to `mambaorg/micromamba:1.5.8-jammy` diff --git a/README.md b/README.md index 1928b24..d12154c 100644 --- a/README.md +++ b/README.md @@ -90,12 +90,20 @@ environment before installing from the environment.yml. 
is simplest to use the same string for the subdirectory name and the suite name. 1. Update the `test/Makefile` to be able to build a Docker image for the new - test suite: + test suite optionally including the shared utility directory: ``` -image - docker build -t ghcr.io/nasa/regression-tests-:latest -f ./Dockerfile --build-arg notebook= --build-arg sub_dir= . + docker build -t ghcr.io/nasa/regression-tests-:latest -f ./Dockerfile --build-arg notebook= --build-arg sub_dir= [--build-arg shared_utils=true] . ``` + +1. If you would like to use shared utilities to help ease the coding you can + add the shared_utils build-arg to your docker build command in the Makefile + (as well as adding it as a key in the `workflow/build-all-images.yml` file). + When enabled, this argument will include the `tests/shared_utils` directory + as a sibling directory to your tests. See the + `tests/shared_utils/README.md` file for more information. + 1. Update the `make images` rule to include building the new image. ``` @@ -123,12 +131,24 @@ to create a new version of the test image any time the related `version.txt` file is updated. To do so, simply add a new target to the [build-all-images.yml](https://github.com/nasa/harmony-regression-tests/blob/main/.github/workflows/build-all-images.yml) workflow in the `.github/workflows` directory: -``` +```yaml - image: notebook: ``` +The above is the basic structure for adding a new image to the CI/CD. Two additional options `shared-utils` and `lfs` default to off, but can be overridden as they are for the nsidc-icesat2 image. `shared-utils` controls the addition of the `tests/shared_utils` directory into your image. `lfs` enables git LFS for your image and should be enabled only if you have added reference files with git LFS. 
+ +``` yaml + - + image: "nsidc-icesat2" + notebook: "NSIDC-ICESAT2_Regression.ipynb" + shared-utils: "true" + lfs: "true" + +``` + + ## Test suite contents: This section of the README describes the files that are expected in every test @@ -145,7 +165,8 @@ For example, in the `swath-projector` directory we have ``` * `reference_files` contains golden template files for expected outputs of - `tests. + `tests`. When you add new binary files to your test, you should configure + them to use Git LFS as well as keep them as small as possible. * `SwathProjector_Regression.ipynb` is the regression test Jupyter notebook itself, running tests in cells. A test suite fails when a Jupyter notebook cell returns an error from the execution. Each regression test is designed to diff --git a/script/test-in-bamboo.sh b/script/test-in-bamboo.sh index c9405c6..586e75f 100755 --- a/script/test-in-bamboo.sh +++ b/script/test-in-bamboo.sh @@ -42,7 +42,7 @@ echo "harmony host url: ${harmony_host_url}" ## e.g. if REGRESSION_TESTS_N2Z_IMAGE environment was set, the value would be used instead of the default. 
image_names=() -all_tests=(harmony harmony-regression hoss hga n2z swath-projector trajectory-subsetter variable-subsetter regridder hybig geoloco net2cog) +all_tests=(harmony harmony-regression hoss hga n2z nsidc-icesat2 swath-projector trajectory-subsetter variable-subsetter regridder hybig geoloco net2cog) for image in "${all_tests[@]}"; do image_names+=($(image_name "$image" true)) done diff --git a/test/.flake8 b/test/.flake8 deleted file mode 100644 index 71f838a..0000000 --- a/test/.flake8 +++ /dev/null @@ -1,4 +0,0 @@ -[flake8] -select = BLK,C,E,F,W -ignore = E203,W503 -max-line-length = 88 diff --git a/test/Dockerfile b/test/Dockerfile index d12023e..0ea1fa4 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -3,9 +3,11 @@ USER root ARG sub_dir ARG notebook +ARG shared_utils=false ENV env_sub_dir=$sub_dir ENV env_notebook=$notebook + ENV AWS_SECRET_ACCESS_KEY=no-access ENV AWS_ACCESS_KEY_ID=no-access ENV AWS_SESSION_TOKEN=no-access @@ -23,6 +25,14 @@ COPY ${sub_dir}/environment.yaml ./${sub_dir} RUN micromamba create -y -f ${sub_dir}/environment.yaml \ && micromamba clean --all --force-pkgs-dirs --yes +# Include shared utility functions if requested. This is a bit awkward, it +# always copies the shared utils directory to the image, but then deletes it if +# you didn't want it. +COPY shared_utils ./shared_utils +RUN if [ "$shared_utils" = "false" ]; then \ + rm -rf ./shared_utils; \ + fi + COPY ${sub_dir} ./${sub_dir} ENTRYPOINT /bin/bash ./notebook-entrypoint.sh diff --git a/test/Makefile b/test/Makefile index fd909f4..503c69f 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,37 +1,65 @@ harmony-image: Dockerfile harmony/environment.yaml - docker build -t ghcr.io/nasa/regression-tests-harmony:latest -f ./Dockerfile --build-arg notebook=Harmony.ipynb --build-arg sub_dir=harmony . + docker build -t ghcr.io/nasa/regression-tests-harmony:latest -f ./Dockerfile \ + --build-arg notebook=Harmony.ipynb --build-arg sub_dir=harmony . 
harmony-regression-image: Dockerfile harmony-regression/environment.yaml - docker build -t ghcr.io/nasa/regression-tests-harmony-regression:latest -f ./Dockerfile --build-arg notebook=HarmonyRegression.ipynb --build-arg sub_dir=harmony-regression . + docker build -t ghcr.io/nasa/regression-tests-harmony-regression:latest -f ./Dockerfile \ + --build-arg notebook=HarmonyRegression.ipynb --build-arg sub_dir=harmony-regression . hga-image: Dockerfile hga/environment.yaml - docker build -t ghcr.io/nasa/regression-tests-hga:latest -f ./Dockerfile --build-arg notebook=HGA_Regression.ipynb --build-arg sub_dir=hga . + docker build -t ghcr.io/nasa/regression-tests-hga:latest -f ./Dockerfile \ + --build-arg notebook=HGA_Regression.ipynb --build-arg sub_dir=hga . hoss-image: Dockerfile hoss/environment.yaml - docker build -t ghcr.io/nasa/regression-tests-hoss:latest -f ./Dockerfile --build-arg notebook=HOSS_Regression.ipynb --build-arg sub_dir=hoss . + docker build -t ghcr.io/nasa/regression-tests-hoss:latest -f ./Dockerfile \ + --build-arg notebook=HOSS_Regression.ipynb --build-arg sub_dir=hoss . hybig-image: Dockerfile hybig/environment.yaml - docker build -t ghcr.io/nasa/regression-tests-hybig:latest -f ./Dockerfile --build-arg notebook=HyBIG_Regression.ipynb --build-arg sub_dir=hybig . + docker build -t ghcr.io/nasa/regression-tests-hybig:latest -f ./Dockerfile \ + --build-arg notebook=HyBIG_Regression.ipynb --build-arg sub_dir=hybig . n2z-image: Dockerfile n2z/environment.yaml - docker build -t ghcr.io/nasa/regression-tests-n2z:latest -f ./Dockerfile --build-arg notebook=N2Z_Regression.ipynb --build-arg sub_dir=n2z . + docker build -t ghcr.io/nasa/regression-tests-n2z:latest -f ./Dockerfile \ + --build-arg notebook=N2Z_Regression.ipynb --build-arg sub_dir=n2z . 
+ +nsidc-icesat2-image: Dockerfile nsidc-icesat2/environment.yaml + docker build -t ghcr.io/nasa/regression-tests-nsidc-icesat2:latest -f ./Dockerfile \ + --build-arg notebook=NSIDC-ICESAT2_Regression.ipynb --build-arg sub_dir=nsidc-icesat2 --build-arg shared_utils=true . regridder-image: Dockerfile regridder/environment.yaml - docker build -t ghcr.io/nasa/regression-tests-regridder:latest -f ./Dockerfile --build-arg notebook=Regridder_Regression.ipynb --build-arg sub_dir=regridder . + docker build -t ghcr.io/nasa/regression-tests-regridder:latest -f ./Dockerfile \ + --build-arg notebook=Regridder_Regression.ipynb --build-arg sub_dir=regridder . swath-projector-image: Dockerfile swath-projector/environment.yaml - docker build -t ghcr.io/nasa/regression-tests-swath-projector:latest -f ./Dockerfile --build-arg notebook=SwathProjector_Regression.ipynb --build-arg sub_dir=swath-projector . + docker build -t ghcr.io/nasa/regression-tests-swath-projector:latest -f ./Dockerfile \ + --build-arg notebook=SwathProjector_Regression.ipynb --build-arg sub_dir=swath-projector . trajectory-subsetter-image: Dockerfile trajectory-subsetter/environment.yaml - docker build -t ghcr.io/nasa/regression-tests-trajectory-subsetter:latest -f ./Dockerfile --build-arg notebook=TrajectorySubsetter_Regression.ipynb --build-arg sub_dir=trajectory-subsetter . + docker build -t ghcr.io/nasa/regression-tests-trajectory-subsetter:latest -f ./Dockerfile \ + --build-arg notebook=TrajectorySubsetter_Regression.ipynb --build-arg sub_dir=trajectory-subsetter . variable-subsetter-image: Dockerfile variable-subsetter/environment.yaml - docker build -t ghcr.io/nasa/regression-tests-variable-subsetter:latest -f ./Dockerfile --build-arg notebook=VariableSubsetter_Regression.ipynb --build-arg sub_dir=variable-subsetter . + docker build -t ghcr.io/nasa/regression-tests-variable-subsetter:latest -f ./Dockerfile \ + --build-arg notebook=VariableSubsetter_Regression.ipynb --build-arg sub_dir=variable-subsetter . 
geoloco-image: Dockerfile geoloco/environment.yaml - docker build -t ghcr.io/nasa/regression-tests-geoloco:latest -f ./Dockerfile --build-arg notebook=Geoloco_Regression.ipynb --build-arg sub_dir=geoloco . + docker build -t ghcr.io/nasa/regression-tests-geoloco:latest -f ./Dockerfile \ + --build-arg notebook=Geoloco_Regression.ipynb --build-arg sub_dir=geoloco . net2cog-image: Dockerfile net2cog/environment.yaml - docker build -t ghcr.io/nasa/regression-tests-net2cog:latest -f ./Dockerfile --build-arg notebook=net2cog_Regression.ipynb --build-arg sub_dir=net2cog . + docker build -t ghcr.io/nasa/regression-tests-net2cog:latest -f ./Dockerfile \ + --build-arg notebook=net2cog_Regression.ipynb --build-arg sub_dir=net2cog . -images: harmony-image harmony-regression-image hga-image hoss-image hybig-image n2z-image regridder-image swath-projector-image trajectory-subsetter-image variable-subsetter-image geoloco-image net2cog-image +images: harmony-image \ + harmony-regression-image \ + hga-image \ + hoss-image \ + hybig-image \ + n2z-image \ + nsidc-icesat2-image \ + regridder-image \ + swath-projector-image \ + trajectory-subsetter-image \ + variable-subsetter-image \ + geoloco-image \ + net2cog-image diff --git a/test/nsidc-icesat2/NSIDC-ICESAT2_Regression.ipynb b/test/nsidc-icesat2/NSIDC-ICESAT2_Regression.ipynb new file mode 100644 index 0000000..516f307 --- /dev/null +++ b/test/nsidc-icesat2/NSIDC-ICESAT2_Regression.ipynb @@ -0,0 +1,435 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b26a933e-65ac-4a7a-b298-855c0b845ff1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# NSIDC ICESat2 Regression tests\n", + "\n", + "### This Jupyter notebook runs and verifies a series of test requests against NSIDC's ICESat2 data.\n", + "\n", + "Requests are submitted and the retrieved data compared to a set of verified results.\n", + "\n", + "Sample requests include:\n", + "\n", + "- Subset by bounding 
box\n", + "- Subset by temporal range\n", + "- Subset by shapefile \n", + "\n", + "\n", + "We test against ICESat2 v6 collections:\n", + "[ATL03](https://nsidc.org/data/atl03/versions/6), [ATL04](https://nsidc.org/data/atl04/versions/6), [ATL06](https://nsidc.org/data/atl06/versions/6), [ATL07](https://nsidc.org/data/atl07/versions/6), [ATL08](https://nsidc.org/data/atl08/versions/6), [ATL09](https://nsidc.org/data/atl09/versions/6), [ATL10](https://nsidc.org/data/atl10/versions/6), [ATL12](https://nsidc.org/data/atl12/versions/6) and [ATL13](https://nsidc.org/data/atl13/versions/6)\n", + "\n", + "\n", + "## Prerequisites\n", + "\n", + "The dependencies for running this notebook are listed in the\n", + "[environment.yaml](https://github.com/nasa/harmony-regression-tests/blob/main/test/nsidc-icesat2/environment.yaml).\n", + "\n", + "In order to test locally, run the following commands from the `test/nsidc-icesat2/` directory to create and activate the conda environment necessary to run the regression testing notebook.\n", + "\n", + "```sh\n", + "conda env create -f ./environment.yaml && conda activate papermill-nsidc-icesat2\n", + "```\n", + "\n", + "To use this environment within a shared Jupyter Hub, see [instructions](https://nasa-openscapes.github.io/earthdata-cloud-cookbook/contributing/workflow.html#create-a-jupyter-kernel-to-run-notebooks) in the NASA Earthdata Cloud Cookbook for how to create a new kernel based on this environment. 
\n", + "\n", + "## Authentication\n", + "\n", + "To provide your credentials to harmony, a `.netrc` file must be located in the `test` directory of this repository.\n", + "Ensure the credentials in this .netrc belong to a user that can access the NSIDC data which is protected by ACLs in UAT and SIT.\n", + "\n", + "\n", + "## Set the Harmony environment:\n", + "\n", + "The next cell below sets the `harmony_host_url` to one of the following valid values:\n", + "\n", + "* Production: \n", + "* UAT: \n", + "* SIT: \n", + "* Local: \n", + "\n", + "By default, the value is set to use Harmony's UAT environment. You can modify the target environment in two ways when using this notebook.\n", + "\n", + "* Run this notebook in a local Jupyter notebook server and simply edit the value of `harmony_host_url` in the cell below to be the desired value for your environment.\n", + "\n", + "* Run the `run_notebooks.sh` script, which uses the papermill library to parameterize and run notebooks. Before running, set the environment variable `HARMONY_HOST_URL` to the desired environment's URL from the list above. This variable will override the default value in the cell below, allowing papermill to inject the correct URL into the notebook at runtime." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f08acc3f-6bc7-4a2e-a602-cc10ecb27677", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "harmony_host_url = 'https://harmony.uat.earthdata.nasa.gov'" + ] + }, + { + "cell_type": "markdown", + "id": "861e2e7d-9627-4c4f-9eda-77e915a3cc2d", + "metadata": {}, + "source": [ + "### Import required packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6c54643-4653-4368-9158-1deb8197f9c9", + "metadata": {}, + "outputs": [], + "source": [ + "from harmony import BBox, Client, Collection, Dimension, Environment, Request\n", + "from os.path import exists\n", + "from datetime import datetime\n", + "from pathlib import Path\n", + "from tempfile import TemporaryDirectory" + ] + }, + { + "cell_type": "markdown", + "id": "723b8b8e-312c-4bdc-9620-3cbcb48183d7", + "metadata": {}, + "source": [ + "#### Import shared utility functions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4919504b-1f79-4073-ae79-9a23fb1f906f", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "sys.path.append('../shared_utils')\n", + "from utilities import (\n", + " print_error,\n", + " print_success,\n", + " submit_and_download,\n", + " compare_results_to_reference_file_new,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "b38863ea-7be5-4251-afa8-9669e973fc36", + "metadata": {}, + "source": [ + "### Set up test information" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc20331c-0f04-4d38-8a38-96431fde6088", + "metadata": {}, + "outputs": [], + "source": [ + "non_production_configuration = {\n", + " 'subset_bounding_box': {\n", + " 'ATL03': {\n", + " 'collection_concept_id': Collection(id='C1256407609-NSIDC_CUAT'),\n", + " 'granule_id': 'G1262402516-NSIDC_CUAT',\n", + " 'spatial': BBox(-105.5, 40.0, -105.0, 40.005),\n", + " 
},\n", + " 'ATL07': {\n", + " 'collection_concept_id': Collection(id='C1256535488-NSIDC_CUAT'),\n", + " 'granule_id': 'G1261684946-NSIDC_CUAT',\n", + " 'spatial': BBox(-112.0, 80.0, -93.0, 80.3),\n", + " },\n", + " 'ATL08': {\n", + " 'collection_concept_id': Collection(id='C1256432189-NSIDC_CUAT'),\n", + " 'granule_id': 'G1260745539-NSIDC_CUAT',\n", + " 'spatial': BBox(-105.5, 40.0, -105.0, 40.25),\n", + " },\n", + " 'ATL10': {\n", + " 'collection_concept_id': Collection(id='C1256535487-NSIDC_CUAT'),\n", + " 'granule_id': 'G1261681735-NSIDC_CUAT',\n", + " 'spatial': BBox(161.0, -75.0, 171.0, -74.0),\n", + " },\n", + " 'ATL12': {\n", + " 'collection_concept_id': Collection(id='C1256476536-NSIDC_CUAT'),\n", + " 'granule_id': 'G1263137424-NSIDC_CUAT',\n", + " 'spatial': BBox(-79.0, 27.0, -77.0, 34.0),\n", + " },\n", + " 'ATL13': {\n", + " 'collection_concept_id': Collection(id='C1257810199-NSIDC_CUAT'),\n", + " 'granule_id': 'G1261681705-NSIDC_CUAT',\n", + " 'spatial': BBox(-89.0, 43.0, -75.0, 45.0),\n", + " },\n", + " },\n", + " 'subset_by_temporal_range': {\n", + " 'ATL04': {\n", + " 'collection_concept_id': Collection(id='C1256535558-NSIDC_CUAT'),\n", + " 'granule_id': 'G1256952662-NSIDC_CUAT',\n", + " 'temporal': {\n", + " 'start': datetime.fromisoformat(\"2020-04-08T08:00:00.000Z\"),\n", + " 'stop': datetime.fromisoformat(\"2020-04-08T08:05:00.000Z\"),\n", + " },\n", + " 'coords_to_rename': ['delta_time'],\n", + " },\n", + " # BLOCKED by https://bugs.earthdata.nasa.gov/browse/DAS-2233\n", + " # 'ATL08': {\n", + " # 'collection_concept_id': Collection(id='C1256432189-NSIDC_CUAT'),\n", + " # 'granule_id': 'G1261385533-NSIDC_CUAT',\n", + " # 'temporal': {\n", + " # 'start': datetime.fromisoformat(\"2022-07-31T23:01:00.000Z\"),\n", + " # 'stop': datetime.fromisoformat(\"2022-07-31T23:01:10.000Z\"),\n", + " # },\n", + " # 'coords_to_rename': [],\n", + " # },\n", + " },\n", + " 'subset_by_shapefile': {\n", + " 'ATL06': {\n", + " 'collection_concept_id': 
Collection(id='C1256358217-NSIDC_CUAT'),\n", + " 'granule_id': 'G1260779121-NSIDC_CUAT',\n", + " 'shape': 'ancillary/Iceland_sliver.zip',\n", + " },\n", + " 'ATL08': {\n", + " 'collection_concept_id': Collection(id='C1256432189-NSIDC_CUAT'),\n", + " 'granule_id': 'G1260498664-NSIDC_CUAT',\n", + " 'shape': 'ancillary/SriLanka_simple.kml',\n", + " },\n", + " 'ATL09': {\n", + " 'collection_concept_id': Collection(id='C1256563776-NSIDC_CUAT'),\n", + " 'granule_id': 'G1262106425-NSIDC_CUAT',\n", + " 'shape': 'ancillary/Tasmania_sliver.geojson',\n", + " },\n", + " 'ATL10': {\n", + " 'collection_concept_id': Collection(id='C1256535487-NSIDC_CUAT'),\n", + " 'granule_id': 'G1261681735-NSIDC_CUAT',\n", + " 'shape': 'ancillary/Ross_Sea_positive_lon_only.geojson',\n", + " },\n", + " },\n", + "}\n", + "\n", + "\n", + "environment_configuration = {\n", + " 'https://harmony.uat.earthdata.nasa.gov': {\n", + " **non_production_configuration,\n", + " 'env': Environment.UAT,\n", + " },\n", + " 'https://harmony.sit.earthdata.nasa.gov': {\n", + " **non_production_configuration,\n", + " 'env': Environment.SIT,\n", + " },\n", + " 'http://localhost:3000': {\n", + " **non_production_configuration,\n", + " 'env': Environment.LOCAL,\n", + " },\n", + "}\n", + "\n", + "configuration = environment_configuration.get(harmony_host_url)\n", + "\n", + "if configuration is not None:\n", + " harmony_client = Client(env=configuration['env'])" + ] + }, + { + "cell_type": "markdown", + "id": "b115ba16-0d31-43b0-9f57-6a7061b7eeda", + "metadata": {}, + "source": [ + "### Run Bounding Box Tests" + ] + }, + { + "cell_type": "markdown", + "id": "e0788c88-6aee-450a-be35-087ba36a1d80", + "metadata": {}, + "source": [ + "The next cell runs through each of the subset by bounding box tests forming requests that are submitted to Harmony and comparing the downloaded results against reference data files that have been verified. 
This ensures that Harmony continues to return the expected binary files for expected requests." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e745045b-10fc-4e6c-808c-3bdaf2725d03", + "metadata": {}, + "outputs": [], + "source": [ + "test_name = 'subset_bounding_box'\n", + "with TemporaryDirectory() as tmp_dir:\n", + " if configuration is not None:\n", + " for shortname, test_config in configuration[test_name].items():\n", + " test_request = Request(\n", + " collection=test_config['collection_concept_id'],\n", + " granule_id=[test_config['granule_id']],\n", + " spatial=test_config['spatial'],\n", + " )\n", + " test_output = tmp_dir / Path(f'{shortname}_{test_name}.h5')\n", + " test_reference = Path(\n", + " f'reference_files/{test_output.stem}_reference{test_output.suffix}'\n", + " )\n", + "\n", + " submit_and_download(harmony_client, test_request, test_output)\n", + "\n", + " assert exists(\n", + " test_output\n", + " ), f'Unsuccessful Harmony Request: {shortname}: {test_name}'\n", + " compare_results_to_reference_file_new(test_output, test_reference)\n", + " print_success(f'{shortname} {test_name} test request.')\n", + "\n", + " print_success(f'{test_name} test suite.')\n", + " else:\n", + " print(\n", + " f'Bounding box tests not configured for environment: {harmony_host_url} - skipping tests'\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "f0f860c5-9c4e-4b83-920b-02353c8f5114", + "metadata": {}, + "source": [ + "## Run Temporal Range Tests" + ] + }, + { + "cell_type": "markdown", + "id": "edd687f2-d3cd-4dc0-8eed-6f205b6f1ab4", + "metadata": {}, + "source": [ + "As with the previous cell, the next cell runs through each of the temporal range tests forming requests that are submitted to Harmony and comparing the downloaded results against reference data files that have been verified. This ensures that Harmony continues to return the expected binary files for expected requests." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20ab50f4-c142-4733-858e-b4fba1053386", + "metadata": {}, + "outputs": [], + "source": [ + "test_name = 'subset_by_temporal_range'\n", + "with TemporaryDirectory() as tmp_dir:\n", + " if configuration is not None:\n", + " for shortname, test_config in configuration[test_name].items():\n", + " test_request = Request(\n", + " collection=test_config['collection_concept_id'],\n", + " granule_id=[test_config['granule_id']],\n", + " temporal=test_config['temporal'],\n", + " )\n", + " test_output = tmp_dir / Path(f'{shortname}_{test_name}.h5')\n", + " test_reference = Path(\n", + " f'reference_files/{test_output.stem}_reference{test_output.suffix}'\n", + " )\n", + "\n", + " submit_and_download(harmony_client, test_request, test_output)\n", + "\n", + " assert exists(\n", + " test_output\n", + " ), f'Unsuccessful Harmony Request: {shortname}: {test_name}'\n", + " compare_results_to_reference_file_new(\n", + " test_output, test_reference, test_config['coords_to_rename']\n", + " )\n", + " print_success(f'{shortname} {test_name} test request.')\n", + "\n", + " print_success(f'{test_name} test suite.')\n", + " else:\n", + " print(\n", + " f'Temporal range tests not configured for environment: {harmony_host_url} - skipping tests'\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "905f5f9e-2fca-456d-ae37-7ac49f6296ce", + "metadata": {}, + "source": [ + "## Run Subset by Shapefile Tests" + ] + }, + { + "cell_type": "markdown", + "id": "b434c1ab-0e5a-40fb-b94a-60ad721c3c5e", + "metadata": {}, + "source": [ + "This next cell runs through each of the subset by shapefile tests forming requests that are submitted to Harmony and comparing the downloaded results against reference data files that have been verified. This ensures that Harmony continues to return the expected binary files for expected requests." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e91d4136-74c4-4bbc-8217-44a27296d196", + "metadata": {}, + "outputs": [], + "source": [ + "test_name = 'subset_by_shapefile'\n", + "with TemporaryDirectory() as tmp_dir:\n", + " if configuration is not None:\n", + " for shortname, test_config in configuration[test_name].items():\n", + " test_request = Request(\n", + " collection=test_config['collection_concept_id'],\n", + " granule_id=[test_config['granule_id']],\n", + " shape=test_config['shape'],\n", + " )\n", + " test_output = tmp_dir / Path(f'{shortname}_{test_name}.h5')\n", + " test_reference = Path(\n", + " f'reference_files/{test_output.stem}_reference{test_output.suffix}'\n", + " )\n", + "\n", + " submit_and_download(harmony_client, test_request, test_output)\n", + "\n", + " assert exists(\n", + " test_output\n", + " ), f'Unsuccessful Harmony Request: {shortname}: {test_name}'\n", + " compare_results_to_reference_file_new(test_output, test_reference, [])\n", + " print_success(f'{shortname} {test_name} test request.')\n", + "\n", + " print_success(f'{test_name} test suite.')\n", + " else:\n", + " print(\n", + " f'Shapefile tests not configured for environment: {harmony_host_url} - skipping tests'\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf49d848-f90e-46e5-b13f-5f7d1eaf9e1c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/test/nsidc-icesat2/ancillary/Iceland_sliver.zip b/test/nsidc-icesat2/ancillary/Iceland_sliver.zip new file mode 100644 index 
0000000000000000000000000000000000000000..59448e00ef09c53746c8c4e21b8fe79605195f7c GIT binary patch literal 1136 zcmWIWW@h1HW&i>k4%x^6n?^oXARC0GfY>uRH77AICB8T(vn;hpw;(^KGCe;}FS#H+ zG{jBUA^@ma1gMzF0k{%Fd%rCfb~r=41NL0a)}=G|jj zhZ;}Sbe^8#ELtzU<+GPH=L_o%d=9rAg-=$VOny@PYs*s8g5?JG85~FjGAJxIvn<`Ci0v&6^h&ND+GYWe9_OEU%6gjqk*WIUOyFcsBe%D`> zzkSb&>5T4dU0*`Oq?MNH)TKU@-w@Hsf7M9y*g{8kF(*+&q_%M0JbL3tPYP?fSiQuKnM*&1bsL$4cI~E1$bWkfpHS&r6nG098V6ka`W(h3X5hel@fb*ZT5sEcJ)3!U+nK*?b~FYKcif2itb8d zrT}k7CVOVwX&&e(AW&#n(g>n)=KzQ~xRXCfH^dxUAdSNuU}iu!2R(g*jRCu732t+M ysU6)M^kj!HClr_vakvMV2+_?!k1d2bX-s&`sR;09Wdo^Y1;PxVjh}$385jV-lZPq* literal 0 HcmV?d00001 diff --git a/test/nsidc-icesat2/ancillary/Ross_Sea_positive_lon_only.geojson b/test/nsidc-icesat2/ancillary/Ross_Sea_positive_lon_only.geojson new file mode 100644 index 0000000..dfaed59 --- /dev/null +++ b/test/nsidc-icesat2/ancillary/Ross_Sea_positive_lon_only.geojson @@ -0,0 +1,52 @@ +{ + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "properties": {}, + "geometry": { + "coordinates": [ + [ + [ + 162.52151412573556, + -77.7084708314948 + ], + [ + 166.3707220857782, + -78.541252907192 + ], + [ + 169.2041153428002, + -75.17774016372276 + ], + [ + 172.8447051666078, + -71.79752466954675 + ], + [ + 170.4131598509282, + -71.36456539598191 + ], + [ + 168.66434877304175, + -73.06979598097125 + ], + [ + 166.9932724608791, + -73.17598241641015 + ], + [ + 161.87169666187822, + -74.9710519807391 + ], + [ + 162.52151412573556, + -77.7084708314948 + ] + ] + ], + "type": "Polygon" + } + } + ] +} diff --git a/test/nsidc-icesat2/ancillary/SriLanka_simple.kml b/test/nsidc-icesat2/ancillary/SriLanka_simple.kml new file mode 100644 index 0000000..2d7a3a6 --- /dev/null +++ b/test/nsidc-icesat2/ancillary/SriLanka_simple.kml @@ -0,0 +1,24 @@ + + + + + + + + + 80.02420921071848,9.990579976439335 +79.49121184054661,8.222880115071774 +79.73372495484273,6.816238576871626 +80.11778379109734,5.75280827002905 
+80.61315952333291,5.688365377230227 +81.96149662442872,6.375816828113827 +82.02944762403882,7.861265979225465 +81.3420338209853,8.953558174386274 +80.52520368366288,9.778795859214341 +80.02420921071848,9.990579976439335 + + + + + + diff --git a/test/nsidc-icesat2/ancillary/Tasmania_sliver.geojson b/test/nsidc-icesat2/ancillary/Tasmania_sliver.geojson new file mode 100644 index 0000000..c115900 --- /dev/null +++ b/test/nsidc-icesat2/ancillary/Tasmania_sliver.geojson @@ -0,0 +1,36 @@ +{ + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "properties": {}, + "geometry": { + "coordinates": [ + [ + [ + 145.0029842290342, + -41.80315161643407 + ], + [ + 145.18049209479472, + -42.111337298683694 + ], + [ + 148.37787460845385, + -42.12167177793065 + ], + [ + 148.3253351836085, + -41.73567040285831 + ], + [ + 145.0029842290342, + -41.80315161643407 + ] + ] + ], + "type": "Polygon" + } + } + ] +} diff --git a/test/nsidc-icesat2/environment.yaml b/test/nsidc-icesat2/environment.yaml new file mode 100644 index 0000000..1e29321 --- /dev/null +++ b/test/nsidc-icesat2/environment.yaml @@ -0,0 +1,13 @@ +name: papermill-nsidc-icesat2 +channels: + - conda-forge +dependencies: + - python=3.11.5 + - netCDF4 + - notebook=7.2.1 + - numpy + - papermill + - pip + - pip: + - harmony-py==0.4.15 + - xarray==2024.9.0 diff --git a/test/nsidc-icesat2/reference_files/ATL03_subset_bounding_box_reference.h5 b/test/nsidc-icesat2/reference_files/ATL03_subset_bounding_box_reference.h5 new file mode 100644 index 0000000..1a1b16e --- /dev/null +++ b/test/nsidc-icesat2/reference_files/ATL03_subset_bounding_box_reference.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37f651389ece5b020090d5c24ff233d5e5e9cc930bae0549ac094f82ce29991a +size 59773221 diff --git a/test/nsidc-icesat2/reference_files/ATL04_subset_by_temporal_range_reference.h5 b/test/nsidc-icesat2/reference_files/ATL04_subset_by_temporal_range_reference.h5 new file mode 100644 index 
0000000..590560c --- /dev/null +++ b/test/nsidc-icesat2/reference_files/ATL04_subset_by_temporal_range_reference.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33441d69e30f4f41609050f31410d8bc23133be261fe80bc389359c6886ae99e +size 45649950 diff --git a/test/nsidc-icesat2/reference_files/ATL06_subset_by_shapefile_reference.h5 b/test/nsidc-icesat2/reference_files/ATL06_subset_by_shapefile_reference.h5 new file mode 100644 index 0000000..9215c07 --- /dev/null +++ b/test/nsidc-icesat2/reference_files/ATL06_subset_by_shapefile_reference.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f5dae4e437071cffeae4984a642b212b4407e560cbc6cd2326036fd772580b3 +size 4005585 diff --git a/test/nsidc-icesat2/reference_files/ATL07_subset_bounding_box_reference.h5 b/test/nsidc-icesat2/reference_files/ATL07_subset_bounding_box_reference.h5 new file mode 100644 index 0000000..e62c321 --- /dev/null +++ b/test/nsidc-icesat2/reference_files/ATL07_subset_bounding_box_reference.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03c44870615c0e91645861d3c8fa782dceda2e178985bdef486a87c86723e70a +size 7671052 diff --git a/test/nsidc-icesat2/reference_files/ATL08_subset_bounding_box_reference.h5 b/test/nsidc-icesat2/reference_files/ATL08_subset_bounding_box_reference.h5 new file mode 100644 index 0000000..c503137 --- /dev/null +++ b/test/nsidc-icesat2/reference_files/ATL08_subset_bounding_box_reference.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51f1cb5ae5b2273f56983aa55abad50fdd4a7f0a474f66cecba73a056065675e +size 4549080 diff --git a/test/nsidc-icesat2/reference_files/ATL08_subset_by_shapefile_reference.h5 b/test/nsidc-icesat2/reference_files/ATL08_subset_by_shapefile_reference.h5 new file mode 100644 index 0000000..a763339 --- /dev/null +++ b/test/nsidc-icesat2/reference_files/ATL08_subset_by_shapefile_reference.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:154d5c6b89c9b157120d846b739a0a96f4f2ab372428f7f7254a77cbfb304d0d +size 6123965 diff --git a/test/nsidc-icesat2/reference_files/ATL09_subset_by_shapefile_reference.h5 b/test/nsidc-icesat2/reference_files/ATL09_subset_by_shapefile_reference.h5 new file mode 100644 index 0000000..817eb47 --- /dev/null +++ b/test/nsidc-icesat2/reference_files/ATL09_subset_by_shapefile_reference.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2af5d04b844ad9f64ad4d2276e1cf74a7ed64ca601c00b22aed6ff9b29aeb5f9 +size 36801327 diff --git a/test/nsidc-icesat2/reference_files/ATL10_subset_bounding_box_reference.h5 b/test/nsidc-icesat2/reference_files/ATL10_subset_bounding_box_reference.h5 new file mode 100644 index 0000000..e8aea3e --- /dev/null +++ b/test/nsidc-icesat2/reference_files/ATL10_subset_bounding_box_reference.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b16c7c3f8eee1f7d79711327390605af97fafceeb367baa6290bbcccd7b9bab5 +size 3211908 diff --git a/test/nsidc-icesat2/reference_files/ATL10_subset_by_shapefile_reference.h5 b/test/nsidc-icesat2/reference_files/ATL10_subset_by_shapefile_reference.h5 new file mode 100644 index 0000000..4600f02 --- /dev/null +++ b/test/nsidc-icesat2/reference_files/ATL10_subset_by_shapefile_reference.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347f55c0e290f04887705bb7d343db48ecdf3aa42fc19807a660c96cd65e2b5f +size 22968244 diff --git a/test/nsidc-icesat2/reference_files/ATL12_subset_bounding_box_reference.h5 b/test/nsidc-icesat2/reference_files/ATL12_subset_bounding_box_reference.h5 new file mode 100644 index 0000000..7fd1f49 --- /dev/null +++ b/test/nsidc-icesat2/reference_files/ATL12_subset_bounding_box_reference.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a0ed331540cfe70dcf714f373ec771c89d863b4573724bce73aa555b5e05d36 +size 8004873 diff --git a/test/nsidc-icesat2/reference_files/ATL13_subset_bounding_box_reference.h5 
b/test/nsidc-icesat2/reference_files/ATL13_subset_bounding_box_reference.h5 new file mode 100644 index 0000000..983faaa --- /dev/null +++ b/test/nsidc-icesat2/reference_files/ATL13_subset_bounding_box_reference.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c9fb8b8f570eccea191e776cfee2ec92d008e0f93621539fe1f8159b22fcac5 +size 26839348 diff --git a/test/nsidc-icesat2/version.txt b/test/nsidc-icesat2/version.txt new file mode 100644 index 0000000..8acdd82 --- /dev/null +++ b/test/nsidc-icesat2/version.txt @@ -0,0 +1 @@ +0.0.1 diff --git a/test/run_notebooks.sh b/test/run_notebooks.sh index 5050091..780492b 100755 --- a/test/run_notebooks.sh +++ b/test/run_notebooks.sh @@ -15,7 +15,7 @@ echo "Running regression tests" # Specify the test images to run, by default all built by the Makefile. If # the script is invoked with a list of images, only run those. -all_images=(harmony harmony-regression hoss hga n2z swath-projector trajectory-subsetter variable-subsetter regridder hybig geoloco net2cog) +all_images=(harmony harmony-regression hoss hga n2z nsidc-icesat2 swath-projector trajectory-subsetter variable-subsetter regridder hybig geoloco net2cog) specified_images=() # Parse command line arguments while [[ $# -gt 0 ]]; do diff --git a/test/shared_utils/README.md b/test/shared_utils/README.md new file mode 100644 index 0000000..280d23d --- /dev/null +++ b/test/shared_utils/README.md @@ -0,0 +1,52 @@ +## This directory contains common utility functions that can be shared across regression tests. + +This directory can be included in your test suite by adding a build-arg to the docker build command in the Makefile. + +```sh +nsidc-icesat2-image: Dockerfile nsidc-icesat2/environment.yaml + docker build -t ghcr.io/nasa/regression-tests-nsidc-icesat2:latest -f ./Dockerfile \ + --build-arg notebook=NSIDC-ICESAT2_Regression.ipynb --build-arg sub_dir=nsidc-icesat2 --build-arg shared_utils=true . 
"""A module containing common functionality used by multiple regression
tests. These functions are kept out of the Jupyter notebooks to increase
the readability of the regression test suite.

"""

from itertools import count
from shutil import move

from harmony import Client, Request
from harmony.harmony import ProcessingFailedException

try:
    from xarray import Dataset
    from xarray.backends.api import open_groups
    from xarray.core.datatree import DataTree
except ImportError:
    # Fallback only used by the Trajectory Subsetter tests, whose pinned
    # xarray does not yet ship `DataTree`.
    # TODO: remove this and make the Trajectory Subsetter tests use the
    # `xarray.core.datatree` import above.
    from datatree import open_datatree


def print_error(error_string: str) -> None:
    """Print an error message, with formatting for red text."""
    print(f'\033[91m{error_string}\033[0m')


def print_success(success_string: str) -> None:
    """Print a success message, with formatting for green text."""
    print(f'\033[92mSuccess: {success_string}\033[0m')


def submit_and_download(
    harmony_client: Client, request: Request, output_file_name: str
) -> None:
    """Submit a Harmony request via a `harmony-py` client. Wait for the
    Harmony job to finish, then download the results to the specified file
    path.

    Raises:
        ProcessingFailedException: re-raised after logging, if the Harmony
            job fails to complete successfully.

    """
    downloaded_filename = None

    try:
        job_id = harmony_client.submit(request)

        for filename in [
            file_future.result()
            for file_future in harmony_client.download_all(job_id, overwrite=True)
        ]:
            # Bug fix: this f-string previously contained no placeholder, so
            # the downloaded file name was never reported.
            print(f'Downloaded: {filename}')
            downloaded_filename = filename

        # Only the last downloaded file is kept; assumes a single output
        # granule per request — TODO confirm for multi-file jobs.
        if downloaded_filename is not None:
            move(downloaded_filename, output_file_name)
            print(f'Saved output to: {output_file_name}')

    except ProcessingFailedException as exception:
        print_error('Harmony request failed to complete successfully.')
        raise exception


def compare_results_to_reference_file(
    results_file_name: str, reference_file_name: str
) -> None:
    """Use `DataTree` functionality to compare data values, variables,
    coordinates, metadata, and all their corresponding attributes of
    downloaded results to a reference file.

    NOTE(review): relies on `open_datatree`, which is only bound when the
    `datatree` fallback import is taken — only call this from environments
    pinned to the legacy `datatree` package (the Trajectory Subsetter tests).

    """
    reference_data = open_datatree(reference_file_name)
    results_data = open_datatree(results_file_name)

    assert results_data.identical(reference_data), (
        'Output and reference files do not match.'
    )

    # Drop references promptly so the underlying files can be released.
    reference_data = None
    results_data = None


def compare_results_to_reference_file_new(
    results_file_name: str,
    reference_file_name: str,
    coordinates_to_fix: list[str] | None = None,
) -> None:
    """Use `DataTree` functionality to compare data values, variables,
    coordinates, metadata, and all their corresponding attributes of
    downloaded results to a reference file.

    Args:
        results_file_name: Path of the file downloaded from Harmony.
        reference_file_name: Path of the known-good reference file.
        coordinates_to_fix: Coordinate names that have differing dimensions
            across groups and must be renamed before the groups can be
            assembled into a `DataTree` (see `unalign_groups`).

    """
    # Bug fix: the default was previously a mutable `[]`, shared across
    # calls; use None as the sentinel instead.
    if coordinates_to_fix is None:
        coordinates_to_fix = []

    reference_groups = open_groups(reference_file_name)
    results_groups = open_groups(results_file_name)

    # Fix unalignable coordinates
    for coordinate in coordinates_to_fix:
        reference_groups = unalign_groups(reference_groups, coordinate)
        results_groups = unalign_groups(results_groups, coordinate)

    reference_data = DataTree.from_dict(reference_groups)
    results_data = DataTree.from_dict(results_groups)

    assert results_data.identical(reference_data), (
        'Output and reference files do not match.'
    )

    # Drop references promptly so the underlying files can be released.
    reference_data = None
    results_data = None


def unalign_groups(
    dict_of_datasets: dict[str, Dataset], coordinate: str
) -> dict[str, Dataset]:
    """Rename coordinates with different dimensions across datasets.

    This function addresses the issue of datasets having coordinates with the
    same name but different dimensions, which causes problems when creating a
    DataTree. Specifically for handling data products like ATL04 ICESat2,
    where common coordinates (e.g., "delta_time") have different lengths
    across datasets.

    The function renames the specified coordinate in each dataset where it
    appears, assigning a unique identifier to each instance. This allows for
    the creation of a DataTree from the modified dictionary of datasets.

    Parameters:
    -----------
    dict_of_datasets : dict[str, Dataset]
        A dictionary of xarray Datasets, typically obtained from
        xarray.open_groups().
    coordinate : str
        The name of the coordinate to be renamed across Datasets.

    Returns:
    --------
    dict[str, Dataset]
        A new dictionary of datasets with the specified coordinate
        incrementally renamed when present.

    """
    counter = count(1)
    return {
        key: (
            ds.rename({coordinate: f"{coordinate}_{next(counter)}"})
            if coordinate in ds.coords
            else ds
        )
        for key, ds in dict_of_datasets.items()
    }