From e23ee4bc1e51cb542e44fe60ec1acb95f2543146 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Tue, 15 Oct 2024 14:31:56 -0600 Subject: [PATCH] DAS-NONE: Move trajectory subsetter tests to shared_utils (#104) --- .github/workflows/build-all-images.yml | 1 + CHANGELOG.md | 8 ++ README.md | 2 +- test/Makefile | 2 +- .../NSIDC-ICESAT2_Regression.ipynb | 15 +-- test/nsidc-icesat2/version.txt | 2 +- test/shared_utils/README.md | 25 +++- test/shared_utils/compare.py | 91 +++++++++++++++ test/shared_utils/utilities.py | 109 +----------------- .../TrajectorySubsetter_Regression.ipynb | 25 ++-- test/trajectory-subsetter/environment.yaml | 15 ++- test/trajectory-subsetter/local_utilities.py | 19 +++ test/trajectory-subsetter/utilities.py | 82 ------------- test/trajectory-subsetter/version.txt | 2 +- 14 files changed, 171 insertions(+), 227 deletions(-) create mode 100644 test/shared_utils/compare.py create mode 100644 test/trajectory-subsetter/local_utilities.py delete mode 100644 test/trajectory-subsetter/utilities.py diff --git a/.github/workflows/build-all-images.yml b/.github/workflows/build-all-images.yml index b9e5638..c6c6a18 100644 --- a/.github/workflows/build-all-images.yml +++ b/.github/workflows/build-all-images.yml @@ -43,6 +43,7 @@ jobs: - image: "trajectory-subsetter" notebook: "TrajectorySubsetter_Regression.ipynb" + shared-utils: "true" - image: "variable-subsetter" notebook: "VariableSubsetter_Regression.ipynb" diff --git a/CHANGELOG.md b/CHANGELOG.md index 8bdc285..8abd606 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ versioning. Rather than a static releases, this repository contains of a number of regression tests that are each semi-independent. This CHANGELOG file should be used to document pull requests to this repository. +## 2024-10-11 ([#104](https://github.com/nasa/harmony-regression-tests/pull/104)) + +- Migrates trajectory-subsetter to use `shared_utils`. +- Separates `shared_utils/utilities.py` into `utilities.py` and `compare.py` preventing `xarray` from being a mandatory requirement to use `shared_utils`. +- Updates `shared_utils` `README` to mention the github action updates needed to use `shared_utils`. +- Removes old `compare_results_to_reference_file` and renames `compare_results_to_reference_file_new` -> `compare_results_to_reference_file` +- Migrates nsidc_icesat2 tests to the new `shared_utils` structure and names. + ## 2024-10-11 ([#103](https://github.com/nasa/harmony-regression-tests/pull/103)) - Update the ATL03 and ATL08 reference files in the `nsidc-icesat2` regression diff --git a/README.md b/README.md index b6a6c7a..7b2c75d 100644 --- a/README.md +++ b/README.md @@ -222,7 +222,7 @@ channels: - conda-forge - defaults dependencies: - - python=3.7 + - python=3.11 - jupyter - requests - netcdf4 diff --git a/test/Makefile b/test/Makefile index 503c69f..8ebc221 100644 --- a/test/Makefile +++ b/test/Makefile @@ -36,7 +36,7 @@ swath-projector-image: Dockerfile swath-projector/environment.yaml trajectory-subsetter-image: Dockerfile trajectory-subsetter/environment.yaml docker build -t ghcr.io/nasa/regression-tests-trajectory-subsetter:latest -f ./Dockerfile \ - --build-arg notebook=TrajectorySubsetter_Regression.ipynb --build-arg sub_dir=trajectory-subsetter . + --build-arg notebook=TrajectorySubsetter_Regression.ipynb --build-arg sub_dir=trajectory-subsetter --build-arg shared_utils=true . variable-subsetter-image: Dockerfile variable-subsetter/environment.yaml docker build -t ghcr.io/nasa/regression-tests-variable-subsetter:latest -f ./Dockerfile \ diff --git a/test/nsidc-icesat2/NSIDC-ICESAT2_Regression.ipynb b/test/nsidc-icesat2/NSIDC-ICESAT2_Regression.ipynb index 72c2ec7..d9d0698 100644 --- a/test/nsidc-icesat2/NSIDC-ICESAT2_Regression.ipynb +++ b/test/nsidc-icesat2/NSIDC-ICESAT2_Regression.ipynb @@ -121,11 +121,8 @@ "import sys\n", "\n", "sys.path.append('../shared_utils')\n", - "from utilities import (\n", - " print_success,\n", - " submit_and_download,\n", - " compare_results_to_reference_file_new,\n", - ")" + "from utilities import print_success, submit_and_download\n", + "from compare import compare_results_to_reference_file" ] }, { @@ -384,7 +381,7 @@ " assert exists(\n", " test_output\n", " ), 'Unsuccessful Harmony Request: {shortname}: {test_name}'\n", - " compare_results_to_reference_file_new(\n", + " compare_results_to_reference_file(\n", " test_output, test_reference, identical=False\n", " )\n", " print_success(f'{shortname} {test_name} test request.')\n", @@ -438,7 +435,7 @@ " assert exists(\n", " test_output\n", " ), 'Unsuccessful Harmony Request: {shortname}: {test_name}'\n", - " compare_results_to_reference_file_new(\n", + " compare_results_to_reference_file(\n", " test_output,\n", " test_reference,\n", " identical=False,\n", @@ -495,7 +492,7 @@ " assert exists(\n", " test_output\n", " ), 'Unsuccessful Harmony Request: {shortname}: {test_name}'\n", - " compare_results_to_reference_file_new(\n", + " compare_results_to_reference_file(\n", " test_output, test_reference, identical=False, coordinates_to_fix=[]\n", " )\n", " print_success(f'{shortname} {test_name} test request.')\n", @@ -524,7 +521,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.10" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/test/nsidc-icesat2/version.txt b/test/nsidc-icesat2/version.txt index 81340c7..bbdeab6 100644 --- a/test/nsidc-icesat2/version.txt +++ b/test/nsidc-icesat2/version.txt @@ -1 +1 @@ -0.0.4 +0.0.5 diff --git a/test/shared_utils/README.md b/test/shared_utils/README.md index 280d23d..01921b6 100644 --- a/test/shared_utils/README.md +++ b/test/shared_utils/README.md @@ -1,5 +1,7 @@ ## This directory contains common utility functions that can be shared across regression tests. +## Include the build arg on the Makefile for your tests + This directory can be included in your test suite by adding a build-arg to the docker build command in the Makefile. ```sh @@ -8,11 +10,23 @@ nsidc-icesat2-image: Dockerfile nsidc-icesat2/environment.yaml --build-arg notebook=NSIDC-ICESAT2_Regression.ipynb --build-arg sub_dir=nsidc-icesat2 --build-arg shared_utils=true . ``` -Doing this will cause this directory and all its files to be included at `/workdir/shared_utils` in your container. +Doing this will cause this directory and all its files to be included at `/workdir/shared_utils` in your container when you are working locally. + +## Update github workflows to include the build arg for your tests. -## Include the necessary python packages in your test's pip_requirements.txt +To include the shared_utils directory on the regression image built by GitHub you add a `shared_utils` key to the service matrix under your service like was done for the trajectory subsetter in the `.github/workflows/build-all-images.yml` file. -The test environment is determined by the environment.yaml in the test directory, but if you are including `shared_utils` you will need to also include harmony-py and either xarray-datatree or a fancy pinned version of xarray +```yml + - + image: "trajectory-subsetter" + notebook: "TrajectorySubsetter_Regression.ipynb" + shared-utils: "true" + +``` + +## Include the necessary python packages in your test's environment.yaml + +The test environment is determined by the environment.yaml in the test directory, but if you are using routines from `shared_utils` you will need to also update your test's `environment.yaml` to include the libraries that are imported in the shared modules. That means `harmony-py` to use routines from utilities.py and a recent version of `xarray` for ones from `compare.py`. As always you should look in the files to see if there are new requirements. For example the pip requirements in the nsidc_icesat2 environment file : ``` @@ -28,10 +42,9 @@ dependencies: - pip - pip: - harmony-py==0.4.15 - - git+https://github.com/pydata/xarray.git@ca2e9d6#egg=xarray + - xarray==2024.9.0 ``` - ## Using the shared utility routines To use routines from the `shared_utils` dir you need to add the `../shared_utils` directory to the Python module search path using `sys.path.append()` so that the modules will be found. @@ -45,8 +58,8 @@ from utilities import ( print_error, print_success, submit_and_download, - compare_results_to_reference_file, ) +from compare import compare_results_to_reference_file print_success('yay! you imported the functions.') ``` diff --git a/test/shared_utils/compare.py b/test/shared_utils/compare.py new file mode 100644 index 0000000..b4a69a5 --- /dev/null +++ b/test/shared_utils/compare.py @@ -0,0 +1,91 @@ +"""A module containing common functionality used by multiple regression +tests. These functions are kept out of the Jupyter notebook to increase the +readability of the regression test suite. + +This module focuses on comparing output specifically with xarray. +""" + +from itertools import count + + +from xarray.backends.api import open_groups +from xarray.core.datatree import DataTree +from xarray import Dataset + + +def compare_results_to_reference_file( + results_file_name: str, + reference_file_name: str, + identical: bool = True, + coordinates_to_fix: list[str] | None = None, +) -> None: + """Use `DataTree` functionality to compare data values, variables, + coordinates, metadata, and all their corresponding attributes of + downloaded results to a reference file. + + """ + if coordinates_to_fix is None: + coordinates_to_fix = [] + + reference_groups = open_groups(reference_file_name) + results_groups = open_groups(results_file_name) + + # Fix unalignable coordinates + for coord in coordinates_to_fix: + reference_groups = unalign_groups(reference_groups, coord) + results_groups = unalign_groups(results_groups, coord) + + reference_data = DataTree.from_dict(reference_groups) + results_data = DataTree.from_dict(results_groups) + + if identical: + assert results_data.identical( + reference_data + ), 'Output and reference files do not match.' + else: + assert results_data.equals( + reference_data + ), 'Output and reference files do not match.' + + reference_data = None + results_data = None + + +def unalign_groups( + dict_of_datasets: dict[str, Dataset], coordinate: str +) -> dict[str, Dataset]: + """Rename coordinates with different dimensions across datasets. + + This function addresses the issue of datasets having coordinates with the + same name but different dimensions, which causes problems when creating a + DataTree. Specifically for handling data products like ATL04 ICESat2, where + common coordinates (e.g., "delta_time") have different lengths across + datasets. + + The function renames the specified coordinate in each dataset where it appears, + assigning a unique identifier to each instance. This allows for the creation of + a DataTree from the modified dictionary of datasets. + + Parameters: + ----------- + dict_of_datasets : dict[str, Dataset] + A dictionary of xarray Datasets, typically obtained from xarray.open_groups(). + coordinate : str + The name of the coordinate to be renamed across Datasets. + + Returns: + -------- + dict[str, Dataset] + A new dictionary of datasets with the specified coordinate + incrementally renamed when present. + + """ + counter = count(1) + return { + key: ( + ds.rename({coordinate: f"{coordinate}_{next(counter)}"}) + if coordinate in ds.coords + else ds + ) + for key, ds in dict_of_datasets.items() + } diff --git a/test/shared_utils/utilities.py b/test/shared_utils/utilities.py index 391420e..b2dbe0f 100644 --- a/test/shared_utils/utilities.py +++ b/test/shared_utils/utilities.py @@ -1,24 +1,14 @@ -""" A module containing common functionality used by multiple regression tests +""" A module containing common functionality used by multiple regression tests. These functions are kept out of the Jupyter notebook to increase the readability of the regression test suite. """ from shutil import move -from itertools import count from harmony import Client, Request from harmony.harmony import ProcessingFailedException -try: - from xarray.backends.api import open_groups - from xarray.core.datatree import DataTree - from xarray import Dataset -except Exception: - # only used by Trajectory Subsetter tests. - # TODO: remove and make Trajectory Subsetter use above - from datatree import open_datatree - def print_error(error_string: str) -> str: """Print an error, with formatting for red text.""" @@ -58,100 +48,3 @@ def submit_and_download( except ProcessingFailedException as exception: print_error('Harmony request failed to complete successfully.') raise exception - - -def compare_results_to_reference_file( - results_file_name: str, reference_file_name: str -) -> None: - """Use `DataTree` functionality to compare data values, variables, - coordinates, metadata, and all their corresponding attributes of - downloaded results to a reference file. - - """ - reference_data = open_datatree(reference_file_name) - results_data = open_datatree(results_file_name) - - assert results_data.identical(reference_data), ( - 'Output and reference files ' 'do not match.' - ) - - reference_data = None - results_data = None - - -def compare_results_to_reference_file_new( - results_file_name: str, - reference_file_name: str, - identical: bool = True, - coordinates_to_fix: list[str] | None = None, -) -> None: - """Use `DataTree` functionality to compare data values, variables, - coordinates, metadata, and all their corresponding attributes of - downloaded results to a reference file. - - """ - if coordinates_to_fix is None: - coordinates_to_fix = [] - - reference_groups = open_groups(reference_file_name) - results_groups = open_groups(results_file_name) - - # Fix unalignable coordinates - for coord in coordinates_to_fix: - reference_groups = unalign_groups(reference_groups, coord) - results_groups = unalign_groups(results_groups, coord) - - reference_data = DataTree.from_dict(reference_groups) - results_data = DataTree.from_dict(results_groups) - - if identical: - assert results_data.identical( - reference_data - ), 'Output and reference files do not match.' - else: - assert results_data.equals( - reference_data - ), 'Output and reference files do not match.' - - reference_data = None - results_data = None - - -def unalign_groups( - dict_of_datasets: dict[str, Dataset], coordinate: str -) -> dict[str, Dataset]: - """Rename coordinates with different dimensions across datasets. - - This function addresses the issue of datasets having coordinates with the - same name but different dimensions, which causes problems when creating a - DataTree. Specifically for handling data products like ATL04 ICESat2, where - common coordinates (e.g., "delta_time") have different lengths across - datasets. - - The function renames the specified coordinate in each dataset where it appears, - assigning a unique identifier to each instance. This allows for the creation of - a DataTree from the modified dictionary of datasets. - - Parameters: - ----------- - dict_of_datasets : dict[str, Dataset] - A dictionary of xarray Datasets, typically obtained from xarray.open_groups(). - coordinate : str - The name of the coordinate to be renamed across Datasets. - - Returns: - -------- - dict[str, Dataset] - A new dictionary of datasets with the specified coordinate - incrementally renamed when present. - - """ - counter = count(1) - return { - key: ( - ds.rename({coordinate: f"{coordinate}_{next(counter)}"}) - if coordinate in ds.coords - else ds - ) - for key, ds in dict_of_datasets.items() - } diff --git a/test/trajectory-subsetter/TrajectorySubsetter_Regression.ipynb b/test/trajectory-subsetter/TrajectorySubsetter_Regression.ipynb index bc0951b..4262aa7 100644 --- a/test/trajectory-subsetter/TrajectorySubsetter_Regression.ipynb +++ b/test/trajectory-subsetter/TrajectorySubsetter_Regression.ipynb @@ -37,25 +37,30 @@ { "cell_type": "code", "execution_count": null, - "id": "7ce7f640", + "id": "f4489f6d-50d2-46a3-ad52-1da296237693", "metadata": { - "jupyter": { - "source_hidden": true - }, "tags": [] }, "outputs": [], "source": [ + "import sys\n", + "\n", + "sys.path.append('../shared_utils')\n", + "from utilities import (\n", + " print_success,\n", + " submit_and_download,\n", + ")\n", + "\n", + "from compare import compare_results_to_reference_file\n", + "\n", + "\n", "from datetime import datetime\n", "from os.path import exists\n", "\n", "from harmony import BBox, Client, Collection, Environment, Request\n", "\n", - "from utilities import (\n", - " compare_results_to_reference_file,\n", - " print_success,\n", + "from local_utilities import (\n", " remove_results_files,\n", - " submit_and_download,\n", ")" ] }, @@ -388,7 +393,7 @@ "metadata": { "celltoolbar": "Tags", "kernelspec": { - "display_name": "papermill-trajectory-subsetter", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -402,7 +407,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.5" }, "name": "SDS_Regression.ipynb" }, diff --git a/test/trajectory-subsetter/environment.yaml b/test/trajectory-subsetter/environment.yaml index 4e1ffaa..3123422 100644 --- a/test/trajectory-subsetter/environment.yaml +++ b/test/trajectory-subsetter/environment.yaml @@ -3,12 +3,11 @@ channels: - conda-forge - defaults dependencies: - - python=3.10 - - notebook=6.5.4 - - papermill=2.3.4 - - python-dateutil=2.7.5 - - numpy=1.24.3 - - netCDF4=1.6.3 - - xarray-datatree=0.0.12 + - python=3.11.5 + - notebook=7.2.1 + - papermill=2.6 + - numpy=2.1.2 + - netCDF4=1.7.1 - pip: - - harmony-py + - harmony-py==0.4.15 + - xarray==2024.9.0 diff --git a/test/trajectory-subsetter/local_utilities.py b/test/trajectory-subsetter/local_utilities.py new file mode 100644 index 0000000..1be293c --- /dev/null +++ b/test/trajectory-subsetter/local_utilities.py @@ -0,0 +1,19 @@ +""" A module containing utility functionality used by the Trajectory Subsetter + regression tests. These functions are kept out of the Jupyter notebook to + increase the readability of the regression test suite. + +""" + +from os import listdir, remove + + +def remove_results_files() -> None: + """Remove all hdf5 files downloaded during the Trajectory Subsetter + regression tests. + + """ + directory_files = listdir() + + for directory_file in directory_files: + if directory_file.endswith('.h5'): + remove(directory_file) diff --git a/test/trajectory-subsetter/utilities.py b/test/trajectory-subsetter/utilities.py deleted file mode 100644 index d682d97..0000000 --- a/test/trajectory-subsetter/utilities.py +++ /dev/null @@ -1,82 +0,0 @@ -""" A module containing utility functionality used by the Trajectory Subsetter - regression tests. These functions are kept out of the Jupyter notebook to - increase the readability of the regression test suite. - -""" - -from os import listdir, remove, replace - -from harmony import Client, Request -from harmony.harmony import ProcessingFailedException -from datatree import open_datatree - - -def compare_results_to_reference_file( - results_file_name: str, reference_file_name: str -) -> None: - """Use `DataTree` functionality to compare data values, variables, - coordinates, metadata, and all their corresponding attributes of - downloaded results to a reference file. - - """ - reference_data = open_datatree(reference_file_name) - results_data = open_datatree(results_file_name) - - assert results_data.identical(reference_data), ( - 'Output and reference files ' 'do not match.' - ) - - reference_data = None - results_data = None - - -def submit_and_download( - harmony_client: Client, request: Request, output_file_name: str -): - """Submit a Harmony request via a `harmony-py` client. Wait for the - Harmony job to finish, then download the results to the specified file - path. - - """ - downloaded_filename = None - - try: - job_id = harmony_client.submit(request) - - for filename in [ - file_future.result() - for file_future in harmony_client.download_all(job_id, overwrite=True) - ]: - - print(f'Downloaded: {filename}') - downloaded_filename = filename - - if downloaded_filename is not None: - replace(downloaded_filename, output_file_name) - print(f'Saved output to: {output_file_name}') - - except ProcessingFailedException as exception: - print_error('Harmony request failed to complete successfully.') - raise exception - - -def remove_results_files() -> None: - """Remove all NetCDF-4 files downloaded during the Swath Projector - regression tests. - - """ - directory_files = listdir() - - for directory_file in directory_files: - if directory_file.endswith('.h5'): - remove(directory_file) - - -def print_error(error_string: str) -> str: - """Print an error, with formatting for red text.""" - print(f'\033[91m{error_string}\033[0m') - - -def print_success(success_string: str) -> str: - """Print a success message, with formatting for green text.""" - print(f'\033[92mSuccess: {success_string}\033[0m') diff --git a/test/trajectory-subsetter/version.txt b/test/trajectory-subsetter/version.txt index c946ee6..1180819 100644 --- a/test/trajectory-subsetter/version.txt +++ b/test/trajectory-subsetter/version.txt @@ -1 +1 @@ -0.1.6 +0.1.7