Report the failing regexp and the offending input in the "regexp failed"
AssertionError raised by DataReferenceSyntax.extract_metadata_from_path and
DataReferenceSyntax.extract_metadata_from_filename, and add unit tests that
cover both the successful and the failing parse for each method.

diff --git a/changelog/74.improvement.md b/changelog/74.improvement.md
new file mode 100644
index 0000000..d5ca8b4
--- /dev/null
+++ b/changelog/74.improvement.md
@@ -0,0 +1 @@
+Improved the error message if regexp parsing of paths fails for a given DRS.
diff --git a/src/input4mips_validation/cvs/drs.py b/src/input4mips_validation/cvs/drs.py
index a1c51f1..8e6e930 100644
--- a/src/input4mips_validation/cvs/drs.py
+++ b/src/input4mips_validation/cvs/drs.py
@@ -373,7 +373,7 @@ def extract_metadata_from_path(
         )
         match = re.match(directory_regexp, str(directory))
         if match is None:
-            msg = "regexp failed"
+            msg = f"regexp failed. {directory_regexp=}. {directory=}"
             raise AssertionError(msg)
 
         match_groups = match.groupdict()
@@ -451,7 +451,7 @@ def extract_metadata_from_filename(self, filename: str) -> dict[str, str | None]
         filename_regexp = self.get_regexp_for_capturing_filename_information()
         match = re.match(filename_regexp, filename)
         if match is None:
-            msg = "regexp failed"
+            msg = f"regexp failed. {filename_regexp=}. {filename=}"
             raise AssertionError(msg)
 
         match_groups = match.groupdict()
diff --git a/tests/unit/cvs/test_drs_path_handling.py b/tests/unit/cvs/test_drs_path_handling.py
new file mode 100644
index 0000000..b5ef503
--- /dev/null
+++ b/tests/unit/cvs/test_drs_path_handling.py
@@ -0,0 +1,102 @@
+"""
+Tests of path parsing with the DRS
+"""
+
+from contextlib import nullcontext as does_not_raise
+
+import pytest
+
+from input4mips_validation.cvs.drs import DataReferenceSyntax
+
+
+@pytest.mark.parametrize(
+    "directory_path_template, directory, exp_raise, exp_res",
+    (
+        (
+            "<activity_id>/<mip_era>/<target_mip>/<institution_id>/<source_id>/<realm>/<frequency>/<variable_id>/<grid_label>/v<version>",
+            "/root/input4MIPs/CMIP6Plus/CMIP/PCMDI/PCMDI-AMIP-1-1-9/ocean/mon/tos/gn/v20230512/",
+            does_not_raise(),
+            {
+                "activity_id": "input4MIPs",
+                "frequency": "mon",
+                "grid_label": "gn",
+                "institution_id": "PCMDI",
+                "mip_era": "CMIP6Plus",
+                "realm": "ocean",
+                "source_id": "PCMDI-AMIP-1-1-9",
+                "target_mip": "CMIP",
+                "variable_id": "tos",
+                "version": "20230512",
+            },
+        ),
+        pytest.param(
+            "<activity_id>/<mip_era>/<target_mip>/<institution_id>/<source_id>/<realm>/<frequency>/<variable_id>/<grid_label>/v<version>",
+            "input4MIPs/CMIP/PCMDI/PCMDI-AMIP-1-1-9/ocean/mon/tos/gn/v20230512/",
+            pytest.raises(
+                AssertionError,
+                match="regexp failed. directory_regexp='.*'. directory='.*'",
+            ),
+            None,
+            id="missing_mip_era",
+        ),
+    ),
+)
+def test_extract_metadata_from_path(
+    directory_path_template, directory, exp_raise, exp_res
+):
+    drs = DataReferenceSyntax(
+        directory_path_template=directory_path_template,
+        directory_path_example="not_used",
+        filename_template="not_used",
+        filename_example="not_used",
+    )
+    with exp_raise:
+        res = drs.extract_metadata_from_path(directory)
+
+    if exp_res is not None:
+        assert res == exp_res
+
+
+@pytest.mark.parametrize(
+    "filename_template, filename, exp_raise, exp_res",
+    (
+        (
+            "<variable_id>_<activity_id>_<dataset_category>_<target_mip>_<source_id>_<grid_label>[_<time_range>].nc",
+            "tos_input4MIPs_SSTsAndSeaIce_CMIP_PCMDI-AMIP-1-1-9_gn_187001-202212.nc",
+            does_not_raise(),
+            {
+                "activity_id": "input4MIPs",
+                "dataset_category": "SSTsAndSeaIce",
+                "grid_label": "gn",
+                "source_id": "PCMDI-AMIP-1-1-9",
+                "target_mip": "CMIP",
+                "time_range": "187001-202212",
+                "variable_id": "tos",
+            },
+        ),
+        pytest.param(
+            "<variable_id>_<activity_id>_<dataset_category>_<target_mip>_<source_id>_<grid_label>[_<time_range>].nc",
+            "tos_percentage_input4MIPs_SSTsAndSeaIce_CMIP_PCMDI-AMIP-1-1-9_gn_187001-202212.nc",
+            pytest.raises(
+                AssertionError,
+                match="regexp failed. filename_regexp='.*'. filename='.*'",
+            ),
+            None,
+            id="underscore_in_variable_id",
+        ),
+    ),
+)
+def test_extract_metadata_from_filename(
+    filename_template, filename, exp_raise, exp_res
+):
+    drs = DataReferenceSyntax(
+        directory_path_template="not_used",
+        directory_path_example="not_used",
+        filename_template=filename_template,
+        filename_example="not_used",
+    )
+    with exp_raise:
+        res = drs.extract_metadata_from_filename(filename)
+
+    if exp_res is not None:
+        assert res == exp_res