From b63505f8bff42ee3667cc14ec83e7a465ba9789a Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 18 Oct 2024 13:02:25 +0200 Subject: [PATCH] fix the use of regrid.most_common in land_cover.py due to changes in version xarray_regrid v0.4.0 --- src/zampy/datasets/land_cover.py | 40 ++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py index 25f12bb..9e54461 100644 --- a/src/zampy/datasets/land_cover.py +++ b/src/zampy/datasets/land_cover.py @@ -134,16 +134,30 @@ def load( ) raise ValueError(msg) files = list((ingest_dir / self.name).glob(f"{self.name}_*.nc")) - ds = xr.open_mfdataset(files, chunks={"latitude": 200, "longitude": 200}) ds = ds.sel(time=slice(time_bounds.start, time_bounds.end)) grid = xarray_regrid.create_regridding_dataset( utils.make_grid(spatial_bounds, resolution) ) - ds = ds.regrid.most_common(grid) - return ds + ds_regrid = {} + for variable in variable_names: + # select the variable to be regridded + da = ds[variable] + + # get values for most common method + if "flag_values" in da.attrs: + regrid_values = da.attrs["flag_values"] + else: + regrid_values = np.unique(da.values) + + da_regrid = da.regrid.most_common(grid, values=regrid_values) + + # make sure dtype is the same + ds_regrid[variable] = da_regrid.astype(da.dtype) + + return xr.Dataset(ds_regrid) def convert( self, @@ -207,7 +221,7 @@ def extract_netcdf_to_zampy(file: Path) -> xr.Dataset: # only keep land cover class variable with xr.open_dataset(unzip_folder / zipped_file_name) as ds: - var_list = [var for var in ds.data_vars] + var_list = list(ds.data_vars) raw_variable = "lccs_class" var_list.remove(raw_variable) ds = ds.drop_vars(var_list) # noqa: PLW2901 @@ -225,7 +239,23 @@ def extract_netcdf_to_zampy(file: Path) -> xr.Dataset: target_dataset = xarray_regrid.create_regridding_dataset(new_grid) - ds_regrid = ds.regrid.most_common(target_dataset) + # select the variable to be regridded + da = ds[raw_variable] + + # get values for most common method + if "flag_values" in da.attrs: + regrid_values = da.attrs["flag_values"] + else: + regrid_values = np.unique(da.values) + + + da_regrid = da.regrid.most_common(target_dataset, values=regrid_values) + + # make sure dtype is the same + da_regrid = da_regrid.astype(da.dtype) + + # convert dataarray to dataset + ds_regrid = da_regrid.to_dataset() # rename variable to follow the zampy convention variable_name = "land_cover"