Skip to content

Commit

Permalink
Ab/filters dtype (#66)
Browse files Browse the repository at this point in the history
* Add test

* Add dtype List[Dict] for filters

* Revert to using xarray main
  • Loading branch information
abarciauskas-bgse authored Mar 29, 2024
1 parent 2c5be3f commit aa18eaa
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 12 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,4 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
virtualizarr/_version.py
31 changes: 22 additions & 9 deletions virtualizarr/tests/test_kerchunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,25 @@
from virtualizarr.manifests import ChunkEntry, ChunkManifest, ManifestArray
from virtualizarr.xarray import dataset_from_kerchunk_refs


def test_dataset_from_kerchunk_refs():
ds_refs = {
def gen_ds_refs(
    zgroup: str = '{"zarr_format":2}',
    zarray: str = '{"chunks":[2,3],"compressor":null,"dtype":"<i8","fill_value":null,"filters":null,"order":"C","shape":[2,3],"zarr_format":2}',
    zattrs: str = '{"_ARRAY_DIMENSIONS":["x","y"]}',
    chunk: "list | None" = None,
):
    """Build a minimal kerchunk-style reference dict holding one array, ``a``.

    Parameters
    ----------
    zgroup
        JSON text stored under the ``.zgroup`` key.
    zarray
        JSON text stored under the ``a/.zarray`` key.
    zattrs
        JSON text stored under the ``a/.zattrs`` key.
    chunk
        Entry stored under the ``a/0.0`` key. When omitted, a new
        ``["test1.nc", 6144, 48]`` list is used (the tests in this module
        read these values back as path, offset and length).

    Returns
    -------
    dict
        ``{"version": 1, "refs": {...}}`` with the four keys above.
    """
    if chunk is None:
        # Build the default per call: a list literal in the signature would be
        # one shared object, so a caller mutating the returned refs would
        # silently change the default seen by every later call.
        chunk = ["test1.nc", 6144, 48]

    return {
        "version": 1,
        "refs": {
            ".zgroup": zgroup,
            "a/.zarray": zarray,
            "a/.zattrs": zattrs,
            "a/0.0": chunk,
        },
    }

ds = dataset_from_kerchunk_refs(ds_refs)

def test_dataset_from_df_refs():
ds_refs = gen_ds_refs()
ds = dataset_from_kerchunk_refs(ds_refs)
assert "a" in ds
da = ds["a"]
assert isinstance(da.data, ManifestArray)
Expand All @@ -38,6 +43,14 @@ def test_dataset_from_kerchunk_refs():
"0.0": {"path": "test1.nc", "offset": 6144, "length": 48}
}

def test_dataset_from_df_refs_with_filters():
    """Filters listed in ``a/.zarray`` are exposed on the loaded array's zarray."""
    expected_filters = [
        {"elementsize": 4, "id": "shuffle"},
        {"id": "zlib", "level": 4},
    ]
    # Key order is kept as-is so the serialised JSON text is unchanged.
    zarray_meta = {
        "chunks": [2, 3],
        "compressor": None,
        "dtype": "<i8",
        "fill_value": None,
        "filters": expected_filters,
        "order": "C",
        "shape": [2, 3],
        "zarr_format": 2,
    }
    refs = gen_ds_refs(zarray=ujson.dumps(zarray_meta))
    dataset = dataset_from_kerchunk_refs(refs)
    assert dataset["a"].data.zarray.filters == expected_filters


class TestAccessor:
def test_accessor_to_kerchunk_dict(self):
Expand Down
6 changes: 3 additions & 3 deletions virtualizarr/zarr.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Literal, NewType, Optional, Tuple, Union
from typing import Any, Literal, NewType, Optional, Tuple, Union, List, Dict

import numpy as np
import ujson # type: ignore
Expand All @@ -12,7 +12,7 @@

class Codec(BaseModel):
    """Compressor and filter settings; both fields default to ``None``."""

    compressor: Optional[str] = None
    # One configuration dict per filter, e.g.
    # [{"elementsize": 4, "id": "shuffle"}, {"id": "zlib", "level": 4}].
    # Declared once: an earlier ``Optional[str]`` declaration of the same name
    # was dead (overridden by this one) and contradicted the effective type.
    filters: Optional[List[Dict]] = None

    def __repr__(self) -> str:
        """Return ``Codec(compressor=..., filters=...)`` using the field values."""
        return f"Codec(compressor={self.compressor}, filters={self.filters})"
Expand All @@ -31,7 +31,7 @@ class ZArray(BaseModel):
compressor: Optional[str] = None
dtype: np.dtype
fill_value: Optional[float] = None # float or int?
filters: Optional[str] = None
filters: Optional[List[Dict]] = None
order: Union[Literal["C"], Literal["F"]]
shape: Tuple[int, ...]
zarr_format: Union[Literal[2], Literal[3]] = 2
Expand Down

0 comments on commit aa18eaa

Please sign in to comment.