Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ab/filters dtype #66

Merged
merged 3 commits into from
Mar 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,4 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
virtualizarr/_version.py
31 changes: 22 additions & 9 deletions virtualizarr/tests/test_kerchunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,25 @@
from virtualizarr.manifests import ChunkEntry, ChunkManifest, ManifestArray
from virtualizarr.xarray import dataset_from_kerchunk_refs


def test_dataset_from_kerchunk_refs():
ds_refs = {
def gen_ds_refs(
    zgroup: str = '{"zarr_format":2}',
    zarray: str = '{"chunks":[2,3],"compressor":null,"dtype":"<i8","fill_value":null,"filters":null,"order":"C","shape":[2,3],"zarr_format":2}',
    zattrs: str = '{"_ARRAY_DIMENSIONS":["x","y"]}',
    chunk: "list | None" = None,
):
    """Build a minimal kerchunk-style reference dict for a single array ``a``.

    Parameters
    ----------
    zgroup
        JSON string stored under the ``.zgroup`` key.
    zarray
        JSON string stored under the ``a/.zarray`` key.
    zattrs
        JSON string stored under the ``a/.zattrs`` key.
    chunk
        Value stored under the ``a/0.0`` key. When omitted, a fresh
        ``["test1.nc", 6144, 48]`` list is created for every call.

    Returns
    -------
    dict
        ``{"version": 1, "refs": {...}}`` containing the four entries above.
    """
    # Build the default per call: a list literal in the signature would be a
    # single shared object, so a test mutating one result would silently
    # change the default seen by every later call.
    if chunk is None:
        chunk = ["test1.nc", 6144, 48]

    return {
        "version": 1,
        "refs": {
            ".zgroup": zgroup,
            "a/.zarray": zarray,
            "a/.zattrs": zattrs,
            "a/0.0": chunk,
        },
    }

ds = dataset_from_kerchunk_refs(ds_refs)

def test_dataset_from_df_refs():
ds_refs = gen_ds_refs()
ds = dataset_from_kerchunk_refs(ds_refs)
assert "a" in ds
da = ds["a"]
assert isinstance(da.data, ManifestArray)
Expand All @@ -38,6 +43,14 @@ def test_dataset_from_kerchunk_refs():
"0.0": {"path": "test1.nc", "offset": 6144, "length": 48}
}

def test_dataset_from_df_refs_with_filters():
    """Check that filters given in the ``.zarray`` metadata appear on the loaded array.

    The ``.zarray`` JSON is generated from a dict carrying a two-entry filter
    list, and the dataset built from those refs must expose the same list
    through ``ds["a"].data.zarray.filters``.
    """
    expected_filters = [
        {"elementsize": 4, "id": "shuffle"},
        {"id": "zlib", "level": 4},
    ]
    zarray_meta = {
        "chunks": [2, 3],
        "compressor": None,
        "dtype": "<i8",
        "fill_value": None,
        "filters": expected_filters,
        "order": "C",
        "shape": [2, 3],
        "zarr_format": 2,
    }
    refs = gen_ds_refs(zarray=ujson.dumps(zarray_meta))
    virtual_ds = dataset_from_kerchunk_refs(refs)
    assert virtual_ds["a"].data.zarray.filters == expected_filters


class TestAccessor:
def test_accessor_to_kerchunk_dict(self):
Expand Down
6 changes: 3 additions & 3 deletions virtualizarr/zarr.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Literal, NewType, Optional, Tuple, Union
from typing import Any, Literal, NewType, Optional, Tuple, Union, List, Dict

import numpy as np
import ujson # type: ignore
Expand All @@ -12,7 +12,7 @@

class Codec(BaseModel):
compressor: Optional[str] = None
filters: Optional[str] = None
filters: Optional[List[Dict]] = None

def __repr__(self) -> str:
return f"Codec(compressor={self.compressor}, filters={self.filters})"
Expand All @@ -31,7 +31,7 @@ class ZArray(BaseModel):
compressor: Optional[str] = None
dtype: np.dtype
fill_value: Optional[float] = None # float or int?
filters: Optional[str] = None
filters: Optional[List[Dict]] = None
order: Union[Literal["C"], Literal["F"]]
shape: Tuple[int, ...]
zarr_format: Union[Literal[2], Literal[3]] = 2
Expand Down
Loading