Ab/filters dtype (#66)

* Add test
* Add dtype List[Dict] for filters
* Revert to using xarray main
zarr-developers · Mar 29, 2024 · aa18eaa · aa18eaa
1 parent 2c5be3f
commit aa18eaa
Show file tree

Hide file tree

Showing 3 changed files with 26 additions and 12 deletions.
diff --git a/.gitignore b/.gitignore
@@ -158,3 +158,4 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+virtualizarr/_version.py
diff --git a/virtualizarr/tests/test_kerchunk.py b/virtualizarr/tests/test_kerchunk.py
@@ -7,20 +7,25 @@
 from virtualizarr.manifests import ChunkEntry, ChunkManifest, ManifestArray
 from virtualizarr.xarray import dataset_from_kerchunk_refs
 
-
-def test_dataset_from_kerchunk_refs():
-    ds_refs = {
+def gen_ds_refs(
+        zgroup: str = '{"zarr_format":2}',
+        zarray: str = '{"chunks":[2,3],"compressor":null,"dtype":"<i8","fill_value":null,"filters":null,"order":"C","shape":[2,3],"zarr_format":2}',
+        zattrs: str = '{"_ARRAY_DIMENSIONS":["x","y"]}',
+        chunk: list = ["test1.nc", 6144, 48],
+):
+    return {
         "version": 1,
         "refs": {
-            ".zgroup": '{"zarr_format":2}',
-            "a/.zarray": '{"chunks":[2,3],"compressor":null,"dtype":"<i8","fill_value":null,"filters":null,"order":"C","shape":[2,3],"zarr_format":2}',
-            "a/.zattrs": '{"_ARRAY_DIMENSIONS":["x","y"]}',
-            "a/0.0": ["test1.nc", 6144, 48],
+            ".zgroup": zgroup,
+            "a/.zarray": zarray,
+            "a/.zattrs": zattrs,
+            "a/0.0": chunk,
         },
     }
 
-    ds = dataset_from_kerchunk_refs(ds_refs)
-
+def test_dataset_from_df_refs():
+    ds_refs = gen_ds_refs()
+    ds =  dataset_from_kerchunk_refs(ds_refs)
     assert "a" in ds
     da = ds["a"]
     assert isinstance(da.data, ManifestArray)
@@ -38,6 +43,14 @@ def test_dataset_from_kerchunk_refs():
         "0.0": {"path": "test1.nc", "offset": 6144, "length": 48}
     }
 
+def test_dataset_from_df_refs_with_filters():
+    filters = [{"elementsize":4,"id":"shuffle"},{"id":"zlib","level":4}]
+    zarray = {"chunks":[2,3],"compressor":None,"dtype":"<i8","fill_value":None,"filters":filters,"order":"C","shape":[2,3],"zarr_format":2}
+    ds_refs = gen_ds_refs(zarray=ujson.dumps(zarray))
+    ds =  dataset_from_kerchunk_refs(ds_refs)
+    da = ds["a"]
+    assert da.data.zarray.filters == filters
+
 
 class TestAccessor:
     def test_accessor_to_kerchunk_dict(self):

diff --git a/virtualizarr/zarr.py b/virtualizarr/zarr.py
@@ -1,4 +1,4 @@
-from typing import Any, Literal, NewType, Optional, Tuple, Union
+from typing import Any, Literal, NewType, Optional, Tuple, Union, List, Dict
 
 import numpy as np
 import ujson  # type: ignore
@@ -12,7 +12,7 @@
 
 class Codec(BaseModel):
     compressor: Optional[str] = None
-    filters: Optional[str] = None
+    filters: Optional[List[Dict]] = None
 
     def __repr__(self) -> str:
         return f"Codec(compressor={self.compressor}, filters={self.filters})"
@@ -31,7 +31,7 @@ class ZArray(BaseModel):
     compressor: Optional[str] = None
     dtype: np.dtype
     fill_value: Optional[float] = None  # float or int?
-    filters: Optional[str] = None
+    filters: Optional[List[Dict]] = None
     order: Union[Literal["C"], Literal["F"]]
     shape: Tuple[int, ...]
     zarr_format: Union[Literal[2], Literal[3]] = 2