diff --git a/.gitignore b/.gitignore index 0a37785d11..0f4b7edaf8 100644 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,7 @@ GPATH GRTAGS .vscode pyrightconfig.json +.ropeproject # ... ############################################################# LaTeX diff --git a/src/awkward/contents/bitmaskedarray.py b/src/awkward/contents/bitmaskedarray.py index 9c70bfc4b5..24f552fde3 100644 --- a/src/awkward/contents/bitmaskedarray.py +++ b/src/awkward/contents/bitmaskedarray.py @@ -38,7 +38,7 @@ ToArrowOptions, ) from awkward.forms.bitmaskedform import BitMaskedForm -from awkward.forms.form import Form +from awkward.forms.form import Form, FormKeyPathT from awkward.index import Index if TYPE_CHECKING: @@ -288,6 +288,16 @@ def _form_with_key(self, getkey: Callable[[Content], str | None]) -> BitMaskedFo form_key=form_key, ) + def _form_with_key_path(self, path: FormKeyPathT) -> BitMaskedForm: + return self.form_cls( + self._mask.form, + self._content._form_with_key_path((*path, None)), + self._valid_when, + self._lsb_order, + parameters=self._parameters, + form_key=repr(path), + ) + def _to_buffers( self, form: Form, diff --git a/src/awkward/contents/bytemaskedarray.py b/src/awkward/contents/bytemaskedarray.py index 87beb5f59f..07165be525 100644 --- a/src/awkward/contents/bytemaskedarray.py +++ b/src/awkward/contents/bytemaskedarray.py @@ -42,7 +42,7 @@ ) from awkward.errors import AxisError from awkward.forms.bytemaskedform import ByteMaskedForm -from awkward.forms.form import Form +from awkward.forms.form import Form, FormKeyPathT from awkward.index import Index if TYPE_CHECKING: @@ -218,6 +218,15 @@ def _form_with_key(self, getkey: Callable[[Content], str | None]) -> ByteMaskedF form_key=form_key, ) + def _form_with_key_path(self, path: FormKeyPathT) -> ByteMaskedForm: + return self.form_cls( + self._mask.form, + self._content._form_with_key_path((*path, None)), + self._valid_when, + parameters=self._parameters, + form_key=repr(path), + ) + def _to_buffers( self, form: Form, 
diff --git a/src/awkward/contents/content.py b/src/awkward/contents/content.py index f324d9dac5..15332d9052 100644 --- a/src/awkward/contents/content.py +++ b/src/awkward/contents/content.py @@ -54,7 +54,7 @@ TypedDict, ) from awkward._util import UNSET -from awkward.forms.form import Form +from awkward.forms.form import Form, FormKeyPathT from awkward.index import Index, Index64 if TYPE_CHECKING: @@ -238,6 +238,19 @@ def _form_with_key( ) -> Form: raise NotImplementedError + def form_with_key_path(self, root: FormKeyPathT = ()) -> Form: + """Return this layout's Form with path-derived form keys. + + Every node's ``form_key`` is ``repr()`` of its key path, a tuple that + starts as ``root`` and gains one component per nesting level: the + field name (``str``) below a record, the content index (``int``) + below a union, and ``None`` below any other node with a content. + """ + return self._form_with_key_path(root) + + def _form_with_key_path(self, path: FormKeyPathT) -> Form: + raise NotImplementedError + @property def form_cls(self) -> type[Form]: raise NotImplementedError diff --git a/src/awkward/contents/emptyarray.py b/src/awkward/contents/emptyarray.py index 06447f2d8b..78318d713b 100644 --- a/src/awkward/contents/emptyarray.py +++ b/src/awkward/contents/emptyarray.py @@ -35,7 +35,7 @@ ) from awkward.errors import AxisError from awkward.forms.emptyform import EmptyForm -from awkward.forms.form import Form +from awkward.forms.form import Form, FormKeyPathT from awkward.index import Index if TYPE_CHECKING: @@ -118,6 +118,9 @@ def simplified(cls, *, parameters=None, backend=None): def _form_with_key(self, getkey: Callable[[Content], str | None]) -> EmptyForm: return self.form_cls(form_key=getkey(self)) + def _form_with_key_path(self, path: FormKeyPathT) -> EmptyForm: + return self.form_cls(form_key=repr(path)) + def _to_buffers( self, form: Form, diff --git a/src/awkward/contents/indexedarray.py b/src/awkward/contents/indexedarray.py index 6421f51742..4edc24ac07 100644 --- a/src/awkward/contents/indexedarray.py +++ b/src/awkward/contents/indexedarray.py @@ -39,7 +39,7 @@ ToArrowOptions, ) from awkward.errors import AxisError -from awkward.forms.form import Form +from awkward.forms.form import Form, FormKeyPathT from awkward.forms.indexedform import IndexedForm from awkward.index import Index @@ 
-214,6 +214,14 @@ def _form_with_key(self, getkey: Callable[[Content], str | None]) -> IndexedForm form_key=form_key, ) + def _form_with_key_path(self, path: FormKeyPathT) -> IndexedForm: + return self.form_cls( + self._index.form, + self._content._form_with_key_path((*path, None)), + parameters=self._parameters, + form_key=repr(path), + ) + def _to_buffers( self, form: Form, diff --git a/src/awkward/contents/indexedoptionarray.py b/src/awkward/contents/indexedoptionarray.py index 2162fb72c4..57e67c6aa9 100644 --- a/src/awkward/contents/indexedoptionarray.py +++ b/src/awkward/contents/indexedoptionarray.py @@ -39,7 +39,7 @@ ToArrowOptions, ) from awkward.errors import AxisError -from awkward.forms.form import Form +from awkward.forms.form import Form, FormKeyPathT from awkward.forms.indexedoptionform import IndexedOptionForm from awkward.index import Index @@ -202,6 +202,14 @@ def _form_with_key( form_key=form_key, ) + def _form_with_key_path(self, path: FormKeyPathT) -> IndexedOptionForm: + return self.form_cls( + self._index.form, + self._content._form_with_key_path((*path, None)), + parameters=self._parameters, + form_key=repr(path), + ) + def _to_buffers( self, form: Form, diff --git a/src/awkward/contents/listarray.py b/src/awkward/contents/listarray.py index 8f0a6f5e4b..9de9c65814 100644 --- a/src/awkward/contents/listarray.py +++ b/src/awkward/contents/listarray.py @@ -38,7 +38,7 @@ ToArrowOptions, ) from awkward.contents.listoffsetarray import ListOffsetArray -from awkward.forms.form import Form +from awkward.forms.form import Form, FormKeyPathT from awkward.forms.listform import ListForm from awkward.index import Index @@ -207,6 +207,15 @@ def _form_with_key(self, getkey: Callable[[Content], str | None]) -> ListForm: form_key=form_key, ) + def _form_with_key_path(self, path: FormKeyPathT) -> ListForm: + return self.form_cls( + self._starts.form, + self._stops.form, + self._content._form_with_key_path((*path, None)), + parameters=self._parameters, + 
form_key=repr(path), + ) + def _to_buffers( self, form: Form, diff --git a/src/awkward/contents/listoffsetarray.py b/src/awkward/contents/listoffsetarray.py index 003467c24b..9367810539 100644 --- a/src/awkward/contents/listoffsetarray.py +++ b/src/awkward/contents/listoffsetarray.py @@ -39,7 +39,7 @@ ToArrowOptions, ) from awkward.errors import AxisError -from awkward.forms.form import Form +from awkward.forms.form import Form, FormKeyPathT from awkward.forms.listoffsetform import ListOffsetForm from awkward.index import Index, Index64 @@ -199,6 +199,14 @@ def _form_with_key(self, getkey: Callable[[Content], str | None]) -> ListOffsetF form_key=form_key, ) + def _form_with_key_path(self, path: FormKeyPathT) -> ListOffsetForm: + return self.form_cls( + self._offsets.form, + self._content._form_with_key_path((*path, None)), + parameters=self._parameters, + form_key=repr(path), + ) + def _to_buffers( self, form: Form, diff --git a/src/awkward/contents/numpyarray.py b/src/awkward/contents/numpyarray.py index 5c90ca0141..3b07810b40 100644 --- a/src/awkward/contents/numpyarray.py +++ b/src/awkward/contents/numpyarray.py @@ -45,7 +45,7 @@ ToArrowOptions, ) from awkward.errors import AxisError -from awkward.forms.form import Form +from awkward.forms.form import Form, FormKeyPathT from awkward.forms.numpyform import NumpyForm from awkward.index import Index from awkward.types.numpytype import primitive_to_dtype @@ -200,6 +200,14 @@ def _form_with_key(self, getkey: Callable[[Content], str | None]) -> NumpyForm: form_key=getkey(self), ) + def _form_with_key_path(self, path: FormKeyPathT) -> NumpyForm: + return self.form_cls( + ak.types.numpytype.dtype_to_primitive(self._data.dtype), + self.inner_shape, + parameters=self._parameters, + form_key=repr(path), + ) + def _to_buffers( self, form: Form, diff --git a/src/awkward/contents/recordarray.py b/src/awkward/contents/recordarray.py index 4aafcfd6b2..ce51935cb0 100644 --- a/src/awkward/contents/recordarray.py +++ 
b/src/awkward/contents/recordarray.py @@ -40,7 +40,7 @@ ToArrowOptions, ) from awkward.errors import AxisError -from awkward.forms.form import Form +from awkward.forms.form import Form, FormKeyPathT from awkward.forms.recordform import RecordForm from awkward.index import Index from awkward.record import Record @@ -316,6 +316,22 @@ def _form_with_key(self, getkey: Callable[[Content], str | None]) -> RecordForm: form_key=form_key, ) + def _form_with_key_path(self, path: FormKeyPathT) -> RecordForm: + # Iterate the public `self.fields` rather than `self._fields`: the path + # needs string field names, and `self._fields` is None for tuple records + # whereas `self.fields` still yields "0", "1", ... for them. + contents = [ + x._form_with_key_path((*path, k)) + for k, x in zip(self.fields, self._contents) + ] + + return self.form_cls( + contents, + self._fields, + parameters=self._parameters, + form_key=repr(path), + ) + def _to_buffers( self, form: Form, diff --git a/src/awkward/contents/regulararray.py b/src/awkward/contents/regulararray.py index 318a21bca5..da144282b7 100644 --- a/src/awkward/contents/regulararray.py +++ b/src/awkward/contents/regulararray.py @@ -37,7 +37,7 @@ RemoveStructureOptions, ToArrowOptions, ) -from awkward.forms.form import Form +from awkward.forms.form import Form, FormKeyPathT from awkward.forms.regularform import RegularForm from awkward.index import Index @@ -211,6 +211,14 @@ def _form_with_key(self, getkey: Callable[[Content], str | None]) -> RegularForm form_key=form_key, ) + def _form_with_key_path(self, path: FormKeyPathT) -> RegularForm: + return self.form_cls( + self._content._form_with_key_path((*path, None)), + self._size, + parameters=self._parameters, + form_key=repr(path), + ) + def _to_buffers( self, form: Form, diff --git a/src/awkward/contents/unionarray.py b/src/awkward/contents/unionarray.py index 5bac5fda40..d92b107e47 100644 --- a/src/awkward/contents/unionarray.py +++ b/src/awkward/contents/unionarray.py @@ -39,7 +39,7 @@ ToArrowOptions, ) from awkward.errors import AxisError -from 
awkward.forms.form import Form +from awkward.forms.form import Form, FormKeyPathT from awkward.forms.unionform import UnionForm from awkward.index import Index, Index8, Index64 @@ -460,6 +460,15 @@ def _form_with_key(self, getkey: Callable[[Content], str | None]) -> UnionForm: form_key=form_key, ) + def _form_with_key_path(self, path: FormKeyPathT) -> UnionForm: + return self.form_cls( + self._tags.form, + self._index.form, + [x._form_with_key_path((*path, i)) for i, x in enumerate(self._contents)], + parameters=self._parameters, + form_key=repr(path), + ) + def _to_buffers( self, form: Form, diff --git a/src/awkward/contents/unmaskedarray.py b/src/awkward/contents/unmaskedarray.py index 0dd500ebc1..2eb82d64a0 100644 --- a/src/awkward/contents/unmaskedarray.py +++ b/src/awkward/contents/unmaskedarray.py @@ -39,7 +39,7 @@ ToArrowOptions, ) from awkward.errors import AxisError -from awkward.forms.form import Form +from awkward.forms.form import Form, FormKeyPathT from awkward.forms.unmaskedform import UnmaskedForm from awkward.index import Index @@ -138,6 +138,13 @@ def _form_with_key(self, getkey: Callable[[Content], str | None]) -> UnmaskedFor form_key=form_key, ) + def _form_with_key_path(self, path: FormKeyPathT) -> UnmaskedForm: + return self.form_cls( + self._content._form_with_key_path((*path, None)), + parameters=self._parameters, + form_key=repr(path), + ) + def _to_buffers( self, form: Form, diff --git a/src/awkward/forms/form.py b/src/awkward/forms/form.py index 49082970eb..5ebee62b83 100644 --- a/src/awkward/forms/form.py +++ b/src/awkward/forms/form.py @@ -27,6 +27,9 @@ Iterator, JSONMapping, Self, + Tuple, + TypeAlias, + Union, ) __all__ = ("from_dict", "from_type", "from_json", "reserved_nominal_parameters", "Form") @@ -34,6 +37,7 @@ np = NumpyMetadata.instance() numpy_backend = NumpyBackend.instance() +FormKeyPathT: TypeAlias = Tuple[Union[str, int, None], ...] 
reserved_nominal_parameters: Final = frozenset( { diff --git a/tests/test_3311_form_with_key_path.py b/tests/test_3311_form_with_key_path.py new file mode 100644 index 0000000000..df7308ef0f --- /dev/null +++ b/tests/test_3311_form_with_key_path.py @@ -0,0 +1,257 @@ +from __future__ import annotations + +import numpy as np + +import awkward as ak + + +def test_record_tuple(): + form = ak.forms.from_dict( + { + "class": "RecordArray", + "fields": None, + "contents": [ + {"class": "NumpyArray", "primitive": "int64", "form_key": "('0',)"}, + {"class": "NumpyArray", "primitive": "int64", "form_key": "('1',)"}, + ], + "form_key": "()", + } + ) + array = ak.Array([(1, 2)]) + assert array.layout.form_with_key_path() == form + + +def test_record_dict(): + form = ak.forms.from_dict( + { + "class": "RecordArray", + "fields": ["foo", "bar"], + "contents": [ + {"class": "NumpyArray", "primitive": "int64", "form_key": "('foo',)"}, + {"class": "NumpyArray", "primitive": "int64", "form_key": "('bar',)"}, + ], + "form_key": "()", + } + ) + array = ak.Array({"foo": [1], "bar": [2]}) + assert array.layout.form_with_key_path() == form + + +def test_numpy(): + form = ak.forms.from_dict( + {"class": "NumpyArray", "primitive": "int64", "form_key": "()"} + ) + array = ak.Array([1, 2, 3]) + assert array.layout.form_with_key_path() == form + + +def test_listoffset(): + form = ak.forms.from_dict( + { + "class": "ListOffsetArray", + "offsets": "i64", + "content": { + "class": "NumpyArray", + "primitive": "int64", + "form_key": "(None,)", + }, + "form_key": "()", + } + ) + array = ak.Array([[1, 2], [3]]) + assert array.layout.form_with_key_path() == form + + +def test_empty(): + form = ak.forms.from_dict({"class": "EmptyArray", "form_key": "()"}) + array = ak.Array([]) + assert array.layout.form_with_key_path() == form + + +def test_bitmasked(): + form = ak.forms.from_dict( + { + "class": "BitMaskedArray", + "mask": "u8", + "valid_when": True, + "lsb_order": False, + "content": { + "class": 
"NumpyArray", + "primitive": "float64", + "form_key": "(None,)", + }, + "form_key": "()", + } + ) + content = ak.Array([0.0, 1.1, 2.2, 3.3, 4.4]).layout + mask = ak.index.IndexU8( + np.packbits(np.array([False, True, True, False, False], dtype=np.int8)) + ) + bitmaskedarray = ak.contents.BitMaskedArray(mask, content, True, 5, False) + assert bitmaskedarray.form_with_key_path() == form + + +def test_bytemasked(): + form = ak.forms.from_dict( + { + "class": "ByteMaskedArray", + "mask": "i8", + "valid_when": True, + "content": { + "class": "NumpyArray", + "primitive": "float64", + "form_key": "(None,)", + }, + "form_key": "()", + } + ) + content = ak.Array([0.0, 1.1, 2.2, 3.3, 4.4]).layout + mask = ak.index.Index8(np.array([False, True, True, False, False], dtype=np.int8)) + bytemaskedarray = ak.contents.ByteMaskedArray(mask, content, True) + assert bytemaskedarray.form_with_key_path() == form + + +def test_indexedarray(): + form = ak.forms.from_dict( + { + "class": "IndexedArray", + "index": "i64", + "content": { + "class": "NumpyArray", + "primitive": "float64", + "form_key": "(None,)", + }, + "form_key": "()", + } + ) + + content = ak.Array([0.0, 1.1, 2.2, 3.3, 4.4]).layout + index = ak.index.Index64(np.array([3, 1, 1, 4, 2], dtype=np.int64)) + indexedarray = ak.contents.IndexedArray(index, content) + assert indexedarray.form_with_key_path() == form + + +def test_indexedoptionarray(): + form = ak.forms.from_dict( + { + "class": "IndexedOptionArray", + "index": "i64", + "content": { + "class": "NumpyArray", + "primitive": "float64", + "form_key": "(None,)", + }, + "form_key": "()", + } + ) + content = ak.Array( + np.array([0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]) + ).layout + index = ak.index.Index64(np.array([2, -1, 4, 0, 8], dtype=np.int64)) + layout = ak.Array(ak.contents.IndexedOptionArray(index, content)).layout + assert layout.form_with_key_path() == form + + +def test_listarray(): + form = ak.forms.from_dict( + { + "class": "ListArray", + "starts": 
"i64", + "stops": "i64", + "content": { + "class": "NumpyArray", + "primitive": "float64", + "form_key": "(None,)", + }, + "form_key": "()", + } + ) + content = ak.contents.NumpyArray( + np.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]) + ) + starts = ak.index.Index64(np.array([0, 3, 3, 5, 6])) + stops = ak.index.Index64(np.array([3, 3, 5, 6, 9])) + layout = ak.contents.ListArray(starts, stops, content) + assert layout.form_with_key_path() == form + + +def test_regulararray(): + form = ak.forms.from_dict( + { + "class": "RegularArray", + "size": 3, + "content": { + "class": "NumpyArray", + "primitive": "int64", + "form_key": "(None,)", + }, + "form_key": "()", + } + ) + content = ak.Array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]).layout + layout = ak.contents.RegularArray(content, 3, zeros_length=0) + assert layout.form_with_key_path() == form + + +def test_unionarray(): + form = ak.forms.from_dict( + { + "class": "UnionArray", + "tags": "i8", + "index": "i64", + "contents": [ + { + "class": "ListOffsetArray", + "offsets": "i64", + "content": { + "class": "NumpyArray", + "primitive": "float64", + "form_key": "(0, None)", + }, + "form_key": "(0,)", + }, + { + "class": "ListOffsetArray", + "offsets": "i64", + "content": { + "class": "ListOffsetArray", + "offsets": "i64", + "content": { + "class": "NumpyArray", + "primitive": "uint8", + "parameters": {"__array__": "char"}, + "form_key": "(1, None, None)", + }, + "parameters": {"__array__": "string"}, + "form_key": "(1, None)", + }, + "form_key": "(1,)", + }, + ], + "form_key": "()", + } + ) + content1 = ak.operations.from_iter([[], [1.1], [2.2, 2.2]], highlevel=False) + content2 = ak.operations.from_iter([["two", "two"], ["one"], []], highlevel=False) + tags = ak.index.Index8(np.array([0, 1, 0, 1, 0, 1], dtype=np.int8)) + index = ak.index.Index64(np.array([0, 0, 1, 1, 2, 2], dtype=np.int64)) + layout = ak.contents.UnionArray(tags, index, [content1, content2]) + assert layout.form_with_key_path() == form + + +def 
test_unmaskedarray(): + form = ak.forms.from_dict( + { + "class": "UnmaskedArray", + "content": { + "class": "NumpyArray", + "primitive": "int64", + "form_key": "(None,)", + }, + "form_key": "()", + } + ) + + content = ak.Array([1, 2, 3, 4, 5]).layout + layout = ak.contents.UnmaskedArray(content) + assert layout.form_with_key_path() == form