From b28492a350540a2b60b517ff88ee5705b913cf5f Mon Sep 17 00:00:00 2001 From: Sebastien Binet Date: Mon, 13 Nov 2023 12:19:12 +0100 Subject: [PATCH] npy: first stab at a n-dim array with support for ragged-arrays Fixes #20. Signed-off-by: Sebastien Binet --- dump.go | 11 +- dump_test.go | 4 + go.mod | 2 + go.sum | 4 + npy/array.go | 231 +++++ npy/descr.go | 595 +++++++++++++ npy/gen-pickle.go | 657 ++++++++++++++ npy/npy.go | 28 +- npy/pickle.go | 103 +++ npy/pickle_test.go | 53 ++ npy/reader.go | 44 + npy/reader_test.go | 19 + npy/zall_test.go | 1542 +++++++++++++++++++++++++++++++++ testdata/ragged-array.npy | Bin 0 -> 315 bytes testdata/ragged-array.npy.txt | 4 + 15 files changed, 3286 insertions(+), 11 deletions(-) create mode 100644 npy/array.go create mode 100644 npy/descr.go create mode 100644 npy/gen-pickle.go create mode 100644 npy/pickle.go create mode 100644 npy/pickle_test.go create mode 100644 npy/zall_test.go create mode 100644 testdata/ragged-array.npy create mode 100644 testdata/ragged-array.npy.txt diff --git a/dump.go b/dump.go index e1277c3..cd5ce2a 100644 --- a/dump.go +++ b/dump.go @@ -9,7 +9,6 @@ import ( "fmt" "io" "os" - "reflect" "strings" "github.com/sbinet/npyio/npy" @@ -128,15 +127,11 @@ func display(o io.Writer, f io.Reader, fname string) error { fmt.Fprintf(o, "npy-header: %v\n", r.Header) - rt := npy.TypeFrom(r.Header.Descr.Type) - if rt == nil { - return fmt.Errorf("npyio: no reflect type for %q", r.Header.Descr.Type) - } - rv := reflect.New(reflect.SliceOf(rt)) - err = r.Read(rv.Interface()) + var arr npy.Array + err = r.Read(&arr) if err != nil && err != io.EOF { return fmt.Errorf("npyio: read error: %w", err) } - fmt.Fprintf(o, "data = %v\n", rv.Elem().Interface()) + fmt.Fprintf(o, "data = %v\n", arr.Data()) return nil } diff --git a/dump_test.go b/dump_test.go index bcfee1e..09d35c3 100644 --- a/dump_test.go +++ b/dump_test.go @@ -36,6 +36,10 @@ func TestDump(t *testing.T) { name: "testdata/data_float64_forder.npz", want: "testdata/data_float64_forder.npz.txt", }, + { + name: "testdata/ragged-array.npy", + want: "testdata/ragged-array.npy.txt", + }, } { t.Run(tc.name, func(t *testing.T) { f, err := os.Open(tc.name) diff --git a/go.mod b/go.mod index 6869d7a..2c26094 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,8 @@ go 1.20 require ( github.com/campoy/embedmd v1.0.0 + github.com/nlpodyssey/gopickle v0.2.1-0.20231124153821-2139434d2287 + golang.org/x/text v0.14.0 gonum.org/v1/gonum v0.14.0 ) diff --git a/go.sum b/go.sum index 88984a0..77a3f51 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,11 @@ github.com/campoy/embedmd v1.0.0 h1:V4kI2qTJJLf4J29RzI/MAt2c3Bl4dQSYPuflzwFH2hY= github.com/campoy/embedmd v1.0.0/go.mod h1:oxyr9RCiSXg0M3VJ3ks0UGfp98BpSSGr0kpiX3MzVl8= +github.com/nlpodyssey/gopickle v0.2.1-0.20231124153821-2139434d2287 h1:cqTk2IOiApRFn/e3YoAVQDgvOc6yU1zihhrH1w14WTg= +github.com/nlpodyssey/gopickle v0.2.1-0.20231124153821-2139434d2287/go.mod h1:f070HJ/yR+eLi5WmM1OXJEGaTpuJEUiib19olXgYha0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= golang.org/x/exp v0.0.0-20230321023759-10a507213a29 h1:ooxPy7fPvB4kwsA2h+iBNHkAbp/4JxTSwCmvdjEYmug= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= gonum.org/v1/gonum v0.14.0 h1:2NiG67LD1tEH0D7kM+ps2V+fXmsAnpUeec7n8tcr4S0= gonum.org/v1/gonum v0.14.0/go.mod h1:AoWeoz0becf9QMWtE8iWXNXc27fK4fNeHNf/oMejGfU= diff --git a/npy/array.go b/npy/array.go new file mode 100644 index 0000000..c5fb333 --- /dev/null +++ b/npy/array.go @@ -0,0 +1,231 @@ +// Copyright 2023 The npyio Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package npy + +import ( + "fmt" + "strings" + + py "github.com/nlpodyssey/gopickle/types" +) + +// Array is a multidimensional, homogeneous array of fixed-size items. +type Array struct { + descr ArrayDescr + shape []int + strides []int + fortran bool + + data any +} + +var ( + _ py.Callable = (*Array)(nil) + _ py.PyNewable = (*Array)(nil) + _ py.PyStateSettable = (*Array)(nil) +) + +func (*Array) Call(args ...any) (any, error) { + switch sz := len(args); { + case sz < 1, sz > 3: + return nil, fmt.Errorf("invalid tuple length (got=%d)", sz) + } + + return &Array{}, nil +} + +func (*Array) PyNew(args ...any) (any, error) { + var ( + subtype = args[0] + descr = args[1].(*ArrayDescr) + shape = args[2].([]int) + strides = args[3].([]int) + data = args[4].([]byte) + flags = args[5].(int) + ) + + return newArray(subtype, *descr, shape, strides, data, flags) +} + +func newArray(subtype any, descr ArrayDescr, shape, strides []int, data []byte, flags int) (*Array, error) { + switch subtype := subtype.(type) { + case *Array: + // ok. + default: + return nil, fmt.Errorf("subtyping ndarray with %T is not (yet?) supported", subtype) + } + + arr := &Array{ + descr: descr, + shape: shape, + strides: strides, + data: data, + } + return arr, nil +} + +func (arr *Array) PySetState(arg any) error { + tuple, ok := arg.(*py.Tuple) + if !ok { + return fmt.Errorf("invalid argument type %T", arg) + } + + var ( + vers = 0 + shape py.Tuple + raw any + ) + switch tuple.Len() { + case 5: + err := parseTuple(tuple, &vers, &shape, &arr.descr, &arr.fortran, nil) + if err != nil { + return fmt.Errorf("could not parse ndarray.__setstate__ tuple: %w", err) + } + raw = tuple.Get(4) + case 4: + err := parseTuple(tuple, &shape, &arr.descr, &arr.fortran, nil) + if err != nil { + return fmt.Errorf("could not parse ndarray.__setstate__ tuple: %w", err) + } + raw = tuple.Get(3) + default: + return fmt.Errorf("invalid length (%d) for ndarray.__setstate__ tuple", tuple.Len()) + } + + arr.shape = nil + for i := range shape { + v, ok := shape.Get(i).(int) + if !ok { + return fmt.Errorf("invalid shape[%d]: got=%T, want=int", i, shape.Get(i)) + } + arr.shape = append(arr.shape, v) + } + + err := arr.setupStrides() + if err != nil { + return fmt.Errorf("ndarray.__setstate__ could not infer strides: %w", err) + } + + switch raw := raw.(type) { + case *py.List: + arr.data = raw + + case []byte: + data, err := arr.descr.unmarshal(raw, arr.shape) + if err != nil { + return fmt.Errorf("ndarray.__setstate__ could not unmarshal raw data: %w", err) + } + arr.data = data + } + + return nil +} + +func (arr *Array) setupStrides() error { + // TODO(sbinet): complete implementation. + // see: _array_fill_strides in numpy/_core/multiarray/ctors.c + + if arr.shape == nil { + arr.strides = nil + return nil + } + + strides := make([]int, len(arr.shape)) + // FIXME(sbinet): handle non-contiguous arrays + // FIXME(sbinet): handle FORTRAN arrays + + var ( + // notCFContig bool + noDim bool // a dimension != 1 was found + ) + + // check if array is both FORTRAN- and C-contiguous + for _, dim := range arr.shape { + if dim != 1 { + if noDim { + // notCFContig = true + break + } + noDim = true + } + } + + itemsize := arr.descr.itemsize() + switch { + case arr.fortran: + for i, dim := range arr.shape { + strides[i] = itemsize + switch { + case dim != 0: + itemsize *= dim + default: + // notCFContig = false + } + } + + default: + for i := len(arr.shape) - 1; i >= 0; i-- { + dim := arr.shape[i] + strides[i] = itemsize + switch { + case dim != 0: + itemsize *= dim + default: + // notCFContig = false + } + } + } + + arr.strides = strides + return nil +} + +// Descr returns the array's data type descriptor. +func (arr Array) Descr() ArrayDescr { + return arr.descr +} + +// Shape returns the array's shape. +func (arr Array) Shape() []int { + return arr.shape +} + +// Strides returns the array's strides in bytes. +func (arr Array) Strides() []int { + return arr.strides +} + +// Fortran returns whether the array's data is stored in FORTRAN-order +// (ie: column-major) instead of C-order (ie: row-major.) +func (arr Array) Fortran() bool { + return arr.fortran +} + +// Data returns the array's underlying data. +func (arr Array) Data() any { + return arr.data +} + +func (arr Array) String() string { + o := new(strings.Builder) + fmt.Fprintf(o, "Array{descr: %v, ", arr.descr) + switch arr.shape { + case nil: + fmt.Fprintf(o, "shape: nil, ") + default: + fmt.Fprintf(o, "shape: %v, ", arr.shape) + } + switch arr.strides { + case nil: + fmt.Fprintf(o, "strides: nil, ") + default: + fmt.Fprintf(o, "strides: %v, ", arr.strides) + } + fmt.Fprintf(o, "fortran: %v, data: %+v}", + arr.fortran, + arr.data, + ) + return o.String() +} diff --git a/npy/descr.go b/npy/descr.go new file mode 100644 index 0000000..91c070b --- /dev/null +++ b/npy/descr.go @@ -0,0 +1,595 @@ +// Copyright 2023 The npyio Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package npy + +import ( + "bytes" + "encoding/binary" + "fmt" + "math" + "sort" + "strconv" + "strings" + "unicode/utf8" + + py "github.com/nlpodyssey/gopickle/types" + "golang.org/x/text/encoding" + "golang.org/x/text/encoding/unicode/utf32" +) + +// ArrayDescr describes a numpy data type. +type ArrayDescr struct { + kind byte + order binary.ByteOrder + flags int // flags describing data type + esize int // element size in bytes + align int // alignment needed for this type + + subarr *subarrayDescr // non-nil if this type is an array (C-continguous) of some other type. + names []string // fields' names (if any) + fields structFields // fields (if any) + meta map[string]any +} + +func newDescrFrom(v any, flags int) (*ArrayDescr, error) { + switch v := v.(type) { + case nil: + return &ArrayDescr{kind: 'f', order: binary.LittleEndian, esize: 8, align: 8, flags: flags}, nil + case *ArrayDescr: + return v, nil + case string: + return newDescrFromStr(v, flags) + default: + return nil, fmt.Errorf("invalid type %T for dtype ctor", v) + } +} + +func newDescrFromStr(typ string, flags int) (*ArrayDescr, error) { + dt := &ArrayDescr{order: nil, esize: -1, align: -1, flags: flags} + + if len(typ) == 0 { + return nil, fmt.Errorf("data type %q not understood", typ) + } + + descr := typ + switch { + case strings.HasPrefix(typ, "<"): + descr = descr[1:] + dt.order = binary.LittleEndian + case strings.HasPrefix(typ, "="): + descr = descr[1:] + dt.order = nativeEndian + case strings.HasPrefix(typ, "|"): + descr = descr[1:] + dt.order = nil + case strings.HasPrefix(typ, ">"): + descr = descr[1:] + dt.order = binary.BigEndian + } + + if len(descr) == 0 { + return nil, fmt.Errorf("data type %v not understood", typ) + } + + if isDatetimeStr(descr) { + // FIXME(sbinet) + return nil, fmt.Errorf("datetime string not implemented") + } + + err := dt.init(descr) + if err != nil { + return nil, err + } + + return dt, nil +} + +func isDatetimeStr(typ string) bool { + if len(typ) < 2 { + return false + } + switch typ { + case "M8", "m8": + return true + case "datetime64", "timedelta64": + return true + } + return false +} + +func (dt *ArrayDescr) init(descr string) error { + switch len(descr) { + case 0: + return fmt.Errorf("invalid typecode %q", descr) + + case 1: // a typecode like "d", "f", ... + dt.kind = descr[0] + + default: + dt.kind = descr[0] + v, err := strconv.ParseUint(descr[1:], 10, 64) + if err != nil { + return fmt.Errorf("could not parse typecode %q: %w", descr, err) + } + dt.esize = int(v) + switch dt.kind { + case 'b', 'i', 'u', 'f', 'm', 'M': + dt.align = dt.esize + case 'c': + dt.align = dt.esize / 2 + case 'O': + // dt.esize = 8 + dt.align = 8 + case 'S': + dt.align = 1 + case 'V': + dt.align = 1 + } + } + + return nil +} + +type structFields map[string]structField +type structField struct { + dtype ArrayDescr + offset uint32 +} + +type subarrayDescr struct { + dtype ArrayDescr + shape []int +} + +var ( + _ py.Callable = (*ArrayDescr)(nil) + _ py.PyStateSettable = (*ArrayDescr)(nil) +) + +func (*ArrayDescr) Call(args ...any) (any, error) { + switch sz := len(args); { + case sz < 1, sz > 3: + return nil, fmt.Errorf("invalid tuple length (got=%d)", sz) + } + + descr, ok := args[0].(string) + if !ok { + return nil, fmt.Errorf("invalid descr type %T", args[0]) + } + + const flags = 0 + return newDescrFromStr(descr, flags) +} + +func (dt *ArrayDescr) PySetState(arg any) error { + tuple, ok := arg.(*py.Tuple) + if !ok { + return fmt.Errorf("invalid argument type %T", arg) + } + + if tuple == nil { + // FIXME(sbinet): is a nil tuple a valid value ? + return fmt.Errorf("invalid __setstate__ argument %+v", tuple) + } + + var ( + vers int = 4 + order string + subarr py.Tuple + names py.Tuple + fields py.Dict + meta py.Dict + esize = -1 + align = -1 + flags = 0 + ) + + switch tuple.Len() { + case 9: + err := parseTuple(tuple, &vers, &order, &subarr, &names, &fields, &esize, &align, &flags, &meta) + if err != nil { + return fmt.Errorf("could not parse tuple: %w", err) + } + case 8: + err := parseTuple(tuple, &vers, &order, &subarr, &names, &fields, &esize, &align, &flags) + if err != nil { + return fmt.Errorf("could not parse tuple: %w", err) + } + case 7: + err := parseTuple(tuple, &vers, &order, &subarr, &names, &fields, &esize, &align) + if err != nil { + return fmt.Errorf("could not parse tuple: %w", err) + } + case 6: + err := parseTuple(tuple, &vers, &order, &subarr, &fields, &esize, &align) + if err != nil { + return fmt.Errorf("could not parse tuple: %w", err) + } + case 5: + vers = 0 + err := parseTuple(tuple, &order, &subarr, &fields, &esize, &align) + if err != nil { + return fmt.Errorf("could not parse tuple: %w", err) + } + default: + switch { + case tuple.Len() > 5: + v, ok := tuple.Get(0).(int) + if !ok { + return fmt.Errorf("invalid __setstate__ arg[0]: got=%T, want=int", tuple.Get(0)) + } + vers = v + default: + vers = -1 + } + } + + if vers < 0 || vers > 4 { + return fmt.Errorf("invalid version=%d for numpy.dtype pickle", vers) + } + + if vers == 0 || vers == 1 { + return fmt.Errorf("unhandled version=%d for numpy.dtype pickle", vers) + } + + switch order { + case "<": + dt.order = binary.LittleEndian + case ">": + dt.order = binary.BigEndian + case "=": + dt.order = nativeEndian + case "|": + dt.order = nil + } + + if subarr.Len() > 0 { + var ( + subdt ArrayDescr + tuple py.Tuple + shape []int + ) + err := parseTuple(&subarr, &subdt, &tuple) + if err != nil { + return fmt.Errorf("could not parse subarray tuple: %w", err) + } + for i := range tuple { + v, ok := tuple[i].(int) + if !ok { + return fmt.Errorf("could not parse subarray shape[%d]: type=%T", i, tuple[i]) + } + shape = append(shape, v) + } + dt.subarr = &subarrayDescr{ + dtype: subdt, + shape: shape, + } + } + + if names.Len() > 0 { + for _, v := range names { + name, ok := v.(string) + if !ok { + return fmt.Errorf("invalid field name type %T", v) + } + dt.names = append(dt.names, name) + } + } + + if fields.Len() > 0 { + dt.fields = make(structFields, fields.Len()) + for i := 0; i < fields.Len(); i++ { + v, ok := fields.Get(dt.names[i]) + if !ok { + return fmt.Errorf("invalid field offset name %q", dt.names[i]) + } + tup, ok := v.(*py.Tuple) + if !ok { + return fmt.Errorf("invalid field offset type %T", v) + } + if got, want := tup.Len(), 2; got != want { + return fmt.Errorf("invalid field offset tuple length (got=%d, want=%d)", got, want) + } + fdt, ok := tup.Get(0).(*ArrayDescr) + if !ok { + return fmt.Errorf("invalid field offset dtype") + } + offset, ok := tup.Get(1).(int) + if !ok { + return fmt.Errorf("invalid field offset") + } + dt.fields[dt.names[i]] = structField{*fdt, uint32(offset)} + } + } + + if esize >= 0 { + dt.esize = esize + } + if align >= 0 { + dt.align = align + } + if flags >= 0 { + dt.flags = flags + } + + if meta.Len() > 0 { + return fmt.Errorf("dtype with metadata not handled (yet?)") + } + + return nil +} + +func (dt ArrayDescr) unmarshal(raw []byte, shape []int) (any, error) { + // FIXME(sbinet): handle ndims + // FIXME(sbinet): handle sub-arrays ? + // FIXME(sbinet): handle strides + + if dt.subarr != nil { + return nil, fmt.Errorf("sub-arrays not handled") + } + + switch dt.kind { + case 'b': + data := make([]bool, len(raw)) + for i, v := range raw { + if v == 0 { + continue + } + data[i] = true + } + return data, nil + + case 'i': + switch dt.esize { + case 1: + data := make([]int8, len(raw)) + for i, v := range raw { + data[i] = int8(v) + } + return data, nil + + case 2: + const sz = 2 + data := make([]int16, 0, len(raw)/sz) + for i := 0; i < len(raw); i += sz { + data = append(data, int16(dt.order.Uint16(raw[i:]))) + } + return data, nil + + case 4: + const sz = 4 + data := make([]int32, 0, len(raw)/sz) + for i := 0; i < len(raw); i += sz { + data = append(data, int32(dt.order.Uint32(raw[i:]))) + } + return data, nil + + case 8: + const sz = 8 + data := make([]int64, 0, len(raw)/sz) + for i := 0; i < len(raw); i += sz { + data = append(data, int64(dt.order.Uint64(raw[i:]))) + } + return data, nil + + default: + return nil, fmt.Errorf("unhandled esize=%d for kind=%q", dt.esize, dt.kind) + } + + case 'u': + switch dt.esize { + case 1: + data := make([]uint8, len(raw)) + copy(data, raw) + return data, nil + + case 2: + const sz = 2 + data := make([]uint16, 0, len(raw)/sz) + for i := 0; i < len(raw); i += sz { + data = append(data, dt.order.Uint16(raw[i:])) + } + return data, nil + + case 4: + const sz = 4 + data := make([]uint32, 0, len(raw)/sz) + for i := 0; i < len(raw); i += sz { + data = append(data, dt.order.Uint32(raw[i:])) + } + return data, nil + + case 8: + const sz = 8 + data := make([]uint64, 0, len(raw)/sz) + for i := 0; i < len(raw); i += sz { + data = append(data, dt.order.Uint64(raw[i:])) + } + return data, nil + + default: + return nil, fmt.Errorf("unhandled esize=%d for kind=%q", dt.esize, dt.kind) + } + + case 'f': + switch dt.esize { + case 4: + const sz = 4 + data := make([]float32, 0, len(raw)/sz) + for i := 0; i < len(raw); i += sz { + data = append(data, math.Float32frombits(dt.order.Uint32(raw[i:]))) + } + return data, nil + + case 8: + const sz = 8 + data := make([]float64, 0, len(raw)/sz) + for i := 0; i < len(raw); i += sz { + data = append(data, math.Float64frombits(dt.order.Uint64(raw[i:]))) + } + return data, nil + + default: + return nil, fmt.Errorf("unhandled esize=%d for kind=%q", dt.esize, dt.kind) + } + + case 'c': + switch dt.esize { + case 8: + const sz = 8 + data := make([]complex64, 0, len(raw)/sz) + for i := 0; i < len(raw); i += sz { + data = append(data, complex( + math.Float32frombits(dt.order.Uint32(raw[i+0:])), + math.Float32frombits(dt.order.Uint32(raw[i+4:])), + )) + } + return data, nil + + case 16: + const sz = 16 + data := make([]complex128, 0, len(raw)/sz) + for i := 0; i < len(raw); i += sz { + data = append(data, complex( + math.Float64frombits(dt.order.Uint64(raw[i+0:])), + math.Float64frombits(dt.order.Uint64(raw[i+8:])), + )) + } + return data, nil + + default: + return nil, fmt.Errorf("unhandled esize=%d for kind=%q", dt.esize, dt.kind) + } + + case 'S': + switch len(shape) { + case 0: + return string(raw), nil // FIXME(sbinet): use subset ? (shape/dims/...) + default: + data := make([]string, 0, len(raw)/dt.esize) + for i := 0; i < len(raw); i += dt.esize { + data = append(data, string(raw[i:i+dt.esize])) // FIXME(sbinet): no-alloc ? + } + return data, nil + } + + case 'U': + order := utf32.BigEndian + if dt.order == binary.LittleEndian { + order = utf32.LittleEndian + } + dec := utf32.UTF32(order, utf32.IgnoreBOM).NewDecoder() + switch len(shape) { + case 0: + data, err := decodeUTF(dec, raw) + if err != nil { + return nil, fmt.Errorf("could not decode utf array: %w", err) + } + return data, nil + + default: + data := make([]string, 0, len(raw)/dt.esize) + for i := 0; i < len(raw); i += dt.esize { + v, err := decodeUTF(dec, raw[i:i+dt.esize]) + if err != nil { + return nil, fmt.Errorf("could not decode utf array element %d: %w", i/dt.esize, err) + } + data = append(data, v) + } + return data, nil + } + + case 'O': + pkl := newUnpickler(bytes.NewReader(raw)) + data, err := pkl.Load() + if err != nil { + return nil, fmt.Errorf("could not unpickle data: %w", err) + } + return data, nil + + default: + return nil, fmt.Errorf("unknown dtype [%c%d]", dt.kind, dt.esize) + } +} + +func (dt ArrayDescr) itemsize() int { + if dt.esize < 0 { + panic(fmt.Errorf("unknown dtype [%c%d]", dt.kind, dt.esize)) + } + return dt.esize +} + +func (dt ArrayDescr) String() string { + o := new(strings.Builder) + fmt.Fprintf(o, + "ArrayDescr{kind: '%s', order: '%s', flags: %d, esize: %d, align: %d, subarr: %v, names: %v, fields: %v, meta: %v}", + string(dt.kind), + orderToString(dt.order), + dt.flags, + dt.esize, + dt.align, + dt.subarr, + dt.names, + dt.fields, + dt.meta, + ) + return o.String() +} + +func (sfs structFields) String() string { + o := new(strings.Builder) + fmt.Fprintf(o, "{") + keys := make([]string, 0, len(sfs)) + for k := range sfs { + keys = append(keys, k) + } + sort.Strings(keys) + for i, k := range keys { + if i > 0 { + fmt.Fprintf(o, ", ") + } + v := sfs[k] + fmt.Fprintf(o, "%q: %v", k, v) + } + fmt.Fprintf(o, "}") + return o.String() +} + +func (sf structField) String() string { + o := new(strings.Builder) + fmt.Fprintf(o, "field{dtype: %v, offset: %d}", sf.dtype, sf.offset) + return o.String() +} + +func (sub subarrayDescr) String() string { + o := new(strings.Builder) + fmt.Fprintf(o, "subarr{dtype: %v, shape: %v}", sub.dtype, sub.shape) + return o.String() +} + +func decodeUTF(dec *encoding.Decoder, raw []byte) (string, error) { + // FIXME(sbinet): use subset ? (shape/dims/...) + vs := make([]byte, 0, utf8.RuneCount(raw)) + raw, err := dec.Bytes(raw) + if err != nil { + return "", err + } + i := 0 +loop: + for { + r, sz := utf8.DecodeRune(raw[i:]) + switch r { + case utf8.RuneError: + if sz == 0 { + break loop + } + return string(vs), fmt.Errorf("invalid rune") + default: + vs = utf8.AppendRune(vs, r) + i += sz + } + } + return string(vs), nil +} diff --git a/npy/gen-pickle.go b/npy/gen-pickle.go new file mode 100644 index 0000000..94ad8d1 --- /dev/null +++ b/npy/gen-pickle.go @@ -0,0 +1,657 @@ +// Copyright 2023 The npyio Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build ignore + +package main + +import ( + "bytes" + "encoding/json" + "fmt" + "go/format" + "io" + "log" + "os" + "os/exec" + "sort" + "strings" +) + +type Code struct { + Py string + Go string +} + +func main() { + src := new(bytes.Buffer) + + fmt.Fprintf(src, `// Copyright 2023 The npyio Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Automatically generated. DO NOT EDIT. + +package npy + +import ( + "encoding/binary" +) + +`) + + dtype(src, []string{ + `np.dtype(">b1")`, + `np.dtype("i1")`, + `np.dtype("i2")`, + `np.dtype("i4")`, + `np.dtype("i8")`, + `np.dtype("u1")`, + `np.dtype("u2")`, + `np.dtype("u4")`, + `np.dtype("u8")`, + `np.dtype("f4")`, + `np.dtype("f8")`, + `np.dtype("c8")`, + `np.dtype("c16")`, + `np.dtype("S4")`, + `np.dtype("=S4")`, + `np.dtype("|S4")`, + `np.dtype("S4")`, + `np.dtype("S8")`, + `np.dtype("S42")`, + `np.dtype("O4")`, + `np.dtype("=O4")`, + `np.dtype("|O4")`, + `np.dtype("O4")`, + `np.dtype("O8")`, + `np.dtype("=O8")`, + `np.dtype("|O8")`, + `np.dtype("O8")`, + `np.dtype([('f1', [('f1', np.int16)])])`, + `np.dtype("i4, (2,3)f8")`, + `np.dtype("i2, i4, (2,3)f8")`, + `np.dtype("i2, i8, (2,3)f8")`, + // UTF + `np.dtype("U10")`, + }) + ndarray(src, []Code{ + // bool + {`np.array([True,False,True], dtype=" 0 { + fmt.Fprintf(w, ", ") + } + fmt.Fprintf(w, "%d", v) + } + fmt.Fprintf(w, "},\n") + } + switch len(arr.Strides) { + case 0: + // no-op. + default: + fmt.Fprintf(w, "\tstrides: []int{") + for i, v := range arr.Strides { + if i > 0 { + fmt.Fprintf(w, ", ") + } + fmt.Fprintf(w, "%d", v) + } + fmt.Fprintf(w, "},\n") + } + fmt.Fprintf(w, "\tdata: %s,\n", godata) + fmt.Fprintf(w, "}") + return w.Bytes(), nil +} + +func marshalArrayDescr(dt ArrayDescr) ([]byte, error) { + w := new(bytes.Buffer) + fmt.Fprintf(w, "ArrayDescr{\n") + fmt.Fprintf(w, "\tkind: '%s',\n", dt.Kind) + order := "nativeEndian" + switch dt.Order { + case "<": + order = "binary.LittleEndian" + case ">": + order = "binary.BigEndian" + case "=": + order = "nativeEndian" + case "|": + order = "nil" + // FIXME(sbinet): handle as not applicable ? + default: + return nil, fmt.Errorf("unknown endianness %q", dt.Order) + } + if len(dt.Descr) == 1 && (dt.Kind != "V") { + descr := dt.Descr[0].Descr[0][1] + switch { + case strings.HasPrefix(descr, "<"): + order = "binary.LittleEndian" + case strings.HasPrefix(descr, ">"): + order = "binary.BigEndian" + case strings.HasPrefix(descr, "="): + order = "nativeEndian" + case strings.HasPrefix(descr, "|"): + order = "nil" + } + } + fmt.Fprintf(w, "\torder: %s,\n", order) + fmt.Fprintf(w, "\tesize: %d,\n", dt.Size) + fmt.Fprintf(w, "\talign: %d,\n", dt.Align) + if dt.Flags != 0 { + fmt.Fprintf(w, "\tflags: %d,\n", dt.Flags) + } + if len(dt.Names) > 0 { + fmt.Fprintf(w, "\tnames: []string{") + for i, name := range dt.Names { + if i > 0 { + fmt.Fprintf(w, ", ") + } + fmt.Fprintf(w, "%q", name) + } + fmt.Fprintf(w, "},\n") + } + + if len(dt.Fields) > 0 { + fmt.Fprintf(w, "\tfields: map[string]structField{\n") + keys := make([]string, 0, len(dt.Fields)) + for k := range dt.Fields { + keys = append(keys, k) + } + sort.Strings(keys) + for _, k := range keys { + f := dt.Fields[k] + sub, err := marshalArrayDescr(f.Descr) + if err != nil { + return nil, fmt.Errorf("could not marshal field %q: %w", k, err) + } + fmt.Fprintf(w, "\t\t%q: {\ndtype: %s,\noffset: %d,\n},\n", k, sub, f.Offset) + } + fmt.Fprintf(w, "},\n") + } + + if dt.Subarr != nil { + fmt.Fprintf(w, "\tsubarr: &subarrayDescr{\n") + sub, err := marshalArrayDescr(dt.Subarr.Descr) + if err != nil { + return nil, fmt.Errorf("could not marshal subarray dtype %q: %w", dt.Subarr.Descr.Kind, err) + } + fmt.Fprintf(w, "\t\tdtype: %s,\n", sub) + fmt.Fprintf(w, "\t\tshape: []int{") + for i, dim := range dt.Subarr.Shape { + if i > 0 { + fmt.Fprintf(w, ", ") + } + fmt.Fprintf(w, "%d", dim) + } + fmt.Fprintf(w, "\t\t},\n") + fmt.Fprintf(w, "\t},\n") + } + + fmt.Fprintf(w, "}") + return w.Bytes(), nil +} + +const pyDtype = `#!/usr/bin/env python +import json +import pickle +import numpy as np + +pkl = open("%[1]s", "bw") +dt = np.dtype(%[3]s) +pickle.dump(dt, pkl, protocol=4) +pkl.close() + +def todescr(ds): + o = [] + for _,v in enumerate(ds): + d = { + "name": v[0], + "descr": v[1], + } + if type(v[1]) == type(""): + d["descr"] = [["", v[1]]] + if len(v) > 2: + d["shape"] = v[2] + o.append(d) + pass + return o + +def tofields(fs): + o = {} + if not fs: + return o + for k in fs: + v = fs[k] + o[k] = {"dtype":todtype(v[0]), "offset":v[1]} + return o + +def todtype(dt): + ## print(">>> dtype: %%s..." %%(dt,)) + orig = dt + shape = None + if type(dt) == type(tuple()): + shape = dt[1] + dt = dt[0] + pass + o = { + "descr": todescr(dt.descr), + "kind": dt.kind, + "esize": dt.itemsize, + "align": dt.alignment, + "order": str(dt.byteorder), + "fields": tofields(dt.fields), + "names": dt.names or [], + "flags": dt.flags, + } + if shape != None: + o["shape"] = [i for i in shape] + sub = dt.subdtype + if sub != None: + ## print(" >>> sub: %%s ==> %%s" %%(sub,sub[0].byteorder,)) + o["subarr"] = { + "dtype": todtype(sub[0]), + "shape": sub[1], + } + pass + ## print(">>> dtype: %%s ==> %%s" %%(orig,o,)) + return o + +txt = open("%[2]s", "w") +json.dump(todtype(dt), txt, indent=" ") +txt.close() +` + +const pyNdarray = `#!/usr/bin/env python +import json +import pickle +import numpy as np + +pkl = open("%[1]s", "bw") +arr = %[3]s +pickle.dump(arr, pkl, protocol=4) +pkl.close() + +def todescr(ds): + o = [] + for _,v in enumerate(ds): + d = { + "name": v[0], + "descr": v[1], + } + if type(v[1]) == type(""): + d["descr"] = [["", v[1]]] + if len(v) > 2: + d["shape"] = v[2] + o.append(d) + pass + return o + +def tofields(fs): + o = {} + if not fs: + return o + for k in fs: + v = fs[k] + o[k] = {"dtype":todtype(v[0]), "offset":v[1]} + return o + +def todtype(dt): + ## print(">>> dtype: %%s..." %%(dt,)) + orig = dt + shape = None + if type(dt) == type(tuple()): + shape = dt[1] + dt = dt[0] + pass + o = { + "descr": todescr(dt.descr), + "kind": dt.kind, + "esize": dt.itemsize, + "align": dt.alignment, + "order": str(dt.byteorder), + "fields": tofields(dt.fields), + "names": dt.names or [], + "flags": dt.flags, + } + if shape != None: + o["shape"] = [i for i in shape] + sub = dt.subdtype + if sub != None: + ## print(" >>> sub: %%s ==> %%s" %%(sub,sub[0].byteorder,)) + o["subarr"] = { + "dtype": todtype(sub[0]), + "shape": sub[1], + } + pass + ## print(">>> dtype: %%s ==> %%s" %%(orig,o,)) + return o + +def toarray(arr): + return { + "dtype": todtype(arr.dtype), + "shape": list(arr.shape), + "strides": list(arr.strides), + } + +txt = open("%[2]s", "w") +json.dump(toarray(arr), txt, indent=" ") +txt.close() +` diff --git a/npy/npy.go b/npy/npy.go index c7847d0..182dc68 100644 --- a/npy/npy.go +++ b/npy/npy.go @@ -132,6 +132,7 @@ var ( complex64Type = reflect.TypeOf((*complex64)(nil)).Elem() complex128Type = reflect.TypeOf((*complex128)(nil)).Elem() stringType = reflect.TypeOf((*string)(nil)).Elem() + anyType = reflect.TypeOf((*interface{})(nil)).Elem() trueUint8 = []byte{1} falseUint8 = []byte{0} @@ -205,6 +206,10 @@ func newDtype(str string) (dType, error) { case "c16", "c16", "complex128": dt.rt = complex128Type dt.size = 16 + + case "O", "|O", "object": + dt.rt = anyType + dt.size = 8 } switch { @@ -238,14 +243,31 @@ func newDtype(str string) (dType, error) { return dt, nil } -var nativeEndian binary.ByteOrder +var nativeEndian struct { + binary.ByteOrder +} func init() { v := uint16(1) switch byte(v >> 8) { case 0: - nativeEndian = binary.LittleEndian + nativeEndian.ByteOrder = binary.LittleEndian case 1: - nativeEndian = binary.BigEndian + nativeEndian.ByteOrder = binary.BigEndian + } +} + +func orderToString(v binary.ByteOrder) string { + switch v { + case binary.LittleEndian: + return "<" + case binary.BigEndian: + return ">" + case nativeEndian: + return "=" + case nil: + return "|" + default: + return "?" } } diff --git a/npy/pickle.go b/npy/pickle.go new file mode 100644 index 0000000..cc10112 --- /dev/null +++ b/npy/pickle.go @@ -0,0 +1,103 @@ +// Copyright 2023 The npyio Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package npy + +//go:generate go run ./gen-pickle.go + +import ( + "errors" + "fmt" + "io" + "reflect" + + "github.com/nlpodyssey/gopickle/pickle" + py "github.com/nlpodyssey/gopickle/types" +) + +func newUnpickler(r io.Reader) pickle.Unpickler { + u := pickle.NewUnpickler(r) + u.FindClass = ClassLoader + return u +} + +// ClassLoader provides a python class loader mechanism for python pickles +// containing numpy.dtype and numpy.ndarray values. +func ClassLoader(module, name string) (any, error) { + switch module + "." + name { + case "numpy.dtype": + return &ArrayDescr{}, nil + case "numpy.ndarray": + return &Array{}, nil + case "numpy.core.multiarray._reconstruct": + return reconstruct{}, nil + } + + return nil, fmt.Errorf("could not unpickle %q: %w", module+"."+name, errors.ErrUnsupported) +} + +type reconstruct struct{} + +var _ py.Callable = (*reconstruct)(nil) + +func (reconstruct) Call(args ...any) (any, error) { + switch sz := len(args); sz { + case 3: + // ok. + default: + return nil, fmt.Errorf("invalid tuple length (got=%d)", sz) + } + + var ( + subtype = args[0] // ex: numpy.ndarray + // shape = args[1] // a tuple, usually (0,) + // dtype = args[2] // a dummy dtype (usually "b") + ) + + switch v := subtype.(type) { + case py.PyNewable: + var ( + dtype = "b" + shape []int + strides []int + data []byte + flags int + ) + descr, err := newDescrFrom(dtype, flags) + if err != nil { + return nil, fmt.Errorf("could not convert %v (type=%T) to dtype: %w", dtype, dtype, err) + } + return v.PyNew(subtype, descr, shape, strides, data, flags) + } + + return subtype, nil +} + +func parseTuple(tup *py.Tuple, args ...any) error { + if want, got := tup.Len(), len(args); want != got { + return fmt.Errorf("invalid number of arguments: got=%d, want=%d", got, want) + } + + for i := range args { + src := tup.Get(i) + if src == nil { + continue + } + dst := args[i] + if dst == nil { + continue + } + rsrc := reflect.Indirect(reflect.ValueOf(src)) + rdst := reflect.Indirect(reflect.ValueOf(dst)) + if !rdst.CanSet() { + return fmt.Errorf("can not set arg[%d] destination: type=%T", i, dst) + } + if rsrc.Type() != rdst.Type() { + return fmt.Errorf("can not convert arg[%d] %T to %T", i, rsrc.Interface(), rdst.Interface()) + } + rdst.Set(rsrc) + } + + return nil +} diff --git a/npy/pickle_test.go b/npy/pickle_test.go new file mode 100644 index 0000000..f5036e3 --- /dev/null +++ b/npy/pickle_test.go @@ -0,0 +1,53 @@ +// Copyright 2023 The npyio Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package npy + +import ( + "reflect" + "strings" + "testing" + + "github.com/nlpodyssey/gopickle/pickle" + py "github.com/nlpodyssey/gopickle/types" +) + +func TestUnpickleDtype(t *testing.T) { + for _, tc := range dtypeTests { + t.Run(tc.name, func(t *testing.T) { + pkl := pickle.NewUnpickler(strings.NewReader(tc.pkl)) + pkl.FindClass = ClassLoader + + got, err := pkl.Load() + if err != nil { + t.Fatalf("could not unpickle: %+v", err) + } + + if got, want := got, tc.want; !reflect.DeepEqual(got, want) { + t.Fatalf("invalid unpickled data for %q:\ngot= %+v\nwant=%+v", tc.code, got, want) + } + }) + } +} + +func TestUnpickleNdarray(t *testing.T) { + for _, tc := range ndarrayTests { + t.Run(tc.name, func(t *testing.T) { + pkl := pickle.NewUnpickler(strings.NewReader(tc.pkl)) + pkl.FindClass = ClassLoader + + got, err := pkl.Load() + if err != nil { + t.Fatalf("could not unpickle: %+v", err) + } + if got, want := got, tc.want; !reflect.DeepEqual(got, want) { + t.Fatalf("invalid unpickled data for %q:\ngot= %+v\nwant=%+v", tc.code, got, want) + } + }) + } +} + +func pylist(sli ...any) *py.List { + return py.NewListFromSlice(sli) +} diff --git a/npy/reader.go b/npy/reader.go index efb0201..6cc67c4 100644 --- a/npy/reader.go +++ b/npy/reader.go @@ -189,6 +189,50 @@ func (r *Reader) Read(ptr interface{}) error { case *int, *uint, *[]int, *[]uint: return ErrInvalidType + case *Array: + const flags = 0 + descr, err := newDescrFrom(r.Header.Descr.Type, flags) + if err != nil { + return fmt.Errorf("could not create array description from %q: %w", r.Header.Descr.Type, err) + } + + if descr.esize < 0 { + // FIXME(sbinet): shouldn't this be addressed in newDescrFrom ? + // check with c-numpy. (for dtype="|O") + descr.esize = dt.size + } + + vptr.descr = *descr + vptr.fortran = r.Header.Descr.Fortran + vptr.shape = r.Header.Descr.Shape + + err = vptr.setupStrides() + if err != nil { + return fmt.Errorf("could not setup array strides for %q: %w", r.Header.Descr.Type, err) + } + + raw, err := io.ReadAll(r.r) + if err != nil { + return fmt.Errorf("could not consume all data: %w", err) + } + + data, err := vptr.descr.unmarshal(raw, r.Header.Descr.Shape) + if err != nil { + return fmt.Errorf("could not unmarshal array data: %w", err) + } + + if vptr.descr.kind == 'O' { + // data (for dtype("object")) is stored as a numpy.array inside a pickle. + // remove the extra numpy.array indirection. + if arr, ok := data.(*Array); ok { + *vptr = *arr + data = arr.data + } + } + + vptr.data = data + return nil + case *mat.Dense: var data []float64 err := r.Read(&data) diff --git a/npy/reader_test.go b/npy/reader_test.go index 01c67ba..75f9e49 100644 --- a/npy/reader_test.go +++ b/npy/reader_test.go @@ -415,3 +415,22 @@ func TestStringLenDtype(t *testing.T) { } } } + +func TestRaggedArray(t *testing.T) { + want := pylist(pylist(1, 2, 3, 4), pylist(5, 6, 7), pylist(8, 9)) + f, err := os.Open("../testdata/ragged-array.npy") + if err != nil { + t.Errorf("error: %v\n", err) + } + defer f.Close() + + var arr Array + err = Read(f, &arr) + if err != nil { + t.Fatalf("error reading data: %v\n", err) + } + + if got, want := arr.Data(), want; !reflect.DeepEqual(got, want) { + t.Fatalf("invalid ragged-array:\ngot= %v\nwant=%v", got, want) + } +} diff --git a/npy/zall_test.go b/npy/zall_test.go new file mode 100644 index 0000000..2296b10 --- /dev/null +++ b/npy/zall_test.go @@ -0,0 +1,1542 @@ +// Copyright 2023 The npyio Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Automatically generated. DO NOT EDIT. + +package npy + +import ( + "encoding/binary" +) + +var dtypeTests = []struct { + name string + code string + pkl string + want *ArrayDescr +}{ + { + // pickle.dumps(np.dtype(">b1"), protocol=4) + name: "dtype-0", + code: `np.dtype(">b1")`, + pkl: "\x80\x04\x957\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02b1\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b.", + want: &ArrayDescr{ + kind: 'b', + order: nil, + esize: 1, + align: 1, + }, + }, + { + // pickle.dumps(np.dtype("i1"), protocol=4) + name: "dtype-2", + code: `np.dtype(">i1")`, + pkl: "\x80\x04\x957\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02i1\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b.", + want: &ArrayDescr{ + kind: 'i', + order: nil, + esize: 1, + align: 1, + }, + }, + { + // pickle.dumps(np.dtype("i2"), protocol=4) + name: "dtype-4", + code: `np.dtype(">i2")`, + pkl: "\x80\x04\x957\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02i2\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01>\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b.", + want: &ArrayDescr{ + kind: 'i', + order: binary.BigEndian, + esize: 2, + align: 2, + }, + }, + { + // pickle.dumps(np.dtype("i4"), protocol=4) + name: "dtype-6", + code: `np.dtype(">i4")`, + pkl: "\x80\x04\x957\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02i4\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01>\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b.", + want: &ArrayDescr{ + kind: 'i', + order: binary.BigEndian, + esize: 4, + align: 4, + }, + }, + { + // pickle.dumps(np.dtype("i8"), protocol=4) + name: "dtype-8", + code: `np.dtype(">i8")`, + pkl: "\x80\x04\x957\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02i8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01>\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b.", + want: &ArrayDescr{ + kind: 'i', + order: binary.BigEndian, + esize: 8, + align: 8, + }, + }, + { + // pickle.dumps(np.dtype("u1"), protocol=4) + name: "dtype-14", + code: `np.dtype(">u1")`, + pkl: "\x80\x04\x957\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02u1\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b.", + want: &ArrayDescr{ + kind: 'u', + order: nil, + esize: 1, + align: 1, + }, + }, + { + // pickle.dumps(np.dtype("u2"), protocol=4) + name: "dtype-16", + code: `np.dtype(">u2")`, + pkl: "\x80\x04\x957\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02u2\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01>\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b.", + want: &ArrayDescr{ + kind: 'u', + order: binary.BigEndian, + esize: 2, + align: 2, + }, + }, + { + // pickle.dumps(np.dtype("u4"), protocol=4) + name: "dtype-18", + code: `np.dtype(">u4")`, + pkl: "\x80\x04\x957\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02u4\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01>\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b.", + want: &ArrayDescr{ + kind: 'u', + order: binary.BigEndian, + esize: 4, + align: 4, + }, + }, + { + // pickle.dumps(np.dtype("u8"), protocol=4) + name: "dtype-20", + code: `np.dtype(">u8")`, + pkl: "\x80\x04\x957\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02u8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01>\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b.", + want: &ArrayDescr{ + kind: 'u', + order: binary.BigEndian, + esize: 8, + align: 8, + }, + }, + { + // pickle.dumps(np.dtype("f4"), protocol=4) + name: "dtype-26", + code: `np.dtype(">f4")`, + pkl: "\x80\x04\x957\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02f4\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01>\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b.", + want: &ArrayDescr{ + kind: 'f', + order: binary.BigEndian, + esize: 4, + align: 4, + }, + }, + { + // pickle.dumps(np.dtype("f8"), protocol=4) + name: "dtype-28", + code: `np.dtype(">f8")`, + pkl: "\x80\x04\x957\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02f8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01>\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b.", + want: &ArrayDescr{ + kind: 'f', + order: binary.BigEndian, + esize: 8, + align: 8, + }, + }, + { + // pickle.dumps(np.dtype("c8"), protocol=4) + name: "dtype-30", + code: `np.dtype(">c8")`, + pkl: "\x80\x04\x957\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02c8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01>\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b.", + want: &ArrayDescr{ + kind: 'c', + order: binary.BigEndian, + esize: 8, + align: 4, + }, + }, + { + // pickle.dumps(np.dtype("c16"), protocol=4) + name: "dtype-32", + code: `np.dtype(">c16")`, + pkl: "\x80\x04\x958\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x03c16\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01>\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b.", + want: &ArrayDescr{ + kind: 'c', + order: binary.BigEndian, + esize: 16, + align: 8, + }, + }, + { + // pickle.dumps(np.dtype("S4"), protocol=4) + name: "dtype-35", + code: `np.dtype(">S4")`, + pkl: "\x80\x04\x951\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02S4\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNK\x04K\x01K\x00t\x94b.", + want: &ArrayDescr{ + kind: 'S', + order: nil, + esize: 4, + align: 1, + }, + }, + { + // pickle.dumps(np.dtype("=S4"), protocol=4) + name: "dtype-36", + code: `np.dtype("=S4")`, + pkl: "\x80\x04\x951\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02S4\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNK\x04K\x01K\x00t\x94b.", + want: &ArrayDescr{ + kind: 'S', + order: nil, + esize: 4, + align: 1, + }, + }, + { + // pickle.dumps(np.dtype("|S4"), protocol=4) + name: "dtype-37", + code: `np.dtype("|S4")`, + pkl: "\x80\x04\x951\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02S4\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNK\x04K\x01K\x00t\x94b.", + want: &ArrayDescr{ + kind: 'S', + order: nil, + esize: 4, + align: 1, + }, + }, + { + // pickle.dumps(np.dtype("S4"), protocol=4) + name: "dtype-38", + code: `np.dtype("S4")`, + pkl: "\x80\x04\x951\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02S4\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNK\x04K\x01K\x00t\x94b.", + want: &ArrayDescr{ + kind: 'S', + order: nil, + esize: 4, + align: 1, + }, + }, + { + // pickle.dumps(np.dtype("S8"), protocol=4) + name: "dtype-39", + code: `np.dtype("S8")`, + pkl: "\x80\x04\x951\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02S8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNK\bK\x01K\x00t\x94b.", + want: &ArrayDescr{ + kind: 'S', + order: nil, + esize: 8, + align: 1, + }, + }, + { + // pickle.dumps(np.dtype("S42"), protocol=4) + name: "dtype-40", + code: `np.dtype("S42")`, + pkl: "\x80\x04\x952\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x03S42\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNK*K\x01K\x00t\x94b.", + want: &ArrayDescr{ + kind: 'S', + order: nil, + esize: 42, + align: 1, + }, + }, + { + // pickle.dumps(np.dtype("O4"), protocol=4) + name: "dtype-42", + code: `np.dtype(">O4")`, + pkl: "\x80\x04\x957\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02O8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK?t\x94b.", + want: &ArrayDescr{ + kind: 'O', + order: nil, + esize: 8, + align: 8, + flags: 63, + }, + }, + { + // pickle.dumps(np.dtype("=O4"), protocol=4) + name: "dtype-43", + code: `np.dtype("=O4")`, + pkl: "\x80\x04\x957\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02O8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK?t\x94b.", + want: &ArrayDescr{ + kind: 'O', + order: nil, + esize: 8, + align: 8, + flags: 63, + }, + }, + { + // pickle.dumps(np.dtype("|O4"), protocol=4) + name: "dtype-44", + code: `np.dtype("|O4")`, + pkl: "\x80\x04\x957\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02O8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK?t\x94b.", + want: &ArrayDescr{ + kind: 'O', + order: nil, + esize: 8, + align: 8, + flags: 63, + }, + }, + { + // pickle.dumps(np.dtype("O4"), protocol=4) + name: "dtype-45", + code: `np.dtype("O4")`, + pkl: "\x80\x04\x957\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02O8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK?t\x94b.", + want: &ArrayDescr{ + kind: 'O', + order: nil, + esize: 8, + align: 8, + flags: 63, + }, + }, + { + // pickle.dumps(np.dtype("O8"), protocol=4) + name: "dtype-47", + code: `np.dtype(">O8")`, + pkl: "\x80\x04\x957\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02O8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK?t\x94b.", + want: &ArrayDescr{ + kind: 'O', + order: nil, + esize: 8, + align: 8, + flags: 63, + }, + }, + { + // pickle.dumps(np.dtype("=O8"), protocol=4) + name: "dtype-48", + code: `np.dtype("=O8")`, + pkl: "\x80\x04\x957\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02O8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK?t\x94b.", + want: &ArrayDescr{ + kind: 'O', + order: nil, + esize: 8, + align: 8, + flags: 63, + }, + }, + { + // pickle.dumps(np.dtype("|O8"), protocol=4) + name: "dtype-49", + code: `np.dtype("|O8")`, + pkl: "\x80\x04\x957\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02O8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK?t\x94b.", + want: &ArrayDescr{ + kind: 'O', + order: nil, + esize: 8, + align: 8, + flags: 63, + }, + }, + { + // pickle.dumps(np.dtype("O8"), protocol=4) + name: "dtype-50", + code: `np.dtype("O8")`, + pkl: "\x80\x04\x957\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02O8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK?t\x94b.", + want: &ArrayDescr{ + kind: 'O', + order: nil, + esize: 8, + align: 8, + flags: 63, + }, + }, + { + // pickle.dumps(np.dtype([('f1', [('f1', np.int16)])]), protocol=4) + name: "dtype-51", + code: `np.dtype([('f1', [('f1', np.int16)])])`, + pkl: "\x80\x04\x95\x8e\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02V2\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94N\x8c\x02f1\x94\x85\x94}\x94h\ah\x02\x8c\x02V2\x94\x89\x88\x87\x94R\x94(K\x03h\x06Nh\a\x85\x94}\x94h\ah\x02\x8c\x02i2\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01<\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94bK\x00\x86\x94sK\x02K\x01K\x10t\x94bK\x00\x86\x94sK\x02K\x01K\x10t\x94b.", + want: &ArrayDescr{ + kind: 'V', + order: nil, + esize: 2, + align: 1, + flags: 16, + names: []string{"f1"}, + fields: map[string]structField{ + "f1": { + dtype: ArrayDescr{ + kind: 'V', + order: nil, + esize: 2, + align: 1, + flags: 16, + names: []string{"f1"}, + fields: map[string]structField{ + "f1": { + dtype: ArrayDescr{ + kind: 'i', + order: binary.LittleEndian, + esize: 2, + align: 2, + }, + offset: 0, + }, + }, + }, + offset: 0, + }, + }, + }, + }, + { + // pickle.dumps(np.dtype("i4, (2,3)f8"), protocol=4) + name: "dtype-52", + code: `np.dtype("i4, (2,3)f8")`, + pkl: "\x80\x04\x95\xbc\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x03V52\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94N\x8c\x02f0\x94\x8c\x02f1\x94\x86\x94}\x94(h\ah\x02\x8c\x02i4\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01<\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94bK\x00\x86\x94h\bh\x02\x8c\x03V48\x94\x89\x88\x87\x94R\x94(K\x03h\x06h\x02\x8c\x02f8\x94\x89\x88\x87\x94R\x94(K\x03h\x0eNNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94bK\x02K\x03\x86\x94\x86\x94NNK0K\bK\x00t\x94bK\x04\x86\x94uK4K\x01K\x10t\x94b.", + want: &ArrayDescr{ + kind: 'V', + order: nil, + esize: 52, + align: 1, + flags: 16, + names: []string{"f0", "f1"}, + fields: map[string]structField{ + "f0": { + dtype: ArrayDescr{ + kind: 'i', + order: binary.LittleEndian, + esize: 4, + align: 4, + }, + offset: 0, + }, + "f1": { + dtype: ArrayDescr{ + kind: 'V', + order: nil, + esize: 48, + align: 8, + subarr: &subarrayDescr{ + dtype: ArrayDescr{ + kind: 'f', + order: binary.LittleEndian, + esize: 8, + align: 8, + }, + shape: []int{2, 3}, + }, + }, + offset: 4, + }, + }, + }, + }, + { + // pickle.dumps(np.dtype("i2, i4, (2,3)f8"), protocol=4) + name: "dtype-53", + code: `np.dtype("i2, i4, (2,3)f8")`, + pkl: "\x80\x04\x95\xeb\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x03V54\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94N\x8c\x02f0\x94\x8c\x02f1\x94\x8c\x02f2\x94\x87\x94}\x94(h\ah\x02\x8c\x02i2\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01<\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94bK\x00\x86\x94h\bh\x02\x8c\x02i4\x94\x89\x88\x87\x94R\x94(K\x03h\x0fNNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94bK\x02\x86\x94h\th\x02\x8c\x03V48\x94\x89\x88\x87\x94R\x94(K\x03h\x06h\x02\x8c\x02f8\x94\x89\x88\x87\x94R\x94(K\x03h\x0fNNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94bK\x02K\x03\x86\x94\x86\x94NNK0K\bK\x00t\x94bK\x06\x86\x94uK6K\x01K\x10t\x94b.", + want: &ArrayDescr{ + kind: 'V', + order: nil, + esize: 54, + align: 1, + flags: 16, + names: []string{"f0", "f1", "f2"}, + fields: map[string]structField{ + "f0": { + dtype: ArrayDescr{ + kind: 'i', + order: binary.LittleEndian, + esize: 2, + align: 2, + }, + offset: 0, + }, + "f1": { + dtype: ArrayDescr{ + kind: 'i', + order: binary.LittleEndian, + esize: 4, + align: 4, + }, + offset: 2, + }, + "f2": { + dtype: ArrayDescr{ + kind: 'V', + order: nil, + esize: 48, + align: 8, + subarr: &subarrayDescr{ + dtype: ArrayDescr{ + kind: 'f', + order: binary.LittleEndian, + esize: 8, + align: 8, + }, + shape: []int{2, 3}, + }, + }, + offset: 6, + }, + }, + }, + }, + { + // pickle.dumps(np.dtype("i2, i8, (2,3)f8"), protocol=4) + name: "dtype-54", + code: `np.dtype("i2, i8, (2,3)f8")`, + pkl: "\x80\x04\x95\xeb\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x03V58\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94N\x8c\x02f0\x94\x8c\x02f1\x94\x8c\x02f2\x94\x87\x94}\x94(h\ah\x02\x8c\x02i2\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01<\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94bK\x00\x86\x94h\bh\x02\x8c\x02i8\x94\x89\x88\x87\x94R\x94(K\x03h\x0fNNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94bK\x02\x86\x94h\th\x02\x8c\x03V48\x94\x89\x88\x87\x94R\x94(K\x03h\x06h\x02\x8c\x02f8\x94\x89\x88\x87\x94R\x94(K\x03h\x0fNNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94bK\x02K\x03\x86\x94\x86\x94NNK0K\bK\x00t\x94bK\n\x86\x94uK:K\x01K\x10t\x94b.", + want: &ArrayDescr{ + kind: 'V', + order: nil, + esize: 58, + align: 1, + flags: 16, + names: []string{"f0", "f1", "f2"}, + fields: map[string]structField{ + "f0": { + dtype: ArrayDescr{ + kind: 'i', + order: binary.LittleEndian, + esize: 2, + align: 2, + }, + offset: 0, + }, + "f1": { + dtype: ArrayDescr{ + kind: 'i', + order: binary.LittleEndian, + esize: 8, + align: 8, + }, + offset: 2, + }, + "f2": { + dtype: ArrayDescr{ + kind: 'V', + order: nil, + esize: 48, + align: 8, + subarr: &subarrayDescr{ + dtype: ArrayDescr{ + kind: 'f', + order: binary.LittleEndian, + esize: 8, + align: 8, + }, + shape: []int{2, 3}, + }, + }, + offset: 10, + }, + }, + }, + }, + { + // pickle.dumps(np.dtype("U10"), protocol=4) + name: "dtype-56", + code: `np.dtype(">U10")`, + pkl: "\x80\x04\x952\x00\x00\x00\x00\x00\x00\x00\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x03U10\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01>\x94NNNK(K\x04K\bt\x94b.", + want: &ArrayDescr{ + kind: 'U', + order: binary.BigEndian, + esize: 40, + align: 4, + flags: 8, + }, + }, +} + +var ndarrayTests = []struct { + name string + code string + pkl string + want *Array +}{ + { + // pickle.dumps(np.array([True,False,True], dtype="\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b\x89C\x06\xff\xff\x00\x02\xff\xfd\x94t\x94b.", + want: &Array{ + descr: ArrayDescr{ + kind: 'i', + order: binary.BigEndian, + esize: 2, + align: 2, + }, + shape: []int{3}, + strides: []int{2}, + data: []int16{-1, +2, -3}, + }, + }, + { + // pickle.dumps(np.array([-1,+2,-3], dtype="\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b\x89C\f\xff\xff\xff\xff\x00\x00\x00\x02\xff\xff\xff\xfd\x94t\x94b.", + want: &Array{ + descr: ArrayDescr{ + kind: 'i', + order: binary.BigEndian, + esize: 4, + align: 4, + }, + shape: []int{3}, + strides: []int{4}, + data: []int32{-1, +2, -3}, + }, + }, + { + // pickle.dumps(np.array([-1,+2,-3], dtype="\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b\x89C\x18\xff\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x02\xff\xff\xff\xff\xff\xff\xff\xfd\x94t\x94b.", + want: &Array{ + descr: ArrayDescr{ + kind: 'i', + order: binary.BigEndian, + esize: 8, + align: 8, + }, + shape: []int{3}, + strides: []int{8}, + data: []int64{-1, +2, -3}, + }, + }, + { + // pickle.dumps(np.array([1,2,3], dtype="\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b\x89C\x06\x00\x01\x00\x02\x00\x03\x94t\x94b.", + want: &Array{ + descr: ArrayDescr{ + kind: 'u', + order: binary.BigEndian, + esize: 2, + align: 2, + }, + shape: []int{3}, + strides: []int{2}, + data: []uint16{1, 2, 3}, + }, + }, + { + // pickle.dumps(np.array([1,2,3], dtype="\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b\x89C\f\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03\x94t\x94b.", + want: &Array{ + descr: ArrayDescr{ + kind: 'u', + order: binary.BigEndian, + esize: 4, + align: 4, + }, + shape: []int{3}, + strides: []int{4}, + data: []uint32{1, 2, 3}, + }, + }, + { + // pickle.dumps(np.array([1,2,3], dtype="\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b\x89C\x18\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x94t\x94b.", + want: &Array{ + descr: ArrayDescr{ + kind: 'u', + order: binary.BigEndian, + esize: 8, + align: 8, + }, + shape: []int{3}, + strides: []int{8}, + data: []uint64{1, 2, 3}, + }, + }, + { + // pickle.dumps(np.array([-1,+2,-3], dtype="\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b\x89C\f\xbf\x80\x00\x00@\x00\x00\x00\xc0@\x00\x00\x94t\x94b.", + want: &Array{ + descr: ArrayDescr{ + kind: 'f', + order: binary.BigEndian, + esize: 4, + align: 4, + }, + shape: []int{3}, + strides: []int{4}, + data: []float32{-1, +2, -3}, + }, + }, + { + // pickle.dumps(np.array([-1,+2,-3], dtype="\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b\x89C\x18\xbf\xf0\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\xc0\b\x00\x00\x00\x00\x00\x00\x94t\x94b.", + want: &Array{ + descr: ArrayDescr{ + kind: 'f', + order: binary.BigEndian, + esize: 8, + align: 8, + }, + shape: []int{3}, + strides: []int{8}, + data: []float64{-1, +2, -3}, + }, + }, + { + // pickle.dumps(np.array([(-1+1j),(+2-2j),(-3+3j)], dtype="\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b\x89C\x18\xbf\x80\x00\x00?\x80\x00\x00@\x00\x00\x00\xc0\x00\x00\x00\xc0@\x00\x00@@\x00\x00\x94t\x94b.", + want: &Array{ + descr: ArrayDescr{ + kind: 'c', + order: binary.BigEndian, + esize: 8, + align: 4, + }, + shape: []int{3}, + strides: []int{8}, + data: []complex64{complex(-1, 1), complex(2, -2), complex(-3, 3)}, + }, + }, + { + // pickle.dumps(np.array([(-1+1j),(+2-2j),(-3+3j)], dtype="\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b\x89C0\xbf\xf0\x00\x00\x00\x00\x00\x00?\xf0\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\xc0\b\x00\x00\x00\x00\x00\x00@\b\x00\x00\x00\x00\x00\x00\x94t\x94b.", + want: &Array{ + descr: ArrayDescr{ + kind: 'c', + order: binary.BigEndian, + esize: 16, + align: 8, + }, + shape: []int{3}, + strides: []int{16}, + data: []complex128{complex(-1, 1), complex(2, -2), complex(-3, 3)}, + }, + }, + { + // pickle.dumps(np.array("hello world!", dtype="S12"), protocol=4) + name: "ndarray-26", + code: `np.array("hello world!", dtype="S12")`, + pkl: "\x80\x04\x95\x8c\x00\x00\x00\x00\x00\x00\x00\x8c\x15numpy.core.multiarray\x94\x8c\f_reconstruct\x94\x93\x94\x8c\x05numpy\x94\x8c\andarray\x94\x93\x94K\x00\x85\x94C\x01b\x94\x87\x94R\x94(K\x01)h\x03\x8c\x05dtype\x94\x93\x94\x8c\x03S12\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNK\fK\x01K\x00t\x94b\x89C\fhello world!\x94t\x94b.", + want: &Array{ + descr: ArrayDescr{ + kind: 'S', + order: nil, + esize: 12, + align: 1, + }, + data: "hello world!", + }, + }, + { + // pickle.dumps(np.array(["hell","o wo", "rld!"], dtype="S4"), protocol=4) + name: "ndarray-27", + code: `np.array(["hell","o wo", "rld!"], dtype="S4")`, + pkl: "\x80\x04\x95\x8e\x00\x00\x00\x00\x00\x00\x00\x8c\x15numpy.core.multiarray\x94\x8c\f_reconstruct\x94\x93\x94\x8c\x05numpy\x94\x8c\andarray\x94\x93\x94K\x00\x85\x94C\x01b\x94\x87\x94R\x94(K\x01K\x03\x85\x94h\x03\x8c\x05dtype\x94\x93\x94\x8c\x02S4\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNK\x04K\x01K\x00t\x94b\x89C\fhello world!\x94t\x94b.", + want: &Array{ + descr: ArrayDescr{ + kind: 'S', + order: nil, + esize: 4, + align: 1, + }, + shape: []int{3}, + strides: []int{4}, + data: []string{"hell", "o wo", "rld!"}, + }, + }, + { + // pickle.dumps(np.array("hello, 世界!", dtype="\x94NNNK(K\x04K\bt\x94b\x89C(\x00\x00\x00h\x00\x00\x00e\x00\x00\x00l\x00\x00\x00l\x00\x00\x00o\x00\x00\x00,\x00\x00\x00 \x00\x00N\x16\x00\x00uL\x00\x00\x00!\x94t\x94b.", + want: &Array{ + descr: ArrayDescr{ + kind: 'U', + order: binary.BigEndian, + esize: 40, + align: 4, + flags: 8, + }, + data: "hello, 世界!", + }, + }, + { + // pickle.dumps(np.array(["hello, 世界!"], dtype="\x94NNNK(K\x04K\bt\x94b\x89C(\x00\x00\x00h\x00\x00\x00e\x00\x00\x00l\x00\x00\x00l\x00\x00\x00o\x00\x00\x00,\x00\x00\x00 \x00\x00N\x16\x00\x00uL\x00\x00\x00!\x94t\x94b.", + want: &Array{ + descr: ArrayDescr{ + kind: 'U', + order: binary.BigEndian, + esize: 40, + align: 4, + flags: 8, + }, + shape: []int{1}, + strides: []int{40}, + data: []string{"hello, 世界!"}, + }, + }, + { + // pickle.dumps(np.array([["hello"], [", 世界!"]], dtype="\x94NNNK\x14K\x04K\bt\x94b\x89C(\x00\x00\x00h\x00\x00\x00e\x00\x00\x00l\x00\x00\x00l\x00\x00\x00o\x00\x00\x00,\x00\x00\x00 \x00\x00N\x16\x00\x00uL\x00\x00\x00!\x94t\x94b.", + want: &Array{ + descr: ArrayDescr{ + kind: 'U', + order: binary.BigEndian, + esize: 20, + align: 4, + flags: 8, + }, + shape: []int{2, 1}, + strides: []int{20, 20}, + data: []string{"hello", ", 世界!"}, + }, + }, + { + // pickle.dumps(np.array([[-1,-2,-3],[-4,-5,-6]], dtype="\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b\x89C0\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\xfd\xff\xff\xff\xff\xff\xff\xff\xfc\xff\xff\xff\xff\xff\xff\xff\xfb\xff\xff\xff\xff\xff\xff\xff\xfa\x94t\x94b.", + want: &Array{ + descr: ArrayDescr{ + kind: 'i', + order: binary.BigEndian, + esize: 8, + align: 8, + }, + shape: []int{2, 3}, + strides: []int{24, 8}, + data: []int64{-1, -2, -3, -4, -5, -6}, + }, + }, + { + // pickle.dumps(np.array([[(-1+1j),(+2-2j),(-3+3j)],[(-4+4j),(-5+5j),(-6+6j)]], dtype="\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94b\x89C`\xbf\xf0\x00\x00\x00\x00\x00\x00?\xf0\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\xc0\b\x00\x00\x00\x00\x00\x00@\b\x00\x00\x00\x00\x00\x00\xc0\x10\x00\x00\x00\x00\x00\x00@\x10\x00\x00\x00\x00\x00\x00\xc0\x14\x00\x00\x00\x00\x00\x00@\x14\x00\x00\x00\x00\x00\x00\xc0\x18\x00\x00\x00\x00\x00\x00@\x18\x00\x00\x00\x00\x00\x00\x94t\x94b.", + want: &Array{ + descr: ArrayDescr{ + kind: 'c', + order: binary.BigEndian, + esize: 16, + align: 8, + }, + shape: []int{2, 3}, + strides: []int{48, 16}, + data: []complex128{complex(-1, 1), complex(2, -2), complex(-3, 3), complex(-4, 4), complex(-5, 5), complex(-6, 6)}, + }, + }, + { + // pickle.dumps(np.array([[-1],[-2,-3],[-4,-5,-6]], dtype="object"), protocol=4) + name: "ndarray-38", + code: `np.array([[-1],[-2,-3],[-4,-5,-6]], dtype="object")`, + pkl: "\x80\x04\x95\xb2\x00\x00\x00\x00\x00\x00\x00\x8c\x15numpy.core.multiarray\x94\x8c\f_reconstruct\x94\x93\x94\x8c\x05numpy\x94\x8c\andarray\x94\x93\x94K\x00\x85\x94C\x01b\x94\x87\x94R\x94(K\x01K\x03\x85\x94h\x03\x8c\x05dtype\x94\x93\x94\x8c\x02O8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK?t\x94b\x89]\x94(]\x94J\xff\xff\xff\xffa]\x94(J\xfe\xff\xff\xffJ\xfd\xff\xff\xffe]\x94(J\xfc\xff\xff\xffJ\xfb\xff\xff\xffJ\xfa\xff\xff\xffeet\x94b.", + want: &Array{ + descr: ArrayDescr{ + kind: 'O', + order: nil, + esize: 8, + align: 8, + flags: 63, + }, + shape: []int{3}, + strides: []int{8}, + data: pylist(pylist(-1), pylist(-2, -3), pylist(-4, -5, -6)), + }, + }, + { + // pickle.dumps(np.array([[-1],["-2",-3],[-4,-5,"-6"]], dtype="object"), protocol=4) + name: "ndarray-39", + code: `np.array([[-1],["-2",-3],[-4,-5,"-6"]], dtype="object")`, + pkl: "\x80\x04\x95\xb2\x00\x00\x00\x00\x00\x00\x00\x8c\x15numpy.core.multiarray\x94\x8c\f_reconstruct\x94\x93\x94\x8c\x05numpy\x94\x8c\andarray\x94\x93\x94K\x00\x85\x94C\x01b\x94\x87\x94R\x94(K\x01K\x03\x85\x94h\x03\x8c\x05dtype\x94\x93\x94\x8c\x02O8\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01|\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK?t\x94b\x89]\x94(]\x94J\xff\xff\xff\xffa]\x94(\x8c\x02-2\x94J\xfd\xff\xff\xffe]\x94(J\xfc\xff\xff\xffJ\xfb\xff\xff\xff\x8c\x02-6\x94eet\x94b.", + want: &Array{ + descr: ArrayDescr{ + kind: 'O', + order: nil, + esize: 8, + align: 8, + flags: 63, + }, + shape: []int{3}, + strides: []int{8}, + data: pylist(pylist(-1), pylist("-2", -3), pylist(-4, -5, "-6")), + }, + }, +} diff --git a/testdata/ragged-array.npy b/testdata/ragged-array.npy new file mode 100644 index 0000000000000000000000000000000000000000..df67145025b5db6adc83b5ec2ae3e693c3339199 GIT binary patch literal 315 zcmbu1%}T>S6oqFp{T=`68)P>lglyc18yA8LFQ!ru>LQfMWJIA&awiiZ*n*orgDmA_~PzX`NNYthBk=<2yC4jpy~Ns*KO_ zGP;PuRrnwAPo|4zQMIuyt%(=S+`TDlRm*4A=(4DtZM5S^4lQ2fhc8kfYYc8_hHQhK z33eq=U~BCC6?4~ChGTyk5JJXRIM{w|a5zCOflaArOFT-F