Skip to content

Commit

Permalink
Merge pull request #1 from whosonfirst/props
Browse files Browse the repository at this point in the history
Bug fix: Ensure geometries are included
  • Loading branch information
thisisaaronland committed Aug 19, 2024
2 parents 0db7ccb + 566e271 commit f1478ed
Show file tree
Hide file tree
Showing 10 changed files with 1,151 additions and 88 deletions.
121 changes: 107 additions & 14 deletions geoparquet.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,36 @@ import (
"sync"

"github.com/apache/arrow/go/v16/parquet"
"github.com/sfomuseum/go-edtf"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
"github.com/whosonfirst/go-whosonfirst-spr/v2"
spr_util "github.com/whosonfirst/go-whosonfirst-spr/v2/util"
"github.com/whosonfirst/go-writer/v3"
"github.com/whosonfirst/gpq-fork/not-internal/geo"
"github.com/whosonfirst/gpq-fork/not-internal/geojson"
"github.com/whosonfirst/gpq-fork/not-internal/geoparquet"
"github.com/whosonfirst/gpq-fork/not-internal/pqutil"
)

// ensure_alt_properties maps property names (relative to a Feature's "properties"
// object) to the default value assigned when an alternate-geometry record does not
// already define that property.
//
// This accounts for the disconnect in the JSON-encoded properties between
// whosonfirst/go-whosonfirst-spr/v2.WOFStandardPlacesResult and WOFAltStandardPlacesResult.
// Since the latter already has a "v3"-triggering bug (see the wof:id handling in Write),
// a "v3" release might also be the opportunity to change the default/expected properties
// for alt files. That would have a bunch of downstream side-effects so it is still TBD.
// Until then... this:
var ensure_alt_properties = map[string]any{
	"edtf:inception":    edtf.UNKNOWN,
	"edtf:cessation":    edtf.UNKNOWN,
	"wof:country":       "",
	"wof:supersedes":    []int64{},
	"wof:superseded_by": []int64{}, // was misspelled "wof:supersedeb_by", so the real property was never defaulted
	"mz:is_current":     -1,
	"mz:is_ceased":      -1,
	"mz:is_deprecated":  -1,
	"mz:is_superseded":  -1,
	"mz:is_superseding": -1,
	"wof:lastmodified":  0,
}

// GeoParquetWriter implements the `writer.Writer` interface for writing GeoParquet records.
type GeoParquetWriter struct {
writer.Writer
Expand Down Expand Up @@ -144,6 +164,8 @@ func (gpq *GeoParquetWriter) Write(ctx context.Context, key string, r io.ReadSee
return 0, fmt.Errorf("Failed to read body for %s, %w", key, err)
}

is_alt := false

wof_spr, err := spr.WhosOnFirstSPR(body)

if err != nil {
Expand All @@ -155,40 +177,105 @@ func (gpq *GeoParquetWriter) Write(ctx context.Context, key string, r io.ReadSee
}

wof_spr = alt_spr
is_alt = true
}

spr_map, err := spr_util.SPRToMap(wof_spr)
// START OF wrangle properties in to something GeoParquet can work with

old_props := gjson.GetBytes(body, "properties")

body, err = sjson.SetBytes(body, "properties", wof_spr)

if err != nil {
return 0, fmt.Errorf("Failed to convert SPR to map for %s, %w", key, err)
return 0, fmt.Errorf("Failed to update properties for %s, %w", key, err)
}

if len(gpq.append_properties) > 0 {

for _, rel_path := range gpq.append_properties {

// Because we are deriving this from old_props and not body
// rel_path := strings.Replace(path, "properties.", "", 1)

p_rsp := old_props.Get(rel_path)
abs_path := fmt.Sprintf("properties.%s", rel_path)
rsp := gjson.GetBytes(body, abs_path)

spr_map[rel_path] = rsp.String()
// See this? We're assigning a value even if it doesn't exist because if we
// don't then we end up with uneven property counts and Parquet is sad.
body, err = sjson.SetBytes(body, abs_path, p_rsp.Value())

if err != nil {
return 0, fmt.Errorf("Failed to assign %s to properties, %w", abs_path, err)
}
}
}

var f *geo.Feature
// Because the (internal) geoparquet/arrow schema builder is sad when it encounters empty arrays
// https://github.com/planetlabs/gpq/blob/main/internal/pqutil/arrow.go#L158-L165

err = json.Unmarshal(body, &f)
ensure_length := []string{
"properties.wof:supersedes",
"properties.wof:superseded_by",
}

if err != nil {
return 0, fmt.Errorf("Failed to unmarshal Feature from %s, %w", key, err)
for _, path := range ensure_length {

rsp := gjson.GetBytes(body, path)

if !rsp.Exists() {
continue
}

if len(rsp.Array()) == 0 {

body, err = sjson.DeleteBytes(body, path)

if err != nil {
return 0, fmt.Errorf("Failed to delete 0-length %s property, %w", path, err)
}
}
}

gpq_props := make(map[string]any)
if is_alt {

for k, v := range spr_map {
gpq_props[k] = v
// Account for a bug in whosonfirst/go-whosonfirst-spr/v2.WOFAltStandardPlacesResult
// where the JSON encoding for wof:id returns a string instead of an int. Fixing this
// will trigger a "v3" event so until then... this:
id_rsp := gjson.GetBytes(body, "properties.wof:id")
body, err = sjson.SetBytes(body, "properties.wof:id", id_rsp.Int())

if err != nil {
return 0, fmt.Errorf("Failed to correct string wof:id value in alt record, %w", err)
}

// See notes for ensure_alt_properties above
for rel_path, v := range ensure_alt_properties {

path := fmt.Sprintf("propeties.%s", rel_path)

rsp := gjson.GetBytes(body, path)

if rsp.Exists() {
continue
}

body, err = sjson.SetBytes(body, path, v)

if err != nil {
return 0, fmt.Errorf("Failed to assign default alt value (%v) for %s, %w", v, path, err)
}
}
}

f.Properties = gpq_props
// END OF wrangle properties in to something GeoParquet can work with

var f *geo.Feature

err = json.Unmarshal(body, &f)

if err != nil {
return 0, fmt.Errorf("Failed to unmarshal Feature from %s, %w", key, err)
}

ready, err := gpq.ensureFeatureWriter(ctx, f)

Expand All @@ -207,7 +294,7 @@ func (gpq *GeoParquetWriter) Write(ctx context.Context, key string, r io.ReadSee
err = gpq.flushBuffer(ctx)

if err != nil {
return -1, fmt.Errorf("Failed to flush pending buffer (%s), %w", key, err)
return 0, fmt.Errorf("Failed to flush pending buffer (%s), %w", key, err)
}

err = gpq.feature_writer.Write(f)
Expand Down Expand Up @@ -255,6 +342,12 @@ func (gpq *GeoParquetWriter) ensureFeatureWriter(ctx context.Context, f *geo.Fea

builder.Add(f.Properties)

err := builder.AddGeometry(geoparquet.DefaultGeometryColumn, geoparquet.DefaultGeometryEncoding)

if err != nil {
return false, err
}

if !builder.Ready() {
return false, nil
}
Expand Down
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ toolchain go1.23.0

require (
github.com/apache/arrow/go/v16 v16.1.0
github.com/sfomuseum/go-edtf v1.1.1
github.com/tidwall/gjson v1.17.0
github.com/tidwall/sjson v1.2.5
github.com/whosonfirst/go-ioutil v1.0.2
github.com/whosonfirst/go-whosonfirst-spr/v2 v2.3.7
github.com/whosonfirst/go-writer/v3 v3.1.1
Expand All @@ -31,7 +33,6 @@ require (
github.com/paulmach/orb v0.11.1 // indirect
github.com/pierrec/lz4/v4 v4.1.21 // indirect
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 // indirect
github.com/sfomuseum/go-edtf v1.1.1 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.0 // indirect
github.com/whosonfirst/go-whosonfirst-feature v0.0.27 // indirect
Expand Down
3 changes: 3 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,16 @@ github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/gjson v1.17.0 h1:/Jocvlh98kcTfpN2+JzGQWQcqrPQwDrVEMApx/M5ZwM=
github.com/tidwall/gjson v1.17.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
github.com/whosonfirst/go-ioutil v1.0.2 h1:+GJPfa42OFn5A+5yJSc5jQTQIkNV3/MhYyg4pavdrC8=
github.com/whosonfirst/go-ioutil v1.0.2/go.mod h1:2dS1vWdAIkiHDvDF8fYyjv6k2NISmwaIjJJeEDBEdvg=
github.com/whosonfirst/go-whosonfirst-feature v0.0.27 h1:8RoiadvQEo8RFq8HFezq/Mwm/7UXR+dNJpE9oP8kvfQ=
Expand Down
21 changes: 21 additions & 0 deletions vendor/github.com/tidwall/sjson/LICENSE

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit f1478ed

Please sign in to comment.