From 124044234b65ea22b1414e84d5157e3826f41957 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Sun, 15 Dec 2024 22:03:26 -0500 Subject: [PATCH] GEOS-provided boolean ops --- rust/geoarrow/src/array/geometry/array.rs | 24 +++++ rust/geodatafusion/Cargo.toml | 2 + rust/geodatafusion/src/data_types.rs | 20 +++++ rust/geodatafusion/src/udf/geos/mod.rs | 2 + .../geos/spatial_relationships/contains.rs | 87 +++++++++++++++++++ .../src/udf/geos/spatial_relationships/mod.rs | 1 + rust/geodatafusion/src/udf/mod.rs | 1 + 7 files changed, 137 insertions(+) create mode 100644 rust/geodatafusion/src/udf/geos/spatial_relationships/contains.rs create mode 100644 rust/geodatafusion/src/udf/geos/spatial_relationships/mod.rs diff --git a/rust/geoarrow/src/array/geometry/array.rs b/rust/geoarrow/src/array/geometry/array.rs index b1c0fce6a..2418838f4 100644 --- a/rust/geoarrow/src/array/geometry/array.rs +++ b/rust/geoarrow/src/array/geometry/array.rs @@ -1482,6 +1482,30 @@ impl TryFrom for MixedGeometryArray { } } +impl From for GeometryArray { + fn from(value: RectArray) -> Self { + PolygonArray::from(value).into() + } +} + +impl From> for GeometryArray { + fn from(value: Arc) -> Self { + use NativeType::*; + + match value.data_type() { + Point(_, _) => value.as_ref().as_point().clone().into(), + LineString(_, _) => value.as_ref().as_line_string().clone().into(), + Polygon(_, _) => value.as_ref().as_polygon().clone().into(), + MultiPoint(_, _) => value.as_ref().as_multi_point().clone().into(), + MultiLineString(_, _) => value.as_ref().as_multi_line_string().clone().into(), + MultiPolygon(_, _) => value.as_ref().as_multi_polygon().clone().into(), + Geometry(_) => value.as_ref().as_geometry().clone(), + GeometryCollection(_, _) => value.as_ref().as_geometry_collection().clone().into(), + Rect(_) => value.as_ref().as_rect().clone().into(), + } + } +} + /// Default to an empty array impl Default for GeometryArray { fn default() -> Self { diff --git a/rust/geodatafusion/Cargo.toml b/rust/geodatafusion/Cargo.toml index 2f99ee6b0..bbdc357e5 100644 --- a/rust/geodatafusion/Cargo.toml +++ b/rust/geodatafusion/Cargo.toml @@ -10,6 +10,8 @@ description = "Rust implementation of GeoArrow" categories = ["science::geo"] rust-version = "1.82" +[features] +geos = ["geoarrow/geos"] [dependencies] datafusion = { git = "https://github.com/apache/datafusion", rev = "03e39da62e403e064d21b57e9d6c200464c03749" } diff --git a/rust/geodatafusion/src/data_types.rs b/rust/geodatafusion/src/data_types.rs index 709597886..c0241f0d0 100644 --- a/rust/geodatafusion/src/data_types.rs +++ b/rust/geodatafusion/src/data_types.rs @@ -29,6 +29,22 @@ pub(crate) fn any_single_geometry_type_input() -> Signature { ) } +pub(crate) fn any_two_geometry_type_input() -> Signature { + // TODO: not sure if this is correct. We want the types to vary and each one can be a different + // type. + Signature::uniform( + 2, + vec![ + POINT2D_TYPE.into(), + POINT3D_TYPE.into(), + BOX2D_TYPE.into(), + BOX3D_TYPE.into(), + GEOMETRY_TYPE.into(), + ], + Volatility::Immutable, + ) +} + /// This will not cast a PointArray to a GeometryArray pub(crate) fn parse_to_native_array(array: ArrayRef) -> GeoDataFusionResult> { let data_type = array.data_type(); @@ -50,3 +66,7 @@ pub(crate) fn parse_to_native_array(array: ArrayRef) -> GeoDataFusionResult GeoDataFusionResult { + Ok(parse_to_native_array(array)?.into()) +} diff --git a/rust/geodatafusion/src/udf/geos/mod.rs b/rust/geodatafusion/src/udf/geos/mod.rs index 2491e36c5..ee82cda62 100644 --- a/rust/geodatafusion/src/udf/geos/mod.rs +++ b/rust/geodatafusion/src/udf/geos/mod.rs @@ -1 +1,3 @@ //! User-defined functions that wrap the [geos] crate. + +mod spatial_relationships; diff --git a/rust/geodatafusion/src/udf/geos/spatial_relationships/contains.rs b/rust/geodatafusion/src/udf/geos/spatial_relationships/contains.rs new file mode 100644 index 000000000..696688b2b --- /dev/null +++ b/rust/geodatafusion/src/udf/geos/spatial_relationships/contains.rs @@ -0,0 +1,87 @@ +use std::any::Any; +use std::sync::{Arc, OnceLock}; + +use arrow_schema::DataType; +use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion::logical_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature}; +use geoarrow::algorithm::geos::{BooleanOps, BooleanOpsScalar}; +use geoarrow::trait_::ArrayAccessor; +use geoarrow::ArrayBase; + +use crate::data_types::{ + any_single_geometry_type_input, any_two_geometry_type_input, parse_to_geometry_array, + parse_to_native_array, GEOMETRY_TYPE, +}; +use crate::error::GeoDataFusionResult; + +#[derive(Debug)] +pub(super) struct Contains { + signature: Signature, +} + +impl Contains { + pub fn new() -> Self { + Self { + signature: any_two_geometry_type_input(), + } + } +} + +static CONTAINS_DOC: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for Contains { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_contains" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + Ok(DataType::Boolean) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + Ok(contains_impl(args)?) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(CONTAINS_DOC.get_or_init(|| { + Documentation::builder( + DOC_SECTION_OTHER, + "Returns TRUE if geometry A contains geometry B.", + "ST_Contains(geometry)", + ) + .with_argument("geomA", "geometry") + .with_argument("geomB", "geometry") + .build() + })) + } +} + +fn contains_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { + let left = ColumnarValue::values_to_arrays(&args[0..1])? + .into_iter() + .next() + .unwrap(); + + let left = parse_to_geometry_array(left)?; + + let out = match &args[1] { + ColumnarValue::Array(arr) => { + let right = parse_to_geometry_array(arr.clone())?; + BooleanOps::contains(&left, &right)? + } + ColumnarValue::Scalar(scalar) => { + let right = parse_to_geometry_array(scalar.to_array()?)?; + let right = right.value(0); + BooleanOpsScalar::contains(&left, &right)? + } + }; + Ok(ColumnarValue::Array(Arc::new(out))) +} diff --git a/rust/geodatafusion/src/udf/geos/spatial_relationships/mod.rs b/rust/geodatafusion/src/udf/geos/spatial_relationships/mod.rs new file mode 100644 index 000000000..a5fe76adf --- /dev/null +++ b/rust/geodatafusion/src/udf/geos/spatial_relationships/mod.rs @@ -0,0 +1 @@ +mod contains; diff --git a/rust/geodatafusion/src/udf/mod.rs b/rust/geodatafusion/src/udf/mod.rs index 1a7a9bd7d..0ed35d70d 100644 --- a/rust/geodatafusion/src/udf/mod.rs +++ b/rust/geodatafusion/src/udf/mod.rs @@ -1,2 +1,3 @@ +#[cfg(feature = "geos")] pub mod geos; pub mod native;