From ebaa79a4925fdffddadb89b5d0fb2e5d4eeb138b Mon Sep 17 00:00:00 2001 From: Nemo Yu Date: Fri, 26 Jun 2026 17:26:16 -0400 Subject: [PATCH 1/2] feat: support geo multipolygon Signed-off-by: Nemo Yu --- Cargo.lock | 14 + Cargo.toml | 1 + vortex-geo/Cargo.toml | 1 + vortex-geo/src/extension/mod.rs | 4 + vortex-geo/src/extension/multipolygon.rs | 372 +++++++++++++++++++++++ vortex-geo/src/extension/point.rs | 52 +++- vortex-geo/src/extension/polygon.rs | 48 ++- vortex-geo/src/lib.rs | 4 + 8 files changed, 482 insertions(+), 14 deletions(-) create mode 100644 vortex-geo/src/extension/multipolygon.rs diff --git a/Cargo.lock b/Cargo.lock index 1ad8b82a5b4..fe315a74b7c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4238,6 +4238,19 @@ dependencies = [ "wkt 0.14.0", ] +[[package]] +name = "geoarrow-cast" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41c308d653690a4e8ef3cbba69696056bd819e624766ece66d64cc26a638acc1" +dependencies = [ + "arrow-schema 58.3.0", + "geo-traits", + "geoarrow-array", + "geoarrow-schema", + "wkt 0.14.0", +] + [[package]] name = "geoarrow-schema" version = "0.8.0" @@ -10394,6 +10407,7 @@ dependencies = [ "geo-traits", "geo-types", "geoarrow", + "geoarrow-cast", "prost 0.14.4", "rstest", "vortex-array", diff --git a/Cargo.toml b/Cargo.toml index 903a6a46b10..f3c95e1b6c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -163,6 +163,7 @@ geo = "0.31.0" geo-traits = "0.3.0" geo-types = "0.7.19" geoarrow = "0.8.0" +geoarrow-cast = "0.8.0" get_dir = "0.5.0" glob = "0.3.2" goldenfile = "1" diff --git a/vortex-geo/Cargo.toml b/vortex-geo/Cargo.toml index e2f7e4dc10f..2f0583b49e6 100644 --- a/vortex-geo/Cargo.toml +++ b/vortex-geo/Cargo.toml @@ -20,6 +20,7 @@ geo = { workspace = true } geo-traits = { workspace = true } geo-types = { workspace = true } geoarrow = { workspace = true } +geoarrow-cast = { workspace = true } prost = { workspace = true } vortex-array = { workspace = true } vortex-error = { workspace = true 
} diff --git a/vortex-geo/src/extension/mod.rs b/vortex-geo/src/extension/mod.rs index 684c83bade0..5cccc489297 100644 --- a/vortex-geo/src/extension/mod.rs +++ b/vortex-geo/src/extension/mod.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors pub(crate) mod coordinate; +mod multipolygon; mod point; mod polygon; mod wkb; @@ -12,6 +13,7 @@ use std::sync::Arc; use geo_types::Geometry; use geoarrow::datatypes::Crs; use geoarrow::datatypes::Metadata; +pub use multipolygon::*; pub use point::*; pub use polygon::*; use vortex_array::ArrayRef; @@ -46,6 +48,8 @@ pub(crate) fn geometries( point_geometries(&storage, ctx) } else if ext.is::() { polygon_geometries(&storage, ctx) + } else if ext.is::() { + multipolygon_geometries(&storage, ctx) } else { vortex_bail!("geo: unsupported geometry extension {}", array.dtype()) } diff --git a/vortex-geo/src/extension/multipolygon.rs b/vortex-geo/src/extension/multipolygon.rs new file mode 100644 index 00000000000..82fe081b316 --- /dev/null +++ b/vortex-geo/src/extension/multipolygon.rs @@ -0,0 +1,372 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! The [`MultiPolygon`] extension type (`vortex.geo.multipolygon`), stored as +//! `List>>>` (polygons → rings → coordinates) and tagged with +//! [`GeoMetadata`]. A single `Polygon` is a one-element multipolygon. 
+ +use std::sync::Arc; + +use arrow_array::ArrayRef as ArrowArrayRef; +use arrow_schema::DataType; +use arrow_schema::Field; +use arrow_schema::extension::ExtensionType; +use geo_traits::to_geo::ToGeoGeometry; +use geo_types::Geometry; +use geoarrow::array::GeoArrowArray; +use geoarrow::array::GeoArrowArrayAccessor; +use geoarrow::array::IntoArrow; +use geoarrow::array::MultiPolygonArray; +use geoarrow::datatypes::CoordType; +use geoarrow::datatypes::GeoArrowType; +use geoarrow::datatypes::MultiPolygonType; +use geoarrow::datatypes::WkbType; +use geoarrow_cast::cast::cast; +use prost::Message; +use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::arrays::ExtensionArray; +use vortex_array::arrays::extension::ExtensionArrayExt; +use vortex_array::arrow::ArrowExport; +use vortex_array::arrow::ArrowExportVTable; +use vortex_array::arrow::ArrowImport; +use vortex_array::arrow::ArrowImportVTable; +use vortex_array::arrow::ArrowSession; +use vortex_array::arrow::ArrowSessionExt; +use vortex_array::arrow::FromArrowArray; +use vortex_array::dtype::DType; +use vortex_array::dtype::Nullability; +use vortex_array::dtype::arrow::FromArrowType; +use vortex_array::dtype::extension::ExtDType; +use vortex_array::dtype::extension::ExtId; +use vortex_array::dtype::extension::ExtVTable; +use vortex_array::scalar::ScalarValue; +use vortex_error::VortexError; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_error::vortex_ensure; +use vortex_error::vortex_err; +use vortex_session::registry::CachedId; +use vortex_session::registry::Id; + +use super::GeoMetadata; +use super::coordinate::Dimension; +use super::coordinate::coordinate_dimension; +use super::coordinate::coordinate_storage_dtype; +use super::geo_metadata_from_arrow; +use super::geoarrow_metadata; + +/// A multipolygon (`geoarrow.multipolygon`); a single `Polygon` is a one-element multipolygon. 
+#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)] +pub struct MultiPolygon; + +impl ExtVTable for MultiPolygon { + type Metadata = GeoMetadata; + // No cheap owned value like Point's `Coordinate`; expose the raw storage scalar. + type NativeValue<'a> = &'a ScalarValue; + + fn id(&self) -> ExtId { + ExtId::new_static("vortex.geo.multipolygon") + } + + fn serialize_metadata(&self, metadata: &Self::Metadata) -> VortexResult> { + Ok(metadata.encode_to_vec()) + } + + fn deserialize_metadata(&self, metadata: &[u8]) -> VortexResult { + Ok(GeoMetadata::decode(metadata)?) + } + + fn validate_dtype(ext_dtype: &ExtDType) -> VortexResult<()> { + multipolygon_dimension(ext_dtype.storage_dtype()).map(|_| ()) + } + + fn unpack_native<'a>( + _ext_dtype: &'a ExtDType, + storage_value: &'a ScalarValue, + ) -> VortexResult<&'a ScalarValue> { + Ok(storage_value) + } +} + +/// Storage `List>>`: polygons → rings → coordinates. +pub(crate) fn multipolygon_storage_dtype(dim: Dimension, nullability: Nullability) -> DType { + let coords = coordinate_storage_dtype(dim, Nullability::NonNullable); + let ring = DType::List(Arc::new(coords), Nullability::NonNullable); + let polygon = DType::List(Arc::new(ring), Nullability::NonNullable); + DType::List(Arc::new(polygon), nullability) +} + +/// Validate `dtype` is `List>>` and return its [`Dimension`]. 
+pub(crate) fn multipolygon_dimension(dtype: &DType) -> VortexResult { + let DType::List(polygon, _) = dtype else { + vortex_bail!("multipolygon storage must be a List of polygons, was {dtype}"); + }; + let DType::List(ring, _) = polygon.as_ref() else { + vortex_bail!("multipolygon polygon storage must be a List of rings, was {polygon}"); + }; + let DType::List(coords, _) = ring.as_ref() else { + vortex_bail!("multipolygon ring storage must be a List of coordinates, was {ring}"); + }; + coordinate_dimension(coords) +} + +static ARROW_MULTIPOLYGON: CachedId = CachedId::new(MultiPolygonType::NAME); + +/// The `geoarrow.multipolygon` type for `dimension`, with separated (struct) coordinates. +fn multipolygon_type(geo_metadata: &GeoMetadata, dimension: Dimension) -> MultiPolygonType { + MultiPolygonType::new(dimension.into(), geoarrow_metadata(geo_metadata)) +} + +/// Decode storage to `geo_types` for the geo scalar functions (CRS is irrelevant to planar ops). +pub(crate) fn multipolygon_geometries( + storage: &ArrayRef, + ctx: &mut ExecutionCtx, +) -> VortexResult>> { + multipolygon_array(storage, ctx)? + .iter() + .map(|geometry| -> VortexResult> { + Ok(geometry + .ok_or_else(|| vortex_err!("geo: null geometry is not supported"))? + .map_err(|e| vortex_err!("geo: geometry access failed: {e}"))? + .to_geometry()) + }) + .collect() +} + +/// Build a geoarrow `MultiPolygonArray` from the `MultiPolygon` storage. +fn multipolygon_array( + storage: &ArrayRef, + ctx: &mut ExecutionCtx, +) -> VortexResult { + let multipolygon_type = multipolygon_type( + &GeoMetadata::default(), + multipolygon_dimension(storage.dtype())?, + ); + let session = ctx.session().clone(); + let arrow = session.arrow().execute_arrow(storage.clone(), None, ctx)?; + MultiPolygonArray::try_from((arrow.as_ref(), multipolygon_type)) + .map_err(|e| vortex_err!("failed to construct MultiPolygonArray: {e}")) +} + +/// A validated `MultiPolygon` array (`try_from` checks the extension type). 
+pub struct MultiPolygonData(ExtensionArray); + +impl TryFrom for MultiPolygonData { + type Error = VortexError; + + fn try_from(ext: ExtensionArray) -> Result { + vortex_ensure!( + ext.ext_dtype().is::(), + "expected a MultiPolygon extension array" + ); + Ok(MultiPolygonData(ext)) + } +} + +impl MultiPolygonData { + /// Serialize multipolygons to WKB (a view array) via geoarrow's cast — the form DuckDB + /// `GEOMETRY` takes. + pub fn to_wkb(&self, ctx: &mut ExecutionCtx) -> VortexResult { + let multipolygons = multipolygon_array(&self.0.storage_array().clone(), ctx)?; + let wkb_type = + GeoArrowType::WkbView(WkbType::new(geoarrow_metadata(&GeoMetadata::default()))); + let wkb = cast(&multipolygons, &wkb_type) + .map_err(|e| vortex_err!("failed to cast multipolygons to WKB: {e}"))?; + ArrayRef::from_arrow(wkb.to_array_ref().as_ref(), false) + } +} + +impl ArrowExportVTable for MultiPolygon { + fn arrow_ext_id(&self) -> Id { + *ARROW_MULTIPOLYGON + } + + fn vortex_id(&self) -> Id { + self.id() + } + + fn to_arrow_field( + &self, + name: &str, + dtype: &DType, + session: &ArrowSession, + ) -> VortexResult> { + let ext_type = dtype.as_extension(); + let geo_metadata = ext_type.metadata::(); + let dimension = multipolygon_dimension(ext_type.storage_dtype())?; + + let mut field = session.to_arrow_field(name, ext_type.storage_dtype())?; + field.try_with_extension_type(multipolygon_type(geo_metadata, dimension))?; + + Ok(Some(field)) + } + + fn execute_arrow( + &self, + array: ArrayRef, + target: &Field, + ctx: &mut ExecutionCtx, + ) -> VortexResult { + let is_multipolygon = array + .dtype() + .as_extension_opt() + .map(|ext| ext.is::()) + .unwrap_or(false); + if !is_multipolygon { + return Ok(ArrowExport::Unsupported(array)); + } + + let Ok(multipolygon_meta) = target.try_extension_type::() else { + return Ok(ArrowExport::Unsupported(array)); + }; + if multipolygon_meta.coord_type() != CoordType::Separated { + return Ok(ArrowExport::Unsupported(array)); + } + + let 
executed = array.execute::(ctx)?; + let storage = executed.storage_array().clone(); + + let storage_field = Field::new( + String::new(), + target.data_type().clone(), + target.is_nullable(), + ); + let session = ctx.session().clone(); + let arrow_storage = session + .arrow() + .execute_arrow(storage, Some(&storage_field), ctx)?; + + // Round-trip through GeoArrow's multipolygon array; `into_arrow` is concrete, so wrap in `Arc`. + let multipolygons = + MultiPolygonArray::try_from((arrow_storage.as_ref(), multipolygon_meta)) + .map_err(|e| vortex_err!("failed to construct MultiPolygonArray: {e}"))?; + + Ok(ArrowExport::Exported(Arc::new(multipolygons.into_arrow()))) + } +} + +impl ArrowImportVTable for MultiPolygon { + fn arrow_ext_id(&self) -> Id { + *ARROW_MULTIPOLYGON + } + + /// Import a `geoarrow.multipolygon` field (matched by GeoArrow name). Accepts the full + /// `MultiPolygonType`, or a metadata-less literal (name only), inferring the dimension. + fn from_arrow_field(&self, field: &Field) -> VortexResult> { + let (dimension, metadata) = + if let Ok(multipolygon_meta) = field.try_extension_type::() { + vortex_ensure!( + multipolygon_meta.coord_type() == CoordType::Separated, + "geoarrow.multipolygon with interleaved coordinates is not supported; \ + re-encode with separated (struct) coordinates" + ); + ( + multipolygon_meta.dimension().into(), + geo_metadata_from_arrow(multipolygon_meta.metadata()), + ) + } else { + // Literal: peel the three `List` layers to the coordinate struct and read its + // dimension from the field names (the canonical check rejects nullable coordinates). 
+ if field.extension_type_name() != Some(MultiPolygonType::NAME) { + return Ok(None); + } + let DType::List(polygon, _) = DType::from_arrow(field) else { + return Ok(None); + }; + let DType::List(ring, _) = polygon.as_ref() else { + return Ok(None); + }; + let DType::List(coords, _) = ring.as_ref() else { + return Ok(None); + }; + let DType::Struct(fields, _) = coords.as_ref() else { + return Ok(None); + }; + let Ok(dimension) = Dimension::from_field_names(fields.names()) else { + return Ok(None); + }; + (dimension, GeoMetadata::default()) + }; + + let storage_dtype = multipolygon_storage_dtype(dimension, field.is_nullable().into()); + Ok(Some(DType::Extension( + ExtDType::try_with_vtable(MultiPolygon, metadata, storage_dtype)?.erased(), + ))) + } + + fn from_arrow_array( + &self, + array: ArrowArrayRef, + field: &Field, + dtype: &DType, + ) -> VortexResult { + let Some(ext_dtype) = dtype.as_extension_opt() else { + return Ok(ArrowImport::Unsupported(array)); + }; + if !ext_dtype.is::() + || field.try_extension_type::().is_err() + || !matches!(array.data_type(), DataType::List(_)) + { + return Ok(ArrowImport::Unsupported(array)); + } + + let storage = ArrayRef::from_arrow(array.as_ref(), field.is_nullable())?; + Ok(ArrowImport::Imported( + ExtensionArray::try_new(ext_dtype.clone(), storage)?.into_array(), + )) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use rstest::rstest; + use vortex_array::dtype::DType; + use vortex_array::dtype::Nullability; + use vortex_array::dtype::PType; + use vortex_array::dtype::extension::ExtDType; + use vortex_error::VortexResult; + + use super::MultiPolygon; + use super::multipolygon_storage_dtype; + use crate::extension::GeoMetadata; + use crate::extension::coordinate::Dimension; + use crate::extension::coordinate::coordinate_storage_dtype; + + fn geo_meta() -> GeoMetadata { + GeoMetadata { + crs: Some("EPSG:4326".to_string()), + } + } + + /// `MultiPolygon` accepts the canonical `List>>` storage of every + /// 
dimension. + #[rstest] + #[case::xy(Dimension::Xy)] + #[case::xyz(Dimension::Xyz)] + #[case::xym(Dimension::Xym)] + #[case::xyzm(Dimension::Xyzm)] + fn multipolygon_validates_every_dimension(#[case] dim: Dimension) -> VortexResult<()> { + let storage = multipolygon_storage_dtype(dim, Nullability::NonNullable); + ExtDType::::try_new(geo_meta(), storage)?; + Ok(()) + } + + /// Non-multipolygon storage is rejected at dtype construction: a bare struct (point) and a + /// double list (polygon) both fail. + #[test] + fn multipolygon_rejects_invalid_storage() -> VortexResult<()> { + let primitive = DType::Primitive(PType::F64, Nullability::NonNullable); + assert!(ExtDType::::try_new(geo_meta(), primitive).is_err()); + + // A double list (polygon) is not a multipolygon. + let coords = coordinate_storage_dtype(Dimension::Xy, Nullability::NonNullable); + let ring = DType::List(Arc::new(coords), Nullability::NonNullable); + let polygon = DType::List(Arc::new(ring), Nullability::NonNullable); + assert!(ExtDType::::try_new(geo_meta(), polygon).is_err()); + Ok(()) + } +} diff --git a/vortex-geo/src/extension/point.rs b/vortex-geo/src/extension/point.rs index 19e33c212f5..470182fe0ed 100644 --- a/vortex-geo/src/extension/point.rs +++ b/vortex-geo/src/extension/point.rs @@ -12,11 +12,15 @@ use arrow_schema::Field; use arrow_schema::extension::ExtensionType; use geo_traits::to_geo::ToGeoGeometry; use geo_types::Geometry; +use geoarrow::array::GeoArrowArray; use geoarrow::array::GeoArrowArrayAccessor; use geoarrow::array::IntoArrow; use geoarrow::array::PointArray; use geoarrow::datatypes::CoordType; +use geoarrow::datatypes::GeoArrowType; use geoarrow::datatypes::PointType; +use geoarrow::datatypes::WkbType; +use geoarrow_cast::cast::cast; use prost::Message; use vortex_array::ArrayRef; use vortex_array::ExecutionCtx; @@ -37,6 +41,7 @@ use vortex_array::dtype::extension::ExtId; use vortex_array::dtype::extension::ExtVTable; use vortex_array::scalar::Scalar; use 
vortex_array::scalar::ScalarValue; +use vortex_error::VortexError; use vortex_error::VortexResult; use vortex_error::vortex_ensure; use vortex_error::vortex_err; @@ -96,20 +101,51 @@ fn point_type(geo_metadata: &GeoMetadata, dimension: Dimension) -> PointType { PointType::new(dimension.into(), geoarrow_metadata(geo_metadata)) } -/// Decode `Point` storage to `geo_types` points, for the geo scalar functions. -pub(crate) fn point_geometries( - storage: &ArrayRef, - ctx: &mut ExecutionCtx, -) -> VortexResult>> { +pub struct PointData(ExtensionArray); + +impl TryFrom for PointData { + type Error = VortexError; + + fn try_from(ext: ExtensionArray) -> Result { + vortex_ensure!( + ext.ext_dtype().is::(), + "expected a Point extension array" + ); + Ok(PointData(ext)) + } +} + +impl PointData { + /// Serialize points to WKB (a view array) via geoarrow's cast — the form DuckDB `GEOMETRY` takes. + pub fn to_wkb(&self, ctx: &mut ExecutionCtx) -> VortexResult { + let points = point_array(&self.0.storage_array().clone(), ctx)?; + let wkb_type = + GeoArrowType::WkbView(WkbType::new(geoarrow_metadata(&GeoMetadata::default()))); + let wkb = cast(&points, &wkb_type) + .map_err(|e| vortex_err!("failed to cast points to WKB: {e}"))?; + ArrayRef::from_arrow(wkb.to_array_ref().as_ref(), false) + } +} + +/// Build a geoarrow `PointArray` from a `Point`'s `Struct` storage, shared by WKB export +/// and `geo_types` decoding. 
+fn point_array(storage: &ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult { let point_type = point_type( &GeoMetadata::default(), coordinate_dimension(storage.dtype())?, ); let session = ctx.session().clone(); let arrow = session.arrow().execute_arrow(storage.clone(), None, ctx)?; - let points = PointArray::try_from((arrow.as_ref(), point_type)) - .map_err(|e| vortex_err!("failed to construct PointArray: {e}"))?; - points + PointArray::try_from((arrow.as_ref(), point_type)) + .map_err(|e| vortex_err!("failed to construct PointArray: {e}")) +} + +/// Decode `Point` storage to `geo_types` points, for the geo scalar functions. +pub(crate) fn point_geometries( + storage: &ArrayRef, + ctx: &mut ExecutionCtx, +) -> VortexResult>> { + point_array(storage, ctx)? .iter() .map(|geometry| -> VortexResult> { Ok(geometry diff --git a/vortex-geo/src/extension/polygon.rs b/vortex-geo/src/extension/polygon.rs index fc06ce59bd3..8d8a88fea17 100644 --- a/vortex-geo/src/extension/polygon.rs +++ b/vortex-geo/src/extension/polygon.rs @@ -13,11 +13,15 @@ use arrow_schema::Field; use arrow_schema::extension::ExtensionType; use geo_traits::to_geo::ToGeoGeometry; use geo_types::Geometry; +use geoarrow::array::GeoArrowArray; use geoarrow::array::GeoArrowArrayAccessor; use geoarrow::array::IntoArrow; use geoarrow::array::PolygonArray; use geoarrow::datatypes::CoordType; +use geoarrow::datatypes::GeoArrowType; use geoarrow::datatypes::PolygonType; +use geoarrow::datatypes::WkbType; +use geoarrow_cast::cast::cast; use prost::Message; use vortex_array::ArrayRef; use vortex_array::ExecutionCtx; @@ -38,6 +42,7 @@ use vortex_array::dtype::extension::ExtDType; use vortex_array::dtype::extension::ExtId; use vortex_array::dtype::extension::ExtVTable; use vortex_array::scalar::ScalarValue; +use vortex_error::VortexError; use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_ensure; @@ -117,12 +122,7 @@ pub(crate) fn polygon_geometries( storage: &ArrayRef, ctx: &mut 
ExecutionCtx, ) -> VortexResult>> { - let polygon_type = polygon_type(&GeoMetadata::default(), polygon_dimension(storage.dtype())?); - let session = ctx.session().clone(); - let arrow = session.arrow().execute_arrow(storage.clone(), None, ctx)?; - let polygons = PolygonArray::try_from((arrow.as_ref(), polygon_type)) - .map_err(|e| vortex_err!("failed to construct PolygonArray: {e}"))?; - polygons + polygon_array(storage, ctx)? .iter() .map(|geometry| -> VortexResult> { Ok(geometry @@ -133,6 +133,42 @@ pub(crate) fn polygon_geometries( .collect() } +/// Build a geoarrow `PolygonArray` from a `Polygon`'s `List>` storage. +fn polygon_array(storage: &ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult { + let polygon_type = polygon_type(&GeoMetadata::default(), polygon_dimension(storage.dtype())?); + let session = ctx.session().clone(); + let arrow = session.arrow().execute_arrow(storage.clone(), None, ctx)?; + PolygonArray::try_from((arrow.as_ref(), polygon_type)) + .map_err(|e| vortex_err!("failed to construct PolygonArray: {e}")) +} + +/// A validated `Polygon` array (`try_from` checks the extension type). +pub struct PolygonData(ExtensionArray); + +impl TryFrom for PolygonData { + type Error = VortexError; + + fn try_from(ext: ExtensionArray) -> Result { + vortex_ensure!( + ext.ext_dtype().is::(), + "expected a Polygon extension array" + ); + Ok(PolygonData(ext)) + } +} + +impl PolygonData { + /// Serialize polygons to WKB (a view array) via geoarrow's cast — the form DuckDB `GEOMETRY` takes. 
+ pub fn to_wkb(&self, ctx: &mut ExecutionCtx) -> VortexResult { + let polygons = polygon_array(&self.0.storage_array().clone(), ctx)?; + let wkb_type = + GeoArrowType::WkbView(WkbType::new(geoarrow_metadata(&GeoMetadata::default()))); + let wkb = cast(&polygons, &wkb_type) + .map_err(|e| vortex_err!("failed to cast polygons to WKB: {e}"))?; + ArrayRef::from_arrow(wkb.to_array_ref().as_ref(), false) + } +} + impl ArrowExportVTable for Polygon { fn arrow_ext_id(&self) -> Id { *ARROW_POLYGON diff --git a/vortex-geo/src/lib.rs b/vortex-geo/src/lib.rs index 951d93b7b4f..2cc8004efc5 100644 --- a/vortex-geo/src/lib.rs +++ b/vortex-geo/src/lib.rs @@ -8,6 +8,7 @@ use vortex_array::dtype::session::DTypeSessionExt; use vortex_array::scalar_fn::session::ScalarFnSessionExt; use vortex_session::VortexSession; +use crate::extension::MultiPolygon; use crate::extension::Point; use crate::extension::Polygon; use crate::extension::WellKnownBinary; @@ -32,6 +33,9 @@ pub fn initialize(session: &VortexSession) { session.dtypes().register(Polygon); session.arrow().register_exporter(Arc::new(Polygon)); session.arrow().register_importer(Arc::new(Polygon)); + session.dtypes().register(MultiPolygon); + session.arrow().register_exporter(Arc::new(MultiPolygon)); + session.arrow().register_importer(Arc::new(MultiPolygon)); // Register the geometry scalar functions. session.scalar_fns().register(GeoDistance); From a3c046b2b21b27ab4069c28729aa626c9fca52ae Mon Sep 17 00:00:00 2001 From: Nemo Yu Date: Fri, 26 Jun 2026 17:47:35 -0400 Subject: [PATCH 2/2] test: add tests for multipolygon Signed-off-by: Nemo Yu --- vortex-geo/src/tests/mod.rs | 1 + vortex-geo/src/tests/multipolygon.rs | 94 ++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 vortex-geo/src/tests/multipolygon.rs diff --git a/vortex-geo/src/tests/mod.rs b/vortex-geo/src/tests/mod.rs index 546de758eba..87b25ed1293 100644 --- a/vortex-geo/src/tests/mod.rs +++ b/vortex-geo/src/tests/mod.rs @@ -4,6 +4,7 @@ //! 
Arrow interop tests for the geospatial extension types, exercising the session wiring set up //! by [`crate::initialize`]. +mod multipolygon; mod point; mod wkb; diff --git a/vortex-geo/src/tests/multipolygon.rs b/vortex-geo/src/tests/multipolygon.rs new file mode 100644 index 00000000000..38f2543a96a --- /dev/null +++ b/vortex-geo/src/tests/multipolygon.rs @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Arrow interop for the `vortex.geo.multipolygon` extension type (`geoarrow.multipolygon`). + +use std::sync::Arc; + +use arrow_schema::DataType; +use arrow_schema::Field; +use arrow_schema::extension::ExtensionType as _; +use geoarrow::datatypes::CoordType; +use geoarrow::datatypes::Crs; +use geoarrow::datatypes::Dimension as GeoArrowDimension; +use geoarrow::datatypes::Metadata; +use geoarrow::datatypes::MultiPolygonType; +use vortex_array::arrow::ArrowSessionExt; +use vortex_array::dtype::DType; +use vortex_array::dtype::Nullability; +use vortex_error::VortexResult; + +use super::SESSION; +use crate::extension::MultiPolygon; + +/// A `geoarrow.multipolygon` Arrow field with separated (struct) XY coordinates. +fn multipolygon_field(name: &str, nullable: bool, crs: Option<&str>) -> Field { + let crs = crs + .map(|crs| Crs::from_unknown_crs_type(crs.to_string())) + .unwrap_or_default(); + let metadata = Arc::new(Metadata::new(crs, None)); + MultiPolygonType::new(GeoArrowDimension::XY, metadata).to_field(name, nullable) +} + +/// An imported `geoarrow.multipolygon` field maps to the MultiPolygon extension dtype, recovering the +/// CRS, the `List>>>` storage, and nullability. 
+#[test] +fn import_field_recovers_extension() -> VortexResult<()> { + let field = multipolygon_field("geom", true, Some("EPSG:4326")); + let dtype = SESSION.arrow().from_arrow_field(&field)?; + + let DType::Extension(ext) = &dtype else { + panic!("expected Extension dtype, got {dtype}"); + }; + assert!(ext.is::()); + assert_eq!( + ext.metadata::().crs.as_deref(), + Some("EPSG:4326") + ); + + // Storage peels three List layers (multipolygon → polygons → rings) to the coordinate struct. + let DType::List(polygons, nullability) = ext.storage_dtype() else { + panic!("expected List storage, got {}", ext.storage_dtype()); + }; + assert_eq!(*nullability, Nullability::Nullable); + let DType::List(rings, _) = polygons.as_ref() else { + panic!("expected List of polygons"); + }; + let DType::List(coords, _) = rings.as_ref() else { + panic!("expected List of rings"); + }; + let DType::Struct(fields, _) = coords.as_ref() else { + panic!("expected coordinate Struct"); + }; + let names: Vec<&str> = fields.names().iter().map(|n| n.as_ref()).collect(); + assert_eq!(names, vec!["x", "y"]); + Ok(()) +} + +/// A field with interleaved (`FixedSizeList`) coordinates fails to import. +#[test] +fn import_interleaved_field_fails() { + let multipolygon_type = MultiPolygonType::new(GeoArrowDimension::XY, Default::default()) + .with_coord_type(CoordType::Interleaved); + let field = multipolygon_type.to_field("geom", false); + assert!(SESSION.arrow().from_arrow_field(&field).is_err()); +} + +/// A field imported to the MultiPolygon dtype and exported back carries the `geoarrow.multipolygon` +/// extension over its `List` storage. 
#[test]
fn export_field_carries_extension() -> VortexResult<()> {
    // Import a non-nullable field with a CRS to obtain the extension dtype...
    let source_field = multipolygon_field("geom", false, Some("EPSG:4326"));
    let arrow = SESSION.arrow();
    let imported = arrow.from_arrow_field(&source_field)?;

    // ...then export that dtype back to an Arrow field under the same name.
    let field = arrow.to_arrow_field("geom", &imported)?;

    // The exported field must carry the GeoArrow extension name over `List` storage.
    assert_eq!(field.extension_type_name(), Some(MultiPolygonType::NAME));
    let is_list = matches!(field.data_type(), DataType::List(_));
    assert!(
        is_list,
        "expected List storage, got {}",
        field.data_type()
    );
    Ok(())
}