Skip to content

Commit 1f4506b

Browse files
authored
fix(c/sedona-geos): Support export of geometries with M values from GEOS (#640)
1 parent 301086f commit 1f4506b

5 files changed

Lines changed: 61 additions & 89 deletions

File tree

Cargo.lock

Lines changed: 13 additions & 48 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 19 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -60,22 +60,21 @@ keywords = ["geospatial", "gis", "spatial", "datafusion", "arrow"]
6060
categories = ["science::geo", "database"]
6161

6262
[workspace.dependencies]
63-
approx = "0.5"
6463
adbc_core = ">=0.22.0"
6564
adbc_ffi = ">=0.22.0"
66-
lru = "0.16"
65+
approx = "0.5"
6766
arrow = { version = "57.0.0", features = ["prettyprint", "ffi", "chrono-tz"] }
6867
arrow-array = { version = "57.0.0" }
68+
arrow-buffer = { version = "57.0.0" }
6969
arrow-cast = { version = "57.0.0" }
7070
arrow-data = { version = "57.0.0" }
7171
arrow-ipc = { version = "57.0.0" }
7272
arrow-json = { version = "57.0.0" }
7373
arrow-schema = { version = "57.0.0" }
74-
arrow-buffer = { version = "57.0.0" }
7574
async-trait = { version = "0.1.87" }
7675
bytemuck = "1.25"
77-
bytes = "1.11"
7876
byteorder = "1"
77+
bytes = "1.11"
7978
chrono = { version = "0.4.41", default-features = false }
8079
comfy-table = { version = "7.2" }
8180
criterion = { version = "0.8", features = ["html_reports"] }
@@ -94,36 +93,26 @@ datafusion-physical-plan = { version = "51.0.0" }
9493
datafusion-pruning = { version = "51.0.0" }
9594
dirs = "6.0.0"
9695
env_logger = "0.11"
97-
log = "^0.4"
9896
fastrand = "2.0"
97+
float_next_after = "2"
9998
futures = "0.3"
100-
pin-project-lite = "0.2"
99+
geo = "0.31.0"
100+
geo-index = { version = "0.3.3", features = ["use-geo_0_31"] }
101+
geo-traits = "0.3.0"
102+
geo-types = "0.7.17"
103+
geojson = "0.24.2"
104+
geos = { version = "11.0.0", features = ["geo", "v3_12_0"] }
101105
glam = "0.32.0"
102-
object_store = { version = "0.12.4", default-features = false }
103-
float_next_after = "2"
104-
num-traits = { version = "0.2", default-features = false, features = ["libm"] }
105-
mimalloc = { version = "0.1", default-features = false }
106106
libmimalloc-sys = { version = "0.1", default-features = false }
107+
log = "^0.4"
108+
lru = "0.16"
109+
mimalloc = { version = "0.1", default-features = false }
110+
num-traits = { version = "0.2", default-features = false, features = ["libm"] }
111+
object_store = { version = "0.12.4", default-features = false }
107112
once_cell = "1.20"
108-
109-
geos = { git="https://github.com/georust/geos.git", rev="47afbad2483e489911ddb456417808340e9342c3", features = ["geo", "v3_12_0"] }
110-
111-
geo-types = "0.7.17"
112-
geo-traits = "0.3.0"
113-
geo = "0.31.0"
114-
geojson = "0.24.2"
115-
116-
geo-index = { version = "0.3.3", features = ["use-geo_0_31"] }
117-
118-
wkb = "0.9.2"
119-
wkt = "0.14.0"
120-
121113
parking_lot = "0.12"
122-
parquet = { version = "57.0.0", default-features = false, features = [
123-
"arrow",
124-
"async",
125-
"object_store",
126-
] }
114+
parquet = { version = "57.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
115+
pin-project-lite = "0.2"
127116
rand = "0.10"
128117
regex = "1.12"
129118
rstest = "0.26.1"
@@ -134,6 +123,8 @@ tempfile = { version = "3"}
134123
thiserror = { version = "2" }
135124
tokio = { version = "1.48", features = ["macros", "rt", "sync"] }
136125
url = "2.5.7"
126+
wkb = "0.9.2"
127+
wkt = "0.14.0"
137128

138129
# Workspace path dependencies for internal crates
139130
sedona = { version = "0.3.0", path = "rust/sedona" }

c/sedona-geos/src/geos_to_wkb.rs

Lines changed: 18 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,8 @@ use std::io::Write;
1919
use byteorder::{LittleEndian, WriteBytesExt};
2020
use datafusion_common::{error::Result, DataFusionError};
2121
use geo_traits::Dimensions;
22-
use geos::{Geom, Geometry, GeometryTypes};
22+
use geos::{CoordType, Geom, Geometry, GeometryTypes};
23+
use sedona_common::sedona_internal_err;
2324
use sedona_geometry::wkb_factory::{
2425
write_wkb_geometrycollection_header, write_wkb_linestring_header,
2526
write_wkb_multilinestring_header, write_wkb_multipoint_header, write_wkb_multipolygon_header,
@@ -265,8 +266,16 @@ fn write_coord_seq(
265266
dim: Dimensions,
266267
writer: &mut impl Write,
267268
) -> Result<()> {
269+
let coord_type = match dim {
270+
Dimensions::Xy => CoordType::XY,
271+
Dimensions::Xyz => CoordType::XYZ,
272+
Dimensions::Xym => CoordType::XYM,
273+
Dimensions::Xyzm => CoordType::XYZM,
274+
_ => return sedona_internal_err!("Unexpected dimensions {dim:?}"),
275+
};
276+
268277
let coords = coord_seq
269-
.as_buffer(Some(dim.size()))
278+
.as_buffer(Some(coord_type))
270279
.map_err(|e| DataFusionError::Execution(format!("Failed to get coord seq buffer: {e}")))?;
271280

272281
// Cast Vec<f64> to &[u8] so we can write the bytes directly to the writer buffer
@@ -339,6 +348,13 @@ mod tests {
339348
test_wkb_round_trip("LINESTRING Z (0 0 10, 1 1 11, 2 2 12)");
340349
}
341350

351+
#[test]
352+
fn test_write_linestring_xym() {
353+
test_wkb_round_trip("LINESTRING M (0 0 0, 1 1 1)");
354+
test_wkb_round_trip("LINESTRING M (0 0 0, 1 1 1, 2 2 2)");
355+
test_wkb_round_trip("LINESTRING M (0 0 10, 1 1 11, 2 2 12)");
356+
}
357+
342358
#[test]
343359
fn test_write_linestring_xyzm() {
344360
test_wkb_round_trip("LINESTRING ZM (0 0 1 2, 1 1 3 4)");

c/sedona-geos/src/wkb_to_geos.rs

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ use std::cell::RefCell;
1818

1919
use byteorder::{BigEndian, ByteOrder, LittleEndian};
2020
use geo_traits::*;
21-
use geos::GResult;
21+
use geos::{CoordType, GResult};
2222
use wkb::{reader::*, Endianness};
2323

2424
/// A factory for converting WKB to GEOS geometries.
@@ -215,11 +215,11 @@ fn create_coord_sequence_from_raw_parts(
215215
num_coords: usize,
216216
scratch: &mut Vec<f64>,
217217
) -> GResult<geos::CoordSeq> {
218-
let (has_z, has_m, dim_size) = match dim {
219-
Dimension::Xy => (false, false, 2),
220-
Dimension::Xyz => (true, false, 3),
221-
Dimension::Xym => (false, true, 3),
222-
Dimension::Xyzm => (true, true, 4),
218+
let (coord_type, dim_size) = match dim {
219+
Dimension::Xy => (CoordType::XY, 2),
220+
Dimension::Xyz => (CoordType::XYZ, 3),
221+
Dimension::Xym => (CoordType::XYM, 3),
222+
Dimension::Xyzm => (CoordType::XYZM, 4),
223223
};
224224
let num_ordinates = dim_size * num_coords;
225225

@@ -233,7 +233,7 @@ fn create_coord_sequence_from_raw_parts(
233233
{
234234
let coords_f64 =
235235
unsafe { &*core::ptr::slice_from_raw_parts(ptr as *const f64, num_ordinates) };
236-
geos::CoordSeq::new_from_buffer(coords_f64, num_coords, has_z, has_m)
236+
geos::CoordSeq::new_from_buffer(coords_f64, num_coords, coord_type)
237237
}
238238

239239
// On platforms without unaligned memory access support, we need to copy the data to the
@@ -249,7 +249,7 @@ fn create_coord_sequence_from_raw_parts(
249249
scratch.as_mut_ptr() as *mut u8,
250250
num_ordinates * std::mem::size_of::<f64>(),
251251
);
252-
geos::CoordSeq::new_from_buffer(scratch.as_slice(), num_coords, has_z, has_m)
252+
geos::CoordSeq::new_from_buffer(scratch.as_slice(), num_coords, coord_type)
253253
}
254254
}
255255
} else {
@@ -262,7 +262,7 @@ fn create_coord_sequence_from_raw_parts(
262262
save_f64_to_scratch::<LittleEndian>(scratch, buf, num_ordinates);
263263
}
264264
}
265-
geos::CoordSeq::new_from_buffer(scratch.as_slice(), num_coords, has_z, has_m)
265+
geos::CoordSeq::new_from_buffer(scratch.as_slice(), num_coords, coord_type)
266266
}
267267
}
268268

python/sedonadb/tests/functions/test_functions.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1174,10 +1174,10 @@ def test_st_unaryunion(eng, geom, expected):
11741174
@pytest.mark.parametrize(
11751175
("geom", "expected"),
11761176
[
1177-
# Skip M tests because geos rust isn't capable of writing XYM geometries yet
1178-
# https://github.com/apache/sedona-db/issues/481
1177+
("POINT M EMPTY", "POINT M EMPTY"),
11791178
("POINT Z EMPTY", "POINT Z EMPTY"),
11801179
("POINT ZM EMPTY", "POINT ZM EMPTY"),
1180+
("POINT M (0 1 2)", "POINT M(0 1 2)"),
11811181
("POINT Z (0 0 0)", "POINT Z(0 0 0)"),
11821182
("POINT ZM (1 2 3 4)", "POINT ZM(1 2 3 4)"),
11831183
("LINESTRING Z (0 0 0, 1 1 1)", "LINESTRING Z(0 0 0,1 1 1)"),

0 commit comments

Comments
 (0)