From 508ab2fab02c267085a140cdfc617de49ef8db9f Mon Sep 17 00:00:00 2001 From: George Stagg Date: Thu, 14 May 2026 11:36:07 +0100 Subject: [PATCH 1/4] Upgrade to duckdb v1.10502 and arrow v58 --- Cargo.lock | 147 ++++++++++++++++++++++----------- Cargo.toml | 6 +- src/plot/layer/geom/spatial.rs | 36 ++++++-- src/reader/duckdb.rs | 64 ++++++++++++-- 4 files changed, 187 insertions(+), 66 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9eaba74ba..18d04d75a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -140,9 +140,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" +checksum = "378530e55cd479eda3c14eb345310799717e6f76d0c332041e8487022166b471" dependencies = [ "arrow-arith", "arrow-array", @@ -158,23 +158,23 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" +checksum = "a0ab212d2c1886e802f51c5212d78ebbcbb0bec980fff9dadc1eb8d45cd0b738" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "num", + "num-traits", ] [[package]] name = "arrow-array" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" +checksum = "cfd33d3e92f207444098c75b42de99d329562be0cf686b307b097cc52b4e999e" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -182,30 +182,34 @@ dependencies = [ "arrow-schema", "chrono", "half", - "hashbrown 0.16.1", - "num", + "hashbrown 0.17.0", + "num-complex", + "num-integer", + "num-traits", ] [[package]] name = "arrow-buffer" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" +checksum = "0c6cd424c2693bcdbc150d843dc9d4d137dd2de4782ce6df491ad11a3a0416c0" dependencies = [ "bytes", "half", - "num", + "num-bigint", + "num-traits", ] [[package]] name = "arrow-cast" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" +checksum = "4c5aefb56a2c02e9e2b30746241058b85f8983f0fcff2ba0c6d09006e1cded7f" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", + "arrow-ord", "arrow-schema", "arrow-select", "atoi", @@ -214,27 +218,28 @@ dependencies = [ "comfy-table", "half", "lexical-core", - "num", + "num-traits", "ryu", ] [[package]] name = "arrow-data" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" +checksum = "3c88210023a2bfee1896af366309a3028fc3bcbd6515fa29a7990ee1baa08ee0" dependencies = [ "arrow-buffer", "arrow-schema", "half", - "num", + "num-integer", + "num-traits", ] [[package]] name = "arrow-ipc" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" +checksum = "238438f0834483703d88896db6fe5a7138b2230debc31b34c0336c2996e3c64f" dependencies = [ "arrow-array", "arrow-buffer", @@ -246,9 +251,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" +checksum = "1bffd8fd2579286a5d63bac898159873e5094a79009940bcb42bbfce4f19f1d0" dependencies = [ "arrow-array", "arrow-buffer", @@ -259,9 +264,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" +checksum = "bab5994731204603c73ba69267616c50f80780774c6bb0476f1f830625115e0c" dependencies = [ "arrow-array", "arrow-buffer", @@ -272,32 +277,32 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" +checksum = "f633dbfdf39c039ada1bf9e34c694816eb71fbb7dc78f613993b7245e078a1ed" dependencies = [ "bitflags", ] [[package]] name = "arrow-select" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" +checksum = "8cd065c54172ac787cf3f2f8d4107e0d3fdc26edba76fdf4f4cc170258942222" dependencies = [ "ahash 0.8.12", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", - "num", + "num-traits", ] [[package]] name = "arrow-string" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" +checksum = "29dd7cda3ab9692f43a2e4acc444d760cc17b12bb6d8232ddf64e9bab7c06b42" dependencies = [ "arrow-array", "arrow-buffer", @@ -305,7 +310,7 @@ dependencies = [ "arrow-schema", "arrow-select", "memchr", - "num", + "num-traits", "regex", "regex-syntax", ] @@ -580,6 +585,8 @@ version = "7.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" dependencies = [ + "crossterm 0.27.0", + "crossterm 0.28.1", "strum 0.26.3", "strum_macros 0.26.4", "unicode-width 0.2.2", @@ -641,7 +648,7 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "980c2afde4af43d6a05c5be738f9eae595cff86dce1f38f88b95058a98c027f3" dependencies = [ - "crossterm", + "crossterm 0.29.0", ] [[package]] @@ -675,7 +682,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04a63daf06a168535c74ab97cdba3ed4fa5d4f32cb36e437dcceb83d66854b7c" dependencies = [ "crokey-proc_macros", - "crossterm", + "crossterm 0.29.0", "once_cell", "serde", "strict", @@ -687,7 +694,7 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "847f11a14855fc490bd5d059821895c53e77eeb3c2b73ee3dded7ce77c93b231" dependencies = [ - "crossterm", + "crossterm 0.29.0", "proc-macro2", "quote", "strict", @@ -750,6 +757,30 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crossterm" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" +dependencies = [ + "bitflags", + "crossterm_winapi", + "libc", + "parking_lot", + "winapi", +] + +[[package]] +name = "crossterm" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" +dependencies = [ + "bitflags", + "parking_lot", + "rustix 0.38.44", +] + [[package]] name = "crossterm" version = "0.29.0" @@ -762,7 +793,7 @@ dependencies = [ "document-features", "mio", "parking_lot", - "rustix", + "rustix 1.1.4", "signal-hook", "signal-hook-mio", "winapi", @@ -890,12 +921,13 @@ dependencies = [ [[package]] name = "duckdb" -version = "1.4.4" +version = "1.10502.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8685352ce688883098b61a361e86e87df66fc8c444f4a2411e884c16d5243a65" +checksum = "0fdc796383b176dd5a45353fbb5e64583c0ee4da12cb62c9e510b785324b2488" dependencies = [ "arrow", "cast", + "comfy-table", "fallible-iterator", "fallible-streaming-iterator", "hashlink 0.10.0", @@ -1816,9 +1848,9 @@ checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" [[package]] name = "libduckdb-sys" -version = "1.4.4" +version = "1.10502.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d78bacb8933586cee3b550c39b610d314f9b7a48701ac7a914a046165a4ad8da" +checksum = "8d7401630ae2abcff642f7156294289e50f2d222e061c026ad797b01bf20c215" dependencies = [ "cc", "flate2", @@ -1870,6 +1902,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + [[package]] name = "linux-raw-sys" version = "0.12.1" @@ -2119,14 +2157,13 @@ dependencies = [ [[package]] name = "parquet" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27" +checksum = "5dafa7d01085b62a47dd0c1829550a0a36710ea9c4fe358a05a85477cec8a908" dependencies = [ "ahash 0.8.12", "arrow-array", "arrow-buffer", - "arrow-cast", "arrow-data", "arrow-ipc", "arrow-schema", @@ -2135,9 +2172,10 @@ dependencies = [ "bytes", "chrono", "half", - "hashbrown 0.16.1", - "num", + "hashbrown 0.17.0", "num-bigint", + "num-integer", + "num-traits", "paste", "seq-macro", "snap", @@ -2659,6 +2697,19 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys 0.4.15", + "windows-sys 0.52.0", +] + [[package]] name = "rustix" version = "1.1.4" @@ -2668,7 +2719,7 @@ dependencies = [ "bitflags", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 0.12.1", "windows-sys 0.61.2", ] @@ -3068,7 +3119,7 @@ dependencies = [ "fastrand", "getrandom 0.4.2", "once_cell", - "rustix", + "rustix 1.1.4", "windows-sys 0.61.2", ] @@ -4130,7 +4181,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" dependencies = [ "libc", - "rustix", + "rustix 1.1.4", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index f0f347a09..730f73a73 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,11 +33,11 @@ csscolorparser = "0.8.1" tree-sitter = "0.26" # Data container -arrow = { version = "56", default-features = false } +arrow = { version = "58", default-features = false } # Readers -duckdb = { version = "~1.4", features = ["bundled", "vtab-arrow"] } -parquet = { version = "56", default-features = false, features = ["arrow", "snap"] } +duckdb = { version = "~1.10502", features = ["bundled", "vtab-arrow"] } +parquet = { version = "58", default-features = false, features = ["arrow", "snap"] } bytes = "1" rusqlite = { version = "0.38", features = ["bundled", "chrono"] } diff --git a/src/plot/layer/geom/spatial.rs b/src/plot/layer/geom/spatial.rs index 97766ce92..c73b8beb0 100644 --- a/src/plot/layer/geom/spatial.rs +++ b/src/plot/layer/geom/spatial.rs @@ -40,14 +40,34 @@ impl GeomTrait for Spatial { // Geometry columns use database-native types that don't have an Arrow equivalent. // Convert to standard WKB so the writer can parse them with geozero. - let col = naming::quote_ident(&naming::aesthetic_column("geometry")); - let wkb_expr = dialect.sql_geometry_to_wkb(&col); - Ok(StatResult::Transformed { - query: format!("SELECT * REPLACE ({wkb_expr} AS {col}) FROM ({query})"), - stat_columns: vec![], - dummy_columns: vec![], - consumed_aesthetics: vec![], - }) + let geom_col = naming::aesthetic_column("geometry"); + let col = naming::quote_ident(&geom_col); + + // Skip conversion if the geometry column is already in binary WKB format. + let already_wkb = _schema.iter().any(|c| { + c.name == geom_col + && matches!( + c.dtype, + arrow::datatypes::DataType::Binary | arrow::datatypes::DataType::LargeBinary + ) + }); + + if already_wkb { + Ok(StatResult::Transformed { + query: query.to_string(), + stat_columns: vec![], + dummy_columns: vec![], + consumed_aesthetics: vec![], + }) + } else { + let wkb_expr = dialect.sql_geometry_to_wkb(&col); + Ok(StatResult::Transformed { + query: format!("SELECT * REPLACE ({wkb_expr} AS {col}) FROM ({query})"), + stat_columns: vec![], + dummy_columns: vec![], + consumed_aesthetics: vec![], + }) + } } } diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index 3aadcc265..66855bd7b 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -27,12 +27,6 @@ fn register_builtin_datasets_duckdb(sql: &str, conn: &Connection) -> Result<()> let dataset_names = super::data::extract_builtin_dataset_names(sql)?; - // Load spatial extension before registering datasets that contain - // geometry columns, so DuckDB reads them as GEOMETRY rather than BLOB. - if dataset_names.iter().any(|n| n == "world") { - let _ = conn.execute("LOAD spatial", params![]); - } - for name in dataset_names { let Some(parquet_bytes) = super::data::builtin_parquet_bytes(&name) else { continue; @@ -54,9 +48,18 @@ fn register_builtin_datasets_duckdb(sql: &str, conn: &Connection) -> Result<()> })?; } + // Arrow export in duckdb-rs v1.10502.0 aborts on GEOMETRY columns. + // Cast to binary WKB when loading the world dataset. + // https://github.com/duckdb/duckdb-rs/issues/714 + let select_expr = if name == "world" { + "* REPLACE (ST_AsWKB(geom) AS geom)" + } else { + "*" + }; let create_sql = format!( - "CREATE TABLE IF NOT EXISTS {} AS SELECT * FROM read_parquet('{}')", + "CREATE TABLE IF NOT EXISTS {} AS SELECT {} FROM read_parquet('{}')", naming::quote_ident(&table_name), + select_expr, tmp_path.display() ); @@ -205,6 +208,16 @@ impl DuckDBReader { } }; + // https://github.com/duckdb/duckdb/issues/22133 + #[cfg(debug_assertions)] + conn.execute("SET disabled_optimizers TO 'common_subplan'", params![]) + .map_err(|e| { + GgsqlError::ReaderError(format!( + "Failed to disable common_subplan optimizer: {}", + e + )) + })?; + // Register Arrow virtual table function for DataFrame registration conn.register_table_function::("arrow") .map_err(|e| { @@ -859,4 +872,41 @@ mod tests { let json = writer.render(&spec).unwrap(); assert!(!json.is_empty(), "Boxplot should render successfully"); } + + #[cfg(feature = "spatial")] + #[test] + fn test_select_wkb_parquet_column() { + use std::{env, fs}; + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + reader.execute_sql("INSTALL spatial").unwrap(); + reader.execute_sql("LOAD spatial").unwrap(); + + let mut path = env::temp_dir(); + path.push("ggsql_test_wkb.parquet"); + reader + .execute_sql(&format!( + "COPY (SELECT ST_AsWKB(ST_GeomFromText('POINT(1 2)')) AS geom, 'a' AS name) \ + TO '{}' (FORMAT PARQUET)", + path.display() + )) + .unwrap(); + + let df = reader + .execute_sql(&format!("SELECT * FROM read_parquet('{}')", path.display())) + .unwrap(); + assert_eq!(df.height(), 1); + assert_eq!(df.width(), 2); + fs::remove_file(&path).ok(); + } + + #[cfg(all(feature = "spatial", feature = "builtin-data"))] + #[test] + fn test_select_geometry_from_builtin_world() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let df = reader + .execute_sql("SELECT geom FROM ggsql:world LIMIT 5") + .unwrap(); + assert_eq!(df.height(), 5); + assert_eq!(df.width(), 1); + } } From 63eb4516ced68e12c5bd1a06e945daa6f82351c2 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Thu, 14 May 2026 11:45:27 +0100 Subject: [PATCH 2/4] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 90d2d5fe9..1a9b20d65 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ - `boxplot`, `violin`, and `range` now support omitting the categorical aesthetic, matching `bar`. `point` now treats both position aesthetics as optional. +- Upgraded dependencies: duckdb-rs v1.10502, arrow v58 (#447). ## 0.3.2 - 2026-05-05 From 555445882294b81c6250f711d9bb11964643b4c6 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Thu, 14 May 2026 12:03:09 +0100 Subject: [PATCH 3/4] Add back in auto-loading spatial --- src/reader/duckdb.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index 66855bd7b..7e8bf273e 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -27,6 +27,12 @@ fn register_builtin_datasets_duckdb(sql: &str, conn: &Connection) -> Result<()> let dataset_names = super::data::extract_builtin_dataset_names(sql)?; + // Load spatial extension before registering datasets that contain + // geometry columns, so that spatial features are available. + if dataset_names.iter().any(|n| n == "world") { + let _ = conn.execute("LOAD spatial", params![]); + } + for name in dataset_names { let Some(parquet_bytes) = super::data::builtin_parquet_bytes(&name) else { continue; From 127b96f183e0f9992de657d0aa7109df983a5b85 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Thu, 14 May 2026 13:20:58 +0100 Subject: [PATCH 4/4] Fix spatial example --- doc/syntax/layer/type/spatial.qmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/syntax/layer/type/spatial.qmd b/doc/syntax/layer/type/spatial.qmd index 15153d1a7..e5a05968e 100644 --- a/doc/syntax/layer/type/spatial.qmd +++ b/doc/syntax/layer/type/spatial.qmd @@ -73,7 +73,7 @@ Filtering based on spatial operations. ```{ggsql} VISUALISE FROM ggsql:world DRAW spatial - FILTER ST_Intersects(geom, ST_MakeEnvelope(-20.0, -35.0, 55.0, 38.0)) + FILTER ST_Intersects(ST_GeomFromWKB(geom), ST_MakeEnvelope(-20.0, -35.0, 55.0, 38.0)) ``` Make a choropleth map by mapping a variable to a fill aesthetic. @@ -83,4 +83,4 @@ VISUALISE FROM ggsql:world DRAW spatial MAPPING population AS fill SETTING opacity => 1 -``` \ No newline at end of file +```