From 90bb61fe551a68fdb6139504a599f3d18954172b Mon Sep 17 00:00:00 2001 From: Thomas Lin Pedersen Date: Mon, 22 Jun 2026 21:31:06 +0200 Subject: [PATCH] update to ggsql 0.4.1 --- .../workflows/MainDistributionPipeline.yml | 12 +- CLAUDE.md | 2 +- duckdb | 2 +- extension-ci-tools | 2 +- rust/Cargo.lock | 115 ++++++------------ rust/Cargo.toml | 7 +- rust/src/dialect.rs | 98 ++++++++++++++- rust/src/reader.rs | 4 +- 8 files changed, 150 insertions(+), 92 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 73c35ba..e15cff1 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -14,19 +14,19 @@ concurrency: jobs: duckdb-stable-build: name: Build extension binaries - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.5.1 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.5.4 with: - duckdb_version: v1.5.1 - ci_tools_version: v1.5.1 + duckdb_version: v1.5.4 + ci_tools_version: v1.5.4 extension_name: ggsql extra_toolchains: 'rust' exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads' code-quality-check: name: Code Quality Check - uses: duckdb/extension-ci-tools/.github/workflows/_extension_code_quality.yml@v1.5.1 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_code_quality.yml@v1.5.4 with: - duckdb_version: v1.5.1 - ci_tools_version: v1.5.1 + duckdb_version: v1.5.4 + ci_tools_version: v1.5.4 extension_name: ggsql format_checks: 'format;tidy' diff --git a/CLAUDE.md b/CLAUDE.md index ef3cf95..5c8e56b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -70,7 +70,7 @@ The inner `Connection` is created lazily and **persists for the whole `ggsql_exe ## Inlined `DuckDbDialect` -`rust/src/dialect.rs` carries a verbatim copy of ggsql's `DuckDbDialect` (currently from ggsql 0.3.2's `src/reader/duckdb.rs`). We can't enable ggsql's `duckdb` feature because it pulls in `duckdb-rs` with `bundled`, which would statically link a second DuckDB into an extension already loaded inside DuckDB (symbol clashes + binary bloat). If upstream changes the dialect, re-sync manually. +`rust/src/dialect.rs` carries a verbatim copy of ggsql's `DuckDbDialect` (currently from ggsql 0.4.1's `src/reader/duckdb.rs`, with the `pub(crate)` `wrap_with_column_aliases` helper inlined). We can't enable ggsql's `duckdb` feature because it pulls in `duckdb-rs` with `bundled`, which would statically link a second DuckDB into an extension already loaded inside DuckDB (symbol clashes + binary bloat). If upstream changes the dialect, re-sync manually. ## Conventions diff --git a/duckdb b/duckdb index 8a58519..08e34c4 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 8a5851971fae891f292c2714d86046ee018e9737 +Subproject commit 08e34c447bae34eaee3723cac61f2878b6bdf787 diff --git a/extension-ci-tools b/extension-ci-tools index ec20f45..b777c70 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit ec20f45aabeb9fcfcfa044dda249597f066d4826 +Subproject commit b777c70d30942cca5bef62d6d4fa23a13362f398 diff --git a/rust/Cargo.lock b/rust/Cargo.lock index b0a4546..35dce62 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -51,9 +51,9 @@ dependencies = [ [[package]] name = "arrow" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" +checksum = "378530e55cd479eda3c14eb345310799717e6f76d0c332041e8487022166b471" dependencies = [ "arrow-arith", "arrow-array", @@ -69,23 +69,23 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" +checksum = "a0ab212d2c1886e802f51c5212d78ebbcbb0bec980fff9dadc1eb8d45cd0b738" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "num", + "num-traits", ] [[package]] name = "arrow-array" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" +checksum = "cfd33d3e92f207444098c75b42de99d329562be0cf686b307b097cc52b4e999e" dependencies = [ "ahash", "arrow-buffer", @@ -93,30 +93,34 @@ dependencies = [ "arrow-schema", "chrono", "half", - "hashbrown 0.16.1", - "num", + "hashbrown 0.17.0", + "num-complex", + "num-integer", + "num-traits", ] [[package]] name = "arrow-buffer" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" +checksum = "0c6cd424c2693bcdbc150d843dc9d4d137dd2de4782ce6df491ad11a3a0416c0" dependencies = [ "bytes", "half", - "num", + "num-bigint", + "num-traits", ] [[package]] name = "arrow-cast" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" +checksum = "4c5aefb56a2c02e9e2b30746241058b85f8983f0fcff2ba0c6d09006e1cded7f" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", + "arrow-ord", "arrow-schema", "arrow-select", "atoi", @@ -124,27 +128,28 @@ dependencies = [ "chrono", "half", "lexical-core", - "num", + "num-traits", "ryu", ] [[package]] name = "arrow-data" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" +checksum = "3c88210023a2bfee1896af366309a3028fc3bcbd6515fa29a7990ee1baa08ee0" dependencies = [ "arrow-buffer", "arrow-schema", "half", - "num", + "num-integer", + "num-traits", ] [[package]] name = "arrow-ord" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" +checksum = "1bffd8fd2579286a5d63bac898159873e5094a79009940bcb42bbfce4f19f1d0" dependencies = [ "arrow-array", "arrow-buffer", @@ -155,9 +160,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" +checksum = "bab5994731204603c73ba69267616c50f80780774c6bb0476f1f830625115e0c" dependencies = [ "arrow-array", "arrow-buffer", @@ -168,32 +173,32 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" +checksum = "f633dbfdf39c039ada1bf9e34c694816eb71fbb7dc78f613993b7245e078a1ed" dependencies = [ "bitflags", ] [[package]] name = "arrow-select" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" +checksum = "8cd065c54172ac787cf3f2f8d4107e0d3fdc26edba76fdf4f4cc170258942222" dependencies = [ "ahash", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", - "num", + "num-traits", ] [[package]] name = "arrow-string" -version = "56.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" +checksum = "29dd7cda3ab9692f43a2e4acc444d760cc17b12bb6d8232ddf64e9bab7c06b42" dependencies = [ "arrow-array", "arrow-buffer", @@ -201,7 +206,7 @@ dependencies = [ "arrow-schema", "arrow-select", "memchr", - "num", + "num-traits", "regex", "regex-syntax", ] @@ -425,9 +430,9 @@ dependencies = [ [[package]] name = "ggsql" -version = "0.3.2" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba9db0ebae22cf5907cc8a4dbb0b3acbb665d8960c13c7e0e70eeb7dd687e3b2" +checksum = "862a0291cc018eeedc55b641775d4c20730cbc4c7f7c19e4677b12a339d1c903" dependencies = [ "arrow", "bytes", @@ -480,12 +485,6 @@ dependencies = [ "foldhash", ] -[[package]] -name = "hashbrown" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" - [[package]] name = "hashbrown" version = "0.17.0" @@ -683,20 +682,6 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" -[[package]] -name = "num" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - [[package]] name = "num-bigint" version = "0.4.6" @@ -725,28 +710,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-iter" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" -dependencies = [ - "num-bigint", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.19" @@ -1133,9 +1096,9 @@ dependencies = [ [[package]] name = "tree-sitter-ggsql" -version = "0.3.2" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9293cfb7d46e05f3a0ff7edd98309420d0a1ba105aa314a2076e4b87302e623" +checksum = "44392a6fd9f993cad3d981fa7be0c3c55ccba9183279b57d749c9317e96b856d" dependencies = [ "cc", "tree-sitter", diff --git a/rust/Cargo.toml b/rust/Cargo.toml index ebd0477..c95a17e 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -11,11 +11,12 @@ crate-type = ["staticlib"] # ggsql: default features disabled to avoid pulling duckdb-rs (bundled = second DuckDB # linked into our extension = binary bloat and symbol clashes). Dialect is inlined in # dialect.rs so we don't need the `duckdb` feature. -ggsql = { version = "0.3.2", default-features = false, features = ["vegalite"] } +ggsql = { version = "0.4.1", default-features = false, features = ["vegalite"] } # Arrow: ffi for consuming DuckDB's ArrowArrayStream. `arrow::compute::concat_batches` -# is always compiled — no feature flag needed. -arrow = { version = "56", default-features = false, features = ["ffi"] } +# is always compiled — no feature flag needed. Pinned to the same major as ggsql's +# `arrow` dep (58) so `DataFrame::from_record_batch` accepts the `RecordBatch` we build. +arrow = { version = "58", default-features = false, features = ["ffi"] } serde_json = "1" tiny_http = "0.12" diff --git a/rust/src/dialect.rs b/rust/src/dialect.rs index d25b665..d56a7c5 100644 --- a/rust/src/dialect.rs +++ b/rust/src/dialect.rs @@ -1,11 +1,15 @@ -// Inlined from ggsql 0.3.2 src/reader/duckdb.rs. +// Inlined from ggsql 0.4.1 src/reader/duckdb.rs (DuckDbDialect impl). // We can't enable ggsql's `duckdb` feature because that pulls in duckdb-rs with // `bundled`, statically linking a whole second DuckDB into an extension that is // already loaded inside DuckDB. DuckDbDialect itself has no duckdb-rs dependency, // so we copy it here. Re-sync if ggsql changes this dialect upstream. +// +// `wrap_with_column_aliases` is `pub(crate)` upstream, so it is inlined verbatim +// below rather than imported. `default_sql_aggregate` is `pub`, so we call it +// through `ggsql::reader`. use ggsql::naming; -use ggsql::reader::SqlDialect; +use ggsql::reader::{default_sql_aggregate, SqlDialect}; pub struct DuckDbDialect; @@ -24,6 +28,60 @@ impl SqlDialect for DuckDbDialect { format!("LEAST({})", exprs.join(", ")) } + fn sql_st_transform(&self, column: &str, source_crs: &str, target_crs: &str) -> String { + format!( + "ST_Transform({}, '{}', '{}', always_xy := true)", + column, + source_crs.replace('\'', "''"), + target_crs.replace('\'', "''") + ) + } + + /// WORKAROUND(duckdb-rs#714): geometry columns arrive as WKB BLOB via Arrow. + fn sql_ensure_geometry(&self, column: &str) -> String { + format!("ST_GeomFromWKB(CAST({column} AS BLOB))") + } + + fn sql_select_replace( + &self, + expr: &str, + col: &str, + from: &str, + _all_columns: &[String], + ) -> String { + format!("SELECT * REPLACE ({expr} AS {col}) FROM ({from})") + } + + fn sql_geometry_to_wkb(&self, column: &str) -> String { + format!("ST_AsWKB({column})") + } + + fn sql_geometry_bbox(&self, column: &str, from: &str) -> String { + format!( + "SELECT ST_XMin(ext) AS xmin, ST_YMin(ext) AS ymin, \ + ST_XMax(ext) AS xmax, ST_YMax(ext) AS ymax \ + FROM (SELECT ST_Extent_Agg({column}) AS ext FROM {from})" + ) + } + + fn sql_spatial_setup(&self) -> Vec { + vec!["LOAD spatial".into()] + } + + fn create_or_replace_temp_table_sql( + &self, + name: &str, + column_aliases: &[String], + body_sql: &str, + ) -> Vec { + let body = wrap_with_column_aliases(body_sql, column_aliases); + vec![format!( + "CREATE OR REPLACE TEMP TABLE {} AS {}", + naming::quote_ident(name), + body + )] + } + fn sql_generate_series(&self, n: usize) -> String { format!( "\"__ggsql_seq__\"(n) AS (SELECT generate_series FROM GENERATE_SERIES(0, {}))", @@ -31,6 +89,23 @@ impl SqlDialect for DuckDbDialect { ) } + fn sql_quantile_inline(&self, column: &str, fraction: f64) -> Option { + Some(format!( + "QUANTILE_CONT({}, {})", + naming::quote_ident(column), + fraction + )) + } + + fn sql_aggregate(&self, name: &str, qcol: &str) -> Option { + match name { + "first" => Some(format!("FIRST({})", qcol)), + "last" => Some(format!("LAST({})", qcol)), + "diff" => Some(format!("(LAST({c}) - FIRST({c}))", c = qcol)), + _ => default_sql_aggregate(name, qcol), + } + } + fn sql_percentile(&self, column: &str, fraction: f64, from: &str, groups: &[String]) -> String { let group_filter = groups .iter() @@ -54,3 +129,22 @@ impl SqlDialect for DuckDbDialect { ) } } + +/// Wrap a body SQL in a CTE with a column alias list when aliases are present. +/// Inlined from ggsql's `reader::wrap_with_column_aliases` (`pub(crate)` upstream). +/// This is a portable way to rename the body's output columns without relying +/// on `CREATE TABLE t(a, b) AS ...` (which SQLite does not support). +fn wrap_with_column_aliases(body_sql: &str, column_aliases: &[String]) -> String { + if column_aliases.is_empty() { + return body_sql.to_string(); + } + let cols = column_aliases + .iter() + .map(|c| naming::quote_ident(c)) + .collect::>() + .join(", "); + format!( + "WITH __ggsql_aliased__({}) AS ({}) SELECT * FROM __ggsql_aliased__", + cols, body_sql + ) +} diff --git a/rust/src/reader.rs b/rust/src/reader.rs index afb819d..09f03c2 100644 --- a/rust/src/reader.rs +++ b/rust/src/reader.rs @@ -65,13 +65,13 @@ impl Reader for CallbackReader { self.exec_sql_via_bridge(sql) } - // ggsql 0.3.0's engine no longer calls `register` on the visualise path — CTEs and + // ggsql's engine no longer calls `register` on the visualise path — CTEs and // the global query are materialised via `execute_sql(create_or_replace_temp_table_sql(...))` // instead. The trait method is still required, so we keep a stub that surfaces a // clean error if any future code path does invoke it. fn register(&self, _name: &str, _df: DataFrame, _replace: bool) -> Result<()> { Err(GgsqlError::ReaderError( - "ggsql-duckdb reader does not implement register; ggsql 0.3.0 should not call it" + "ggsql-duckdb reader does not implement register; the visualise path should not call it" .into(), )) }