From dbffc9ae448d269f1a008dff8f6225faaa34596c Mon Sep 17 00:00:00 2001 From: Madeleine Corneli Date: Mon, 27 Apr 2026 12:03:28 -0700 Subject: [PATCH] Add Exasol ODBC dialect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce ExasolDialect implementing SqlDialect, plus an `exasol` branch in OdbcReader's detect_dialect(). Lets the existing generic OdbcReader work correctly against Exasol with no new reader implementation — parallel in scope to the existing Snowflake dialect. Connection string: odbc://Driver=Exasol;EXAHOST=host:8563;EXAUID=user;EXAPWD=pass The 11 method overrides in src/reader/exasol.rs were each chosen to fix a default that is rejected or wrong on Exasol. Override SQL was verified against exasol/docker-db:2025.2.0-arm64dev.0 by a separate probe-test campaign maintained on a sibling branch (run with EXASOL_TEST_URL set; test module: src/reader/exasol_probes.rs, not included in this PR). Behavior justifications: - string_type_name -> VARCHAR(2000000) Bare VARCHAR is rejected in DDL on Exasol; an explicit length is required. 2_000_000 is Exasol's practical max — a smaller N risks truncating Polars/Arrow string columns at register() time. - time_type_name -> VARCHAR(32) Exasol has no SQL TIME type. `TIME '01:02:03'` raises `Feature not supported: SQL-Type TIME`. VARCHAR-for-TIME mirrors the SqliteDialect precedent at src/reader/sqlite.rs:45-47; consumers do a Polars/Arrow-side reparse for Vega-Lite temporal axes. - sql_greatest / sql_least Exasol has native GREATEST/LEAST; emit those instead of the ANSI CASE fallback for shorter, more readable SQL. - sql_date_literal -> ADD_DAYS(DATE '1970-01-01', N) Default `INTERVAL N DAY` (unquoted) is rejected by Exasol's parser. ADD_DAYS sidesteps interval syntax entirely and pairs naturally with ADD_SECONDS below. 
- sql_datetime_literal -> ADD_SECONDS(TIMESTAMP '...', secs) Default `INTERVAL N MICROSECOND` fails — MICROSECOND is not a valid Exasol interval subtype (Exasol supports YEAR/MONTH/DAY/HOUR/MINUTE/SECOND only). Note: Exasol's TIMESTAMP itself truncates to millisecond precision; sub-millisecond input is silently zeroed by the database. This is documented in the file's top doc-block. - sql_time_literal -> 'HH:MM:SS.uuuuuu' string Default emits TIME literal + NANOSECOND interval, both unsupported. Time travels as VARCHAR(32) per time_type_name, so emitting an ISO-8601 string matches the storage contract. - sql_list_catalogs / sql_list_schemas / sql_list_tables / sql_list_columns Exasol has no information_schema views; the ANSI defaults raise `object not found`. Substituted with SYS.EXA_SCHEMAS, SYS.EXA_ALL_TABLES, and SYS.EXA_ALL_COLUMNS. Catalog filters are ignored (Exasol has no catalog tier above schemas). Single quotes in schema/table arguments are escaped via doubling. There is a separate, pre-existing OdbcReader Int32 buffer issue that affects DECIMAL-returning function results (e.g. GREATEST/LEAST over DECIMAL columns with precision < 10). The dialect's emitted SQL is correct; the issue is in the result-binding layer and will be filed as its own upstream issue. Tests (all in src/reader/exasol.rs and src/reader/odbc.rs): - 11 dialect tests, one per override, asserting exact SQL strings or, where the full text is not pinned, key SQL fragments. - Single-quote escaping verified for sql_list_tables / sql_list_columns. - test_detect_dialect extended with `Driver=Exasol;...` recognition (existing Snowflake / PostgreSQL / generic-fallback assertions preserved). - New test_detect_dialect_exasol_curly_form covers the braced `Driver={Exasol};` form and its all-uppercase `DRIVER={EXASOL};` variant. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/reader/exasol.rs | 282 +++++++++++++++++++++++++++++++++++++++++++ src/reader/mod.rs | 3 + src/reader/odbc.rs | 32 ++++- 3 files changed, 315 insertions(+), 2 deletions(-) create mode 100644 src/reader/exasol.rs diff --git a/src/reader/exasol.rs b/src/reader/exasol.rs new file mode 100644 index 00000000..46d912c2 --- /dev/null +++ b/src/reader/exasol.rs @@ -0,0 +1,282 @@ +/* Exasol-specific SQL dialect. + * + * Override fragments verified against `exasol/docker-db:2025.2.0-arm64dev.0` + * by the sibling `probe-exasol-dialect-behaviors` plan (Runs 1, 2, 3). + * + * Known behavior caveats baked into this dialect: + * + * - TIMESTAMP precision is millisecond-only on Exasol. Sub-millisecond + * fractional input passed via `sql_datetime_literal` is silently truncated + * by the database (e.g. `...123456` becomes `...123000`). This cannot be + * worked around at the dialect layer; document and accept. + * + * - TIME maps to `VARCHAR(32)`. Exasol has no SQL TIME type (`TIME '01:02:03'` + * raises *Feature not supported: SQL-Type TIME*). This mirrors the + * `SqliteDialect` precedent at `src/reader/sqlite.rs:45-47`. Polars-side + * reparse is required for Vega-Lite temporal axes. + * + * - A pre-existing `OdbcReader` Int32 buffer bug affects DECIMAL-returning + * function results (e.g. `GREATEST` / `LEAST` over decimals with precision + * < 10). The dialect's emitted SQL is correct; the bug is in the + * result-binding layer in `src/reader/odbc.rs`. Tracked in + * `specs/_plans/probe-exasol-dialect-behaviors/upstream-issue-draft.md` + * and will be filed upstream post-PR. 
+ */ + +pub struct ExasolDialect; + +impl super::SqlDialect for ExasolDialect { + fn string_type_name(&self) -> Option<&str> { + Some("VARCHAR(2000000)") + } + + fn time_type_name(&self) -> Option<&str> { + Some("VARCHAR(32)") + } + + fn sql_greatest(&self, exprs: &[&str]) -> String { + format!("GREATEST({})", exprs.join(", ")) + } + + fn sql_least(&self, exprs: &[&str]) -> String { + format!("LEAST({})", exprs.join(", ")) + } + + fn sql_date_literal(&self, days_since_epoch: i32) -> String { + format!("ADD_DAYS(DATE '1970-01-01', {})", days_since_epoch) + } + + // Note: Exasol TIMESTAMP truncates to millisecond precision; sub-millisecond + // fractional input is silently zeroed by the database. + fn sql_datetime_literal(&self, microseconds_since_epoch: i64) -> String { + let seconds_with_fraction = microseconds_since_epoch as f64 / 1_000_000.0; + format!( + "ADD_SECONDS(TIMESTAMP '1970-01-01 00:00:00', {})", + seconds_with_fraction + ) + } + + // Time stored as VARCHAR(32) per time_type_name. Emit ISO-8601 string. + // Polars-side reparse is needed for Vega-Lite temporal axis (mirrors SqliteDialect). 
+ fn sql_time_literal(&self, nanoseconds_since_midnight: i64) -> String { + let secs = nanoseconds_since_midnight / 1_000_000_000; + let h = secs / 3600; + let m = (secs % 3600) / 60; + let s = secs % 60; + let nanos = nanoseconds_since_midnight % 1_000_000_000; + let micros = nanos / 1_000; // VARCHAR carries µs; nanosecond truncation acceptable + format!("'{:02}:{:02}:{:02}.{:06}'", h, m, s, micros) + } + + fn sql_list_catalogs(&self) -> String { + // Exasol has no catalog layer above schemas; surface every schema as a top-level catalog + "SELECT SCHEMA_NAME AS catalog_name FROM SYS.EXA_SCHEMAS ORDER BY SCHEMA_NAME".into() + } + + fn sql_list_schemas(&self, _catalog: &str) -> String { + // Catalog argument is ignored: Exasol treats schema as the top tier + "SELECT SCHEMA_NAME AS schema_name FROM SYS.EXA_SCHEMAS ORDER BY SCHEMA_NAME".into() + } + + fn sql_list_tables(&self, _catalog: &str, schema: &str) -> String { + format!( + "SELECT TABLE_NAME AS table_name, \ + CASE WHEN TABLE_IS_VIRTUAL THEN 'VIEW' ELSE 'BASE TABLE' END AS table_type \ + FROM SYS.EXA_ALL_TABLES \ + WHERE TABLE_SCHEMA = '{}' \ + ORDER BY TABLE_NAME", + schema.replace('\'', "''") + ) + } + + fn sql_list_columns(&self, _catalog: &str, schema: &str, table: &str) -> String { + format!( + "SELECT COLUMN_NAME AS column_name, COLUMN_TYPE AS data_type \ + FROM SYS.EXA_ALL_COLUMNS \ + WHERE COLUMN_SCHEMA = '{}' AND COLUMN_TABLE = '{}' \ + ORDER BY COLUMN_ORDINAL_POSITION", + schema.replace('\'', "''"), + table.replace('\'', "''") + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::reader::SqlDialect; + + #[test] + fn test_sql_greatest_uses_native() { + let d = ExasolDialect; + assert_eq!(d.sql_greatest(&["a", "b", "c"]), "GREATEST(a, b, c)"); + } + + #[test] + fn test_sql_least_uses_native() { + let d = ExasolDialect; + assert_eq!(d.sql_least(&["a", "b", "c"]), "LEAST(a, b, c)"); + } + + #[test] + fn test_sql_datetime_literal_uses_add_seconds() { + let d = ExasolDialect; + // 1_500_000 
microseconds = 1.5 seconds since epoch + let sql = d.sql_datetime_literal(1_500_000); + assert!( + sql.contains("ADD_SECONDS(TIMESTAMP '1970-01-01 00:00:00'"), + "expected ADD_SECONDS form, got: {}", + sql + ); + assert!( + sql.contains("1.5"), + "expected fractional seconds 1.5 in: {}", + sql + ); + // Must not use the broken default INTERVAL N MICROSECOND form + assert!( + !sql.to_uppercase().contains("MICROSECOND"), + "must not emit MICROSECOND interval (unsupported on Exasol): {}", + sql + ); + } + + #[test] + fn test_sql_time_literal_emits_varchar_string() { + let d = ExasolDialect; + // 01:02:03.456789 → 3723 sec + 456_789_000 ns + let ns = 3723 * 1_000_000_000_i64 + 456_789_000; + let sql = d.sql_time_literal(ns); + assert_eq!(sql, "'01:02:03.456789'"); + // Must not use the broken default TIME literal / NANOSECOND interval forms + assert!( + !sql.to_uppercase().contains("TIME "), + "must not emit TIME literal: {}", + sql + ); + assert!( + !sql.to_uppercase().contains("NANOSECOND"), + "must not emit NANOSECOND interval: {}", + sql + ); + } + + #[test] + fn test_sql_date_literal_uses_add_days() { + let d = ExasolDialect; + let sql = d.sql_date_literal(20000); + assert_eq!(sql, "ADD_DAYS(DATE '1970-01-01', 20000)"); + } + + #[test] + fn test_string_type_name_has_length() { + let d = ExasolDialect; + assert_eq!(d.string_type_name(), Some("VARCHAR(2000000)")); + } + + #[test] + fn test_time_type_name_is_varchar() { + let d = ExasolDialect; + assert_eq!(d.time_type_name(), Some("VARCHAR(32)")); + } + + #[test] + fn test_sql_list_catalogs_uses_sys_exa_schemas() { + let d = ExasolDialect; + let sql = d.sql_list_catalogs(); + assert!( + sql.contains("SYS.EXA_SCHEMAS"), + "expected SYS.EXA_SCHEMAS in: {}", + sql + ); + assert!( + !sql.to_lowercase().contains("information_schema"), + "must not query information_schema (absent on Exasol): {}", + sql + ); + } + + #[test] + fn test_sql_list_schemas_uses_sys_exa_schemas() { + let d = ExasolDialect; + let sql = 
d.sql_list_schemas("ignored_catalog"); + assert!( + sql.contains("SYS.EXA_SCHEMAS"), + "expected SYS.EXA_SCHEMAS in: {}", + sql + ); + assert!( + !sql.to_lowercase().contains("information_schema"), + "must not query information_schema (absent on Exasol): {}", + sql + ); + } + + #[test] + fn test_sql_list_tables_uses_sys_exa_all_tables() { + let d = ExasolDialect; + let sql = d.sql_list_tables("ignored_catalog", "MY_SCHEMA"); + assert!( + sql.contains("SYS.EXA_ALL_TABLES"), + "expected SYS.EXA_ALL_TABLES in: {}", + sql + ); + assert!( + sql.contains("CASE WHEN TABLE_IS_VIRTUAL THEN 'VIEW' ELSE 'BASE TABLE' END"), + "expected synthesized table_type CASE expression in: {}", + sql + ); + assert!( + sql.contains("TABLE_SCHEMA = 'MY_SCHEMA'"), + "expected schema filter in: {}", + sql + ); + + // Schema-string escape: O'Brien → O''Brien + let sql_escaped = d.sql_list_tables("ignored", "O'Brien"); + assert!( + sql_escaped.contains("TABLE_SCHEMA = 'O''Brien'"), + "expected single-quote-escaped schema in: {}", + sql_escaped + ); + } + + #[test] + fn test_sql_list_columns_uses_sys_exa_all_columns() { + let d = ExasolDialect; + let sql = d.sql_list_columns("ignored_catalog", "MY_SCHEMA", "MY_TABLE"); + assert!( + sql.contains("SYS.EXA_ALL_COLUMNS"), + "expected SYS.EXA_ALL_COLUMNS in: {}", + sql + ); + assert!( + sql.contains("ORDER BY COLUMN_ORDINAL_POSITION"), + "expected ordinal-position ordering in: {}", + sql + ); + assert!( + sql.contains("COLUMN_SCHEMA = 'MY_SCHEMA'"), + "expected schema filter in: {}", + sql + ); + assert!( + sql.contains("COLUMN_TABLE = 'MY_TABLE'"), + "expected table filter in: {}", + sql + ); + + // Schema-string and table-string escape: O'Brien → O''Brien + let sql_escaped = d.sql_list_columns("ignored", "O'Brien", "T'bl"); + assert!( + sql_escaped.contains("COLUMN_SCHEMA = 'O''Brien'"), + "expected single-quote-escaped schema in: {}", + sql_escaped + ); + assert!( + sql_escaped.contains("COLUMN_TABLE = 'T''bl'"), + "expected single-quote-escaped 
table in: {}", + sql_escaped + ); + } +} diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 6646ada1..d1e9e332 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -306,6 +306,9 @@ pub mod odbc; #[cfg(feature = "odbc")] pub mod snowflake; +#[cfg(feature = "odbc")] +pub mod exasol; + pub mod connection; pub mod data; mod spec; diff --git a/src/reader/odbc.rs b/src/reader/odbc.rs index 427467e4..b1d51370 100644 --- a/src/reader/odbc.rs +++ b/src/reader/odbc.rs @@ -1,7 +1,8 @@ //! Generic ODBC data source implementation //! -//! Provides a reader for any ODBC-compatible database (Snowflake, PostgreSQL, -//! SQL Server, etc.) using the `odbc-api` crate. +//! Provides a reader for any ODBC-compatible database (Snowflake, Exasol, +//! PostgreSQL, SQL Server, etc.) using the `odbc-api` crate. Backend-specific +//! SQL dialects are dispatched at connection time via `detect_dialect()`. use crate::reader::Reader; use crate::{naming, DataFrame, GgsqlError, Result}; @@ -48,6 +49,8 @@ fn detect_dialect(conn_str: &str) -> Box { { Box::new(super::AnsiDialect) } + } else if lower.contains("driver=exasol") || lower.contains("driver={exasol") { + Box::new(super::exasol::ExasolDialect) } else { Box::new(super::AnsiDialect) } @@ -69,6 +72,16 @@ impl OdbcReader { /// Create a new ODBC reader from a `odbc://` connection URI. /// /// The URI format is `odbc://` followed by the raw ODBC connection string. + /// + /// # Examples + /// + /// ```text + /// // Snowflake + /// odbc://Driver=Snowflake;Server=acct.snowflakecomputing.com;... 
+ /// + /// // Exasol + /// odbc://Driver=Exasol;EXAHOST=host:8563;EXAUID=user;EXAPWD=pass + /// ``` pub fn from_connection_string(uri: &str) -> Result { let conn_str = uri .strip_prefix("odbc://") @@ -763,6 +776,21 @@ mod tests { // Generic uses information_schema (ANSI default) let dialect = detect_dialect("Driver=SomeOther;Server=localhost"); assert!(dialect.sql_list_catalogs().contains("information_schema")); + + // Exasol uses SYS.EXA_SCHEMAS (no information_schema) + let dialect = detect_dialect("Driver=Exasol;EXAHOST=foo:8563"); + assert_eq!(dialect.string_type_name(), Some("VARCHAR(2000000)")); + assert!(dialect.sql_list_catalogs().contains("SYS.EXA_SCHEMAS")); + } + + #[test] + fn test_detect_dialect_exasol_curly_form() { + let dialect = detect_dialect("Driver={Exasol};EXAHOST=foo:8563"); + assert_eq!(dialect.string_type_name(), Some("VARCHAR(2000000)")); + + // Mixed-case driver name still resolves to Exasol dialect + let dialect = detect_dialect("DRIVER={EXASOL};EXAHOST=foo:8563"); + assert_eq!(dialect.string_type_name(), Some("VARCHAR(2000000)")); } #[test]