diff --git a/Cargo.lock b/Cargo.lock index 92e97c16..9eaba74b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1123,6 +1123,43 @@ dependencies = [ "version_check", ] +[[package]] +name = "geo-types" +version = "0.7.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94776032c45f950d30a13af6113c2ad5625316c9abfbccee4dd5a6695f8fe0f5" +dependencies = [ + "approx", + "num-traits", + "serde", +] + +[[package]] +name = "geojson" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e26f3c45b36fccc9cf2805e61d4da6bc4bbd5a3a9589b01afa3a40eff703bd79" +dependencies = [ + "log", + "serde", + "serde_json", + "thiserror 2.0.18", +] + +[[package]] +name = "geozero" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5f28f34864745eb2f123c990c6ffd92c1584bd39439b3f27ff2a0f4ea5b309b" +dependencies = [ + "geojson", + "log", + "scroll", + "serde_json", + "thiserror 1.0.69", + "wkt", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -1173,6 +1210,7 @@ dependencies = [ "const_format", "csscolorparser", "duckdb", + "geozero", "jsonschema", "libloading", "palette", @@ -2688,6 +2726,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scroll" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04c565b551bafbef4157586fa379538366e4385d42082f255bfd96e4fe8519da" + [[package]] name = "seahash" version = "4.1.0" @@ -4052,6 +4096,18 @@ dependencies = [ "wasmparser", ] +[[package]] +name = "wkt" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54f7f1ff4ea4c18936d6cd26a6fd24f0003af37e951a8e0e8b9e9a2d0bd0a46d" +dependencies = [ + "geo-types", + "log", + "num-traits", + "thiserror 1.0.69", +] + [[package]] name = "writeable" version = "0.6.3" diff 
--git a/Cargo.toml b/Cargo.toml index 6efd7d5f..f0f347a0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -57,6 +57,9 @@ thiserror = "1.0" # Color interpolation palette = { version = "0.7", default-features = false, features = ["std", "approx"] } +# Spatial +geozero = { version = "0.14", default-features = false } + # Utilities regex = "1.10" chrono = "0.4" diff --git a/doc/ggsql.xml b/doc/ggsql.xml index 55e56040..26955ec7 100644 --- a/doc/ggsql.xml +++ b/doc/ggsql.xml @@ -142,6 +142,7 @@ arrow rule range + spatial @@ -188,6 +189,7 @@ slope intercept + geometry panel row diff --git a/doc/syntax/index.qmd b/doc/syntax/index.qmd index 330492f1..40f5b236 100644 --- a/doc/syntax/index.qmd +++ b/doc/syntax/index.qmd @@ -33,6 +33,7 @@ There are many different layers to choose from when visualising your data. Some - [`boxplot`](layer/type/boxplot.qmd) displays continuous variables as 5-number summaries. - [`range`](layer/type/range.qmd) a line segment between two values along an axis, with optional hinges at the endpoints. - [`smooth`](layer/type/smooth.qmd) a trendline that follows the data shape. +- [`spatial`](layer/type/spatial.qmd) renders simple features (polygons, lines and points) from a geometry column. ### Position adjustments - [`stack`](layer/position/stack.qmd) places objects with a shared baseline on top of each other. diff --git a/doc/syntax/layer/type/spatial.qmd b/doc/syntax/layer/type/spatial.qmd new file mode 100644 index 00000000..15153d1a --- /dev/null +++ b/doc/syntax/layer/type/spatial.qmd @@ -0,0 +1,86 @@ +--- +title: "Spatial" +--- + +> Layers are declared with the [`DRAW` clause](../../clause/draw.qmd). Read the documentation for this clause for a thorough description of how to use it. + +The spatial layer renders geographic geometries (polygons, lines and points) and is used to make maps such as choropleths. +It differs from other layers in that it uses a special [simple features](https://en.wikipedia.org/wiki/Simple_Features) geometry column that defines the shapes. 
+ +## Aesthetics +The following aesthetics are recognised by the spatial layer. + +### Required +* `geometry`: a column of simple features. + +Note that the `geometry` column is required, but an attempt is made to detect such a column automatically. +In practice, this mapping rarely needs to be declared. + +### Optional +* `stroke` The colour of the lines. +* `fill` The colour of the inner area. +* `colour` Shorthand for setting `stroke` and `fill` simultaneously. +* `opacity` The opacity of colours. +* `linewidth` The width of the lines. +* `linetype` The dash pattern of the lines. + +## Settings +The spatial layer has no additional settings. + +## Data transformation +The spatial layer transforms the `geometry` column to [Well-Known Binary](https://libgeos.org/specifications/wkb/). + +## Orientation +The spatial layer has no orientations. + +## Examples + +Note that depending on your reader, you may need to activate modules for spatial analysis. + +```{ggsql} +-- For example, for DuckDB, one could use: +INSTALL spatial; +LOAD spatial; +``` + +A basic map of the world using built-in data. +Note that the geometry column is automatically detected. + +```{ggsql} +VISUALISE FROM ggsql:world +DRAW spatial +``` + +If the geometry column isn't automatically detected — for example because it has a non-standard name — you may need to declare the mapping explicitly. + +```{ggsql} +SELECT geom AS foo FROM ggsql:world +VISUALISE +DRAW spatial + MAPPING foo AS geometry +``` + +Filtering on other columns. + +```{ggsql} +VISUALISE FROM ggsql:world +DRAW spatial + FILTER continent == 'Asia' +``` + +Filtering based on spatial operations. + +```{ggsql} +VISUALISE FROM ggsql:world +DRAW spatial + FILTER ST_Intersects(geom, ST_MakeEnvelope(-20.0, -35.0, 55.0, 38.0)) +``` + +Make a choropleth map by mapping a variable to a fill aesthetic. 
+ +```{ggsql} +VISUALISE FROM ggsql:world +DRAW spatial + MAPPING population AS fill + SETTING opacity => 1 +``` \ No newline at end of file diff --git a/ggsql-vscode/syntaxes/ggsql.tmLanguage.json b/ggsql-vscode/syntaxes/ggsql.tmLanguage.json index fb5f127e..bb2dbc82 100644 --- a/ggsql-vscode/syntaxes/ggsql.tmLanguage.json +++ b/ggsql-vscode/syntaxes/ggsql.tmLanguage.json @@ -269,7 +269,7 @@ { "comment": "Specialty and computed aesthetics", "name": "support.type.aesthetic.ggsql", - "match": "\\b(weight|coef|intercept|offset|density|count|intensity)\\b" + "match": "\\b(weight|coef|intercept|offset|density|count|intensity|geometry)\\b" }, { "comment": "Facet aesthetics", @@ -320,7 +320,7 @@ { "comment": "Geom types from grammar.js", "name": "support.type.geom.ggsql", - "match": "\\b(point|line|path|bar|col|area|tile|polygon|ribbon|histogram|density|smooth|boxplot|violin|text|label|segment|arrow|rule|range)\\b" + "match": "\\b(point|line|path|bar|col|area|tile|polygon|ribbon|histogram|density|smooth|boxplot|violin|text|label|segment|arrow|rule|range|spatial)\\b" }, { "include": "#common-clause-patterns" } ] @@ -334,7 +334,7 @@ "patterns": [ { "name": "support.type.geom.ggsql", - "match": "\\b(point|line|path|bar|col|area|tile|polygon|ribbon|histogram|density|smooth|boxplot|violin|text|label|segment|arrow|rule|range)\\b" + "match": "\\b(point|line|path|bar|col|area|tile|polygon|ribbon|histogram|density|smooth|boxplot|violin|text|label|segment|arrow|rule|range|spatial)\\b" }, { "include": "#common-clause-patterns" } ] diff --git a/src/Cargo.toml b/src/Cargo.toml index 91452030..9bcd0d8d 100644 --- a/src/Cargo.toml +++ b/src/Cargo.toml @@ -32,6 +32,9 @@ libloading = { workspace = true, optional = true } parquet = { workspace = true, optional = true } bytes = { workspace = true } +# Spatial +geozero = { workspace = true, optional = true, features = ["with-wkb", "with-geojson"] } + # Serialization serde.workspace = true serde_json.workspace = true @@ -53,11 +56,12 @@ 
tempfile = "3.8" ureq = "3" [features] -default = ["duckdb", "sqlite", "vegalite", "parquet", "builtin-data", "odbc"] +default = ["duckdb", "sqlite", "vegalite", "parquet", "builtin-data", "odbc", "spatial"] duckdb = ["dep:duckdb"] parquet = ["dep:parquet"] sqlite = ["dep:rusqlite"] odbc = ["dep:toml_edit", "dep:libloading"] +spatial = ["dep:geozero"] vegalite = [] builtin-data = [] all-readers = ["duckdb", "sqlite", "odbc"] diff --git a/src/data/world.parquet b/src/data/world.parquet new file mode 100644 index 00000000..5baadbb0 Binary files /dev/null and b/src/data/world.parquet differ diff --git a/src/execute/mod.rs b/src/execute/mod.rs index 73ccf461..489ef3b0 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -1130,6 +1130,17 @@ pub fn prepare_data_with_reader(query: &str, reader: &dyn Reader) -> Result = data + .iter() + .filter(|r| r[naming::SOURCE_COLUMN] == layer_key.as_str()) + .collect(); + assert_eq!(spatial_rows.len(), 2); + + let feature = &spatial_rows[0]; + assert_eq!(feature["type"], "Feature"); + assert_eq!(feature["geometry"]["type"], "Polygon"); + } + /// Belt-and-braces regression test: a representative basket of error- /// triggering queries must never produce a user-visible message that /// contains an internal aesthetic name (`__ggsql_aes_*`, `pos1`, `pos2`, diff --git a/src/parser/builder.rs b/src/parser/builder.rs index 737b376a..3d5498fa 100644 --- a/src/parser/builder.rs +++ b/src/parser/builder.rs @@ -636,6 +636,7 @@ fn parse_geom_type(text: &str) -> Result { "arrow" => Ok(Geom::arrow()), "rule" => Ok(Geom::rule()), "range" => Ok(Geom::range()), + "spatial" => Ok(Geom::spatial()), _ => Err(GgsqlError::ParseError(format!( "Unknown geom type: {}", text diff --git a/src/plot/layer/geom/mod.rs b/src/plot/layer/geom/mod.rs index 56f97e11..fa90d035 100644 --- a/src/plot/layer/geom/mod.rs +++ b/src/plot/layer/geom/mod.rs @@ -43,6 +43,7 @@ mod ribbon; mod rule; mod segment; mod smooth; +mod spatial; mod text; mod tile; mod violin; @@ 
-68,6 +69,7 @@ pub use ribbon::Ribbon; pub use rule::Rule; pub use segment::Segment; pub use smooth::Smooth; +pub use spatial::Spatial; pub use text::Text; pub use tile::Tile; pub use violin::Violin; @@ -97,6 +99,7 @@ pub enum GeomType { Arrow, Rule, Range, + Spatial, } impl std::fmt::Display for GeomType { @@ -120,6 +123,7 @@ impl std::fmt::Display for GeomType { GeomType::Arrow => "arrow", GeomType::Rule => "rule", GeomType::Range => "range", + GeomType::Spatial => "spatial", }; write!(f, "{}", s) } @@ -225,6 +229,20 @@ pub trait GeomTrait: std::fmt::Debug + std::fmt::Display + Send + Sync { Ok(df) } + /// Auto-detect aesthetic mappings that require backend introspection. + /// + /// Called early in the pipeline (before global mapping merge) to let geoms + /// resolve aesthetics that depend on the database schema rather than the + /// Arrow schema. The default implementation does nothing. + fn detect_aesthetics( + &self, + _mappings: &mut Mappings, + _source_query: &str, + _schema: &Schema, + _reader: &dyn crate::reader::Reader, + ) { + } + /// Adjust layer mappings and parameters based on geom-specific logic. 
/// /// This method is called during layer execution to allow geoms to customize @@ -350,6 +368,11 @@ impl Geom { Self(Arc::new(Range)) } + /// Create a Spatial geom + pub fn spatial() -> Self { + Self(Arc::new(Spatial)) + } + /// Create a Geom from a GeomType pub fn from_type(t: GeomType) -> Self { match t { @@ -371,6 +394,7 @@ impl Geom { GeomType::Arrow => Self::arrow(), GeomType::Rule => Self::rule(), GeomType::Range => Self::range(), + GeomType::Spatial => Self::spatial(), } } @@ -450,6 +474,18 @@ impl Geom { self.0.setup_layer(mappings, parameters) } + /// Auto-detect aesthetics that require backend introspection + pub fn detect_aesthetics( + &self, + mappings: &mut Mappings, + source_query: &str, + schema: &Schema, + reader: &dyn crate::reader::Reader, + ) { + self.0 + .detect_aesthetics(mappings, source_query, schema, reader) + } + /// Get valid settings pub fn valid_settings(&self) -> Vec<&'static str> { self.0.valid_settings() @@ -583,6 +619,7 @@ mod tests { GeomType::Arrow, GeomType::Rule, GeomType::Range, + GeomType::Spatial, ]; // This test is rigged to trigger a compiler error when new variants are added. 
@@ -605,7 +642,8 @@ mod tests { | GeomType::Segment | GeomType::Arrow | GeomType::Rule - | GeomType::Range => {} + | GeomType::Range + | GeomType::Spatial => {} }; for geom_type in all_geom_types { diff --git a/src/plot/layer/geom/spatial.rs b/src/plot/layer/geom/spatial.rs new file mode 100644 index 00000000..92daba1c --- /dev/null +++ b/src/plot/layer/geom/spatial.rs @@ -0,0 +1,110 @@ +use super::{DefaultAesthetics, GeomTrait, GeomType, StatResult}; +use crate::plot::types::{AestheticValue, DefaultAestheticValue, Schema}; +use crate::{naming, Mappings}; + +#[derive(Debug, Clone, Copy)] +pub struct Spatial; + +impl GeomTrait for Spatial { + fn geom_type(&self) -> GeomType { + GeomType::Spatial + } + + fn aesthetics(&self) -> DefaultAesthetics { + DefaultAesthetics { + defaults: &[ + ("geometry", DefaultAestheticValue::Required), + ("fill", DefaultAestheticValue::String("#747474")), + ("stroke", DefaultAestheticValue::String("black")), + ("opacity", DefaultAestheticValue::Number(0.8)), + ("linewidth", DefaultAestheticValue::Number(0.2)), + ("linetype", DefaultAestheticValue::String("solid")), + ], + } + } + + fn detect_aesthetics( + &self, + mappings: &mut Mappings, + source_query: &str, + schema: &Schema, + reader: &dyn crate::reader::Reader, + ) { + if mappings.aesthetics.contains_key("geometry") { + return; + } + + // Prefer columns the backend reports as native geometry + let native_cols = reader.geometry_columns(source_query); + match native_cols.len() { + 1 => { + mappings.aesthetics.insert( + "geometry".to_string(), + AestheticValue::standard_column(&native_cols[0]), + ); + return; + } + // Ambiguous — user must declare explicitly + n if n > 1 => return, + _ => {} + } + + // Fall back to name + binary type heuristics + use arrow::datatypes::DataType; + let candidates: Vec<_> = schema + .iter() + .filter(|c| { + matches!( + c.name.to_lowercase().as_str(), + "geom" | "geometry" | "wkb_geometry" | "the_geom" | "shape" + ) && matches!( + c.dtype, + 
DataType::Binary | DataType::LargeBinary | DataType::BinaryView + ) + }) + .collect(); + + if candidates.len() == 1 { + mappings.aesthetics.insert( + "geometry".to_string(), + AestheticValue::standard_column(&candidates[0].name), + ); + } + } + + fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { + true + } + + fn apply_stat_transform( + &self, + query: &str, + _schema: &crate::plot::Schema, + _aesthetics: &Mappings, + _group_by: &[String], + _parameters: &std::collections::HashMap, + execute_query: &dyn Fn(&str) -> crate::Result, + dialect: &dyn crate::reader::SqlDialect, + ) -> crate::Result { + for stmt in dialect.sql_spatial_setup() { + execute_query(&stmt)?; + } + + // Geometry columns use database-native types that don't have an Arrow equivalent. + // Convert to standard WKB so the writer can parse them with geozero. + let col = naming::quote_ident(&naming::aesthetic_column("geometry")); + let wkb_expr = dialect.sql_geometry_to_wkb(&col); + Ok(StatResult::Transformed { + query: format!("SELECT * REPLACE ({wkb_expr} AS {col}) FROM ({query})"), + stat_columns: vec![], + dummy_columns: vec![], + consumed_aesthetics: vec![], + }) + } +} + +impl std::fmt::Display for Spatial { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "spatial") + } +} diff --git a/src/reader/data.rs b/src/reader/data.rs index 1dd90934..abf9e960 100644 --- a/src/reader/data.rs +++ b/src/reader/data.rs @@ -46,69 +46,20 @@ static AIRQUALITY: &[u8] = include_bytes!(concat!( "/data/airquality.parquet" )); +#[cfg(feature = "builtin-data")] +static WORLD: &[u8] = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/data/world.parquet")); + /// Get the embedded parquet bytes for a known builtin dataset. 
#[cfg(feature = "builtin-data")] pub fn builtin_parquet_bytes(name: &str) -> Option<&'static [u8]> { match name { "penguins" => Some(PENGUINS), "airquality" => Some(AIRQUALITY), + "world" => Some(WORLD), _ => None, } } -// ============================================================================= -// DuckDB builtin data registration (requires duckdb + builtin-data) -// ============================================================================= - -/// Register any builtin datasets referenced in the SQL with a DuckDB connection. -/// -/// Finds `ggsql:X` patterns in the SQL, writes the embedded parquet data to -/// a temp file, and creates a table named `__ggsql_data_X__` in DuckDB. -#[cfg(all(feature = "duckdb", feature = "builtin-data"))] -pub fn register_builtin_datasets_duckdb( - sql: &str, - conn: &duckdb::Connection, -) -> Result<(), GgsqlError> { - use std::{env, fs}; - - let dataset_names = extract_builtin_dataset_names(sql)?; - for name in dataset_names { - let Some(parquet_bytes) = builtin_parquet_bytes(&name) else { - continue; - }; - - let table_name = naming::builtin_data_table(&name); - - // Write parquet to temp file for DuckDB's read_parquet - let mut tmp_path = env::temp_dir(); - tmp_path.push(format!("{}.parquet", name)); - if !tmp_path.exists() { - fs::write(&tmp_path, parquet_bytes).map_err(|e| { - GgsqlError::ReaderError(format!( - "Failed to write builtin dataset '{}' to {}: {}", - name, - tmp_path.display(), - e - )) - })?; - } - - let create_sql = format!( - "CREATE TABLE IF NOT EXISTS {} AS SELECT * FROM read_parquet('{}')", - naming::quote_ident(&table_name), - tmp_path.display() - ); - - conn.execute(&create_sql, duckdb::params![]).map_err(|e| { - GgsqlError::ReaderError(format!( - "Failed to register builtin dataset '{}': {}", - name, e - )) - })?; - } - Ok(()) -} - // ============================================================================= // Arrow-based builtin data loading // 
============================================================================= @@ -120,6 +71,7 @@ pub fn load_builtin_dataframe(name: &str) -> Result PENGUINS, "airquality" => AIRQUALITY, + "world" => WORLD, _ => { return Err(GgsqlError::ReaderError(format!( "Unknown builtin dataset: '{}'", @@ -160,7 +112,7 @@ pub fn load_builtin_dataframe(name: &str) -> Result bool { diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index 8d6a18aa..b7369f5d 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -13,6 +13,63 @@ use std::cell::RefCell; use std::collections::HashSet; use std::sync::Arc; +// ============================================================================= +// DuckDB builtin data registration +// ============================================================================= + +/// Register any builtin datasets referenced in the SQL with a DuckDB connection. +/// +/// Finds `ggsql:X` patterns in the SQL, writes the embedded parquet data to +/// a temp file, and creates a table named `__ggsql_data_X__` in DuckDB. +#[cfg(feature = "builtin-data")] +fn register_builtin_datasets_duckdb(sql: &str, conn: &Connection) -> Result<()> { + use std::{env, fs}; + + let dataset_names = super::data::extract_builtin_dataset_names(sql)?; + + // Load spatial extension before registering datasets that contain + // geometry columns, so DuckDB reads them as GEOMETRY rather than BLOB. 
+ if dataset_names.iter().any(|n| n == "world") { + let _ = conn.execute("LOAD spatial", params![]); + } + + for name in dataset_names { + let Some(parquet_bytes) = super::data::builtin_parquet_bytes(&name) else { + continue; + }; + + let table_name = naming::builtin_data_table(&name); + + // Write parquet to temp file for DuckDB's read_parquet + let mut tmp_path = env::temp_dir(); + tmp_path.push(format!("{}.parquet", name)); + if !tmp_path.exists() { + fs::write(&tmp_path, parquet_bytes).map_err(|e| { + GgsqlError::ReaderError(format!( + "Failed to write builtin dataset '{}' to {}: {}", + name, + tmp_path.display(), + e + )) + })?; + } + + let create_sql = format!( + "CREATE TABLE IF NOT EXISTS {} AS SELECT * FROM read_parquet('{}')", + naming::quote_ident(&table_name), + tmp_path.display() + ); + + conn.execute(&create_sql, params![]).map_err(|e| { + GgsqlError::ReaderError(format!( + "Failed to register builtin dataset '{}': {}", + name, e + )) + })?; + } + Ok(()) +} + /// DuckDB SQL dialect with native function support. 
/// /// Overrides SQL generation methods to use DuckDB-native functions @@ -34,6 +91,14 @@ impl super::SqlDialect for DuckDbDialect { format!("LEAST({})", exprs.join(", ")) } + fn sql_geometry_to_wkb(&self, column: &str) -> String { + format!("ST_AsWKB({column})") + } + + fn sql_spatial_setup(&self) -> Vec { + vec!["LOAD spatial".into()] + } + fn sql_generate_series(&self, n: usize) -> String { format!( "\"__ggsql_seq__\"(n) AS (SELECT generate_series FROM GENERATE_SERIES(0, {}))", @@ -206,7 +271,7 @@ impl Reader for DuckDBReader { fn execute_sql(&self, sql: &str) -> Result { // Register builtin datasets if referenced #[cfg(feature = "builtin-data")] - super::data::register_builtin_datasets_duckdb(sql, &self.conn)?; + register_builtin_datasets_duckdb(sql, &self.conn)?; // Rewrite ggsql:name → __ggsql_data_name__ in SQL let sql = super::data::rewrite_namespaced_sql(sql)?; @@ -353,6 +418,22 @@ impl Reader for DuckDBReader { fn dialect(&self) -> &dyn super::SqlDialect { &DuckDbDialect } + + fn geometry_columns(&self, source_query: &str) -> Vec { + let sql = format!( + "SELECT column_name FROM (DESCRIBE ({source_query})) WHERE column_type = 'GEOMETRY'" + ); + self.execute_sql(&sql) + .ok() + .and_then(|df| { + df.inner() + .column(0) + .as_any() + .downcast_ref::() + .map(|arr| arr.iter().flatten().map(|s| s.to_string()).collect()) + }) + .unwrap_or_default() + } } #[cfg(test)] diff --git a/src/reader/mod.rs b/src/reader/mod.rs index b103c185..9d14b2e6 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -114,6 +114,21 @@ pub trait SqlDialect { result } + /// SQL expression to convert a geometry column to WKB. + /// + /// Default uses `ST_AsBinary` (OGC standard). Override for backends + /// with different function names (e.g. DuckDB uses `ST_AsWKB`). + fn sql_geometry_to_wkb(&self, column: &str) -> String { + format!("ST_AsBinary({column})") + } + + /// SQL statements to run before spatial operations. 
+ /// + /// Override for backends that need an extension loaded (e.g. DuckDB spatial). + fn sql_spatial_setup(&self) -> Vec { + vec![] + } + /// Generate a series of integers 0..n-1 as a CTE fragment. /// /// Returns CTE fragment(s) producing table `__ggsql_seq__` with column `n`. @@ -543,6 +558,15 @@ pub trait Reader { } Ok(results) } + + /// Return column names that have a native geometry type in the given query. + /// + /// Backends that support spatial types override this to introspect their + /// native type system (e.g. DuckDB DESCRIBE). The default returns an empty + /// vec, which causes the caller to fall back to heuristics. + fn geometry_columns(&self, _source_query: &str) -> Vec { + Vec::new() + } } /// A table or view in the schema. diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index 30c799a5..dce96d29 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -46,6 +46,7 @@ pub fn geom_to_mark(geom: &Geom) -> Value { GeomType::Smooth => "line", GeomType::Rule => "rule", GeomType::Range => "rule", + GeomType::Spatial => "geoshape", _ => "point", // Default fallback }; json!({ @@ -2129,6 +2130,130 @@ impl GeomRenderer for BoxplotRenderer { } } +// ============================================================================= +// Spatial Renderer +// ============================================================================= + +struct SpatialRenderer; + +#[cfg(feature = "spatial")] +impl SpatialRenderer { + fn wkb_to_geojson(wkb_bytes: &[u8]) -> Result { + use geozero::geojson::GeoJsonWriter; + use geozero::wkb::Wkb; + use geozero::GeozeroGeometry; + use std::io::Cursor; + + let mut geojson_out = Vec::new(); + let wkb = Wkb(wkb_bytes); + wkb.process_geom(&mut GeoJsonWriter::new(Cursor::new(&mut geojson_out))) + .map_err(|e| { + GgsqlError::WriterError(format!("Failed to convert WKB to GeoJSON: {}", e)) + })?; + + serde_json::from_slice(&geojson_out) + .map_err(|e| GgsqlError::WriterError(format!("Invalid 
GeoJSON from WKB: {}", e))) + } + + fn parse_geometry_from_array(array: &arrow::array::ArrayRef, idx: usize) -> Result { + use arrow::datatypes::DataType; + + if array.is_null(idx) { + return Ok(Value::Null); + } + + match array.data_type() { + DataType::Binary => { + let bin = array + .as_any() + .downcast_ref::() + .ok_or_else(|| { + GgsqlError::WriterError("Failed to read geometry as Binary".into()) + })?; + Self::wkb_to_geojson(bin.value(idx)) + } + DataType::LargeBinary => { + let bin = array + .as_any() + .downcast_ref::() + .ok_or_else(|| { + GgsqlError::WriterError("Failed to read geometry as LargeBinary".into()) + })?; + Self::wkb_to_geojson(bin.value(idx)) + } + other => Err(GgsqlError::WriterError(format!( + "Geometry column has unsupported type {:?}; expected Binary (WKB)", + other + ))), + } + } +} + +impl GeomRenderer for SpatialRenderer { + fn prepare_data( + &self, + df: &DataFrame, + _layer: &Layer, + _data_key: &str, + _binned_columns: &HashMap>, + ) -> Result { + #[cfg(not(feature = "spatial"))] + { + return Err(GgsqlError::WriterError( + "Spatial visualization requires the 'spatial' feature to be enabled".to_string(), + )); + } + + #[cfg(feature = "spatial")] + { + let geometry_col = naming::aesthetic_column("geometry"); + + let col_names: Vec = df + .get_column_names() + .iter() + .map(|s| s.to_string()) + .collect(); + + let mut features = Vec::with_capacity(df.height()); + + for row_idx in 0..df.height() { + let mut feature = serde_json::Map::new(); + feature.insert("type".to_string(), json!("Feature")); + + for col_name in &col_names { + let col = df.column(col_name).map_err(|e| { + GgsqlError::WriterError(format!( + "Failed to get column '{}': {}", + col_name, e + )) + })?; + + if *col_name == geometry_col { + let geom = Self::parse_geometry_from_array(col, row_idx)?; + feature.insert("geometry".to_string(), geom); + } else if !matches!( + // These are reserved names for the geojson format, so + // we shouldn't be inserting columns with 
that name. + // Our naming module should prevent such collisions, + // so this is purely defensive. + col_name.as_str(), + "type" | "geometry" | "properties" | "bbox" | "id" + ) { + let value = super::data::series_value_at(col, row_idx)?; + feature.insert(col_name.clone(), value); + } + } + features.push(Value::Object(feature)); + } + + Ok(PreparedData::Single { + values: features, + metadata: Box::new(()), + }) + } + } +} + // ============================================================================= // Dispatcher // ============================================================================= @@ -2147,6 +2272,7 @@ pub fn get_renderer(geom: &Geom) -> Box { GeomType::Text => Box::new(TextRenderer), GeomType::Range => Box::new(RangeRenderer), GeomType::Rule => Box::new(RuleRenderer), + GeomType::Spatial => Box::new(SpatialRenderer), // All other geoms (Point, Area, Ribbon, Density, Segment, etc.) use the default renderer _ => Box::new(DefaultRenderer), } diff --git a/src/writer/vegalite/mod.rs b/src/writer/vegalite/mod.rs index 97e585b1..72752676 100644 --- a/src/writer/vegalite/mod.rs +++ b/src/writer/vegalite/mod.rs @@ -296,6 +296,12 @@ fn build_layer_encoding( continue; } + // Skip geometry aesthetic - it is structural (consumed by SpatialRenderer + // to build GeoJSON Features), not a visual encoding channel. 
+ if aesthetic == "geometry" { + continue; + } + let mut channel_name = map_aesthetic_name(aesthetic, &aesthetic_ctx, projection); // Opacity is retargeted to the fill when fill is supported if channel_name == "opacity" && layer.mappings.contains_key("fill") { diff --git a/tree-sitter-ggsql/grammar.js b/tree-sitter-ggsql/grammar.js index a1b53e87..70495609 100644 --- a/tree-sitter-ggsql/grammar.js +++ b/tree-sitter-ggsql/grammar.js @@ -534,7 +534,8 @@ module.exports = grammar({ geom_type: $ => choice( 'point', 'line', 'path', 'bar', 'area', 'tile', 'polygon', 'ribbon', 'histogram', 'density', 'smooth', 'boxplot', 'violin', - 'text', 'label', 'segment', 'arrow', 'rule', 'range' + 'text', 'label', 'segment', 'arrow', 'rule', 'range', + 'spatial' ), // MAPPING clause for aesthetic mappings: MAPPING col AS x, "blue" AS color [FROM source] @@ -716,7 +717,7 @@ module.exports = grammar({ // Text aesthetics 'label', 'typeface', 'fontweight', 'italic', 'fontsize', 'hjust', 'vjust', 'rotation', // Specialty aesthetics, - 'slope', + 'slope', 'geometry', // Facet aesthetics 'panel', 'row', 'column', // Computed variables diff --git a/tree-sitter-ggsql/queries/highlights.scm b/tree-sitter-ggsql/queries/highlights.scm index 492909d1..e8319356 100644 --- a/tree-sitter-ggsql/queries/highlights.scm +++ b/tree-sitter-ggsql/queries/highlights.scm @@ -26,6 +26,7 @@ "arrow" "rule" "range" + "spatial" ] @type.builtin ; Aesthetic names @@ -74,6 +75,7 @@ "rotation" ; Specialty aesthetics "slope" + "geometry" ; Facet aesthetics "panel" "row"