diff --git a/CHANGELOG.md b/CHANGELOG.md index 149aca60e..90d2d5fe9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,12 @@ assumption in the VegaLite writer. We now correctly use the orientation to dodge in the correct dimension (#439). +### Changed + +- `boxplot`, `violin`, and `range` now support omitting the categorical + aesthetic, matching `bar`. `point` now treats both position aesthetics as + optional. + ## 0.3.2 - 2026-05-05 ### Fixed diff --git a/doc/syntax/layer/type/boxplot.qmd b/doc/syntax/layer/type/boxplot.qmd index 601f77663..20af39eb0 100644 --- a/doc/syntax/layer/type/boxplot.qmd +++ b/doc/syntax/layer/type/boxplot.qmd @@ -9,10 +9,12 @@ Boxplots display a summary of a continuous distribution. In the style of Tukey, The following aesthetics are recognised by the boxplot layer. ### Required -* Primary axis (e.g. `x`): The categorical variable to group by * Secondary axis (e.g. `y`): The continuous variable to summarize ### Optional +* Primary axis (e.g. `x`): The categorical variable to group by. If omitted a + single boxplot is drawn for the whole distribution and the (one-tick) + categorical axis is hidden. * `stroke`: The colour of the box contours, whiskers, median line and outliers. * `fill`: The colour of the box interior. * `colour`: Shorthand for setting `stroke` and `fill` simultaneously. Note that the median line will have bad visibility if `stroke` and `fill` are the same. 
@@ -96,6 +98,15 @@ DRAW boxplot MAPPING species AS y, bill_len AS x ``` +Omit the categorical axis to summarise the whole distribution as a single +boxplot: + +```{ggsql} +VISUALISE FROM ggsql:penguins +DRAW boxplot + MAPPING bill_len AS y +``` + Pair a half-violin with a half-boxplot on the same category by setting opposite `side` values: ```{ggsql} diff --git a/doc/syntax/layer/type/point.qmd b/doc/syntax/layer/type/point.qmd index b9fd50163..aa167ca7f 100644 --- a/doc/syntax/layer/type/point.qmd +++ b/doc/syntax/layer/type/point.qmd @@ -10,10 +10,15 @@ The point layer is used to create scatterplots. The scatterplot is most useful f The following aesthetics are recognised by the point layer. ### Required -* Primary axis (e.g. `x`): Position along the primary axis. -* Secondary axis (e.g. `y`): Position along the secondary axis. +The point layer has no required aesthetics. ### Optional +* Primary axis (e.g. `x`): Position along the primary axis. If omitted, all + points are drawn at a single discrete primary-axis position (a strip plot) + and the categorical axis is hidden. +* Secondary axis (e.g. `y`): Position along the secondary axis. Same dummy-axis + treatment as the primary. If both axes are omitted, all rows pile up at a + single point — only useful in combination with `aggregate`. * `size`: The size of each point * `colour`: The default colour of each point * `stroke`: The colour of the stroke around each point (if any). Overrides `colour` diff --git a/doc/syntax/layer/type/range.qmd b/doc/syntax/layer/type/range.qmd index 35771ef9f..cead370ba 100644 --- a/doc/syntax/layer/type/range.qmd +++ b/doc/syntax/layer/type/range.qmd @@ -10,11 +10,13 @@ The range layer displays an interval between two values along the secondary axis The following aesthetics are recognised by the range layer. ### Required -* Primary axis (e.g. `x`): Position along the primary axis. * Secondary axis minimum (e.g. `ymin`): Lower position along the secondary axis. 
* Secondary axis maximum (e.g. `ymax`): Upper position along the secondary axis. ### Optional +* Primary axis (e.g. `x`): Position along the primary axis. If omitted a + single interval is drawn over the whole dataset and the (one-tick) + categorical axis is hidden. * `stroke`/`colour`: The colour of the lines in the range. * `opacity`: The opacity of the colour. * `linewidth`: The width of the lines in the range. diff --git a/doc/syntax/layer/type/violin.qmd b/doc/syntax/layer/type/violin.qmd index 1c95dc252..9b0a03603 100644 --- a/doc/syntax/layer/type/violin.qmd +++ b/doc/syntax/layer/type/violin.qmd @@ -11,10 +11,12 @@ The violins are mirrored kernel density estimates, similar to the [density](dens The following aesthetics are recognised by the violin layer. ### Required -* Primary axis (e.g. `x`): The categorical variable for grouping. * Secondary axis (e.g. `y`): The continuous variable to compute density for. ### Optional +* Primary axis (e.g. `x`): The categorical variable for grouping. If omitted + a single violin is drawn for the whole distribution and the (one-tick) + categorical axis is hidden. * `stroke`: The colour of the contour lines. * `fill`: The colour of the inner area. * `colour`: Shorthand for setting `stroke` and `fill` simultaneously. diff --git a/src/execute/layer.rs b/src/execute/layer.rs index 7a75a1d48..c78480444 100644 --- a/src/execute/layer.rs +++ b/src/execute/layer.rs @@ -567,7 +567,8 @@ where // Apply literal default remappings from geom defaults (e.g., y2 => 0.0 for bar baseline). // These apply regardless of stat transform, but only if user hasn't overridden them. // Defaults are always in aligned orientation. 
- for (aesthetic, default_value) in layer.geom.default_remappings().defaults { + let implicit_remappings = layer.geom.implicit_default_remappings(); + for (aesthetic, default_value) in &implicit_remappings { // Only process literal values here (Column values are handled in Transformed branch) if !matches!(default_value, DefaultAestheticValue::Column(_)) { // Only add if user hasn't already specified this aesthetic in remappings or mappings @@ -591,7 +592,7 @@ where // Build stat column -> aesthetic mappings from geom defaults for renaming let mut final_remappings: HashMap<String, String> = HashMap::new(); - for (aesthetic, default_value) in layer.geom.default_remappings().defaults { + for (aesthetic, default_value) in &implicit_remappings { if let DefaultAestheticValue::Column(stat_col) = default_value { // Stat column mapping: stat_col -> aesthetic (for rename) final_remappings.insert(stat_col.to_string(), aesthetic.to_string()); diff --git a/src/execute/mod.rs b/src/execute/mod.rs index 04963f94b..c26f7cd0d 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -127,7 +127,7 @@ fn validate( // Validate remapping source columns are valid stat columns for this geom. // Geoms that opt into the Aggregate stat (`supports_aggregate`) also accept // `aggregate`, `count`, and any position aesthetic name as a stat source. - let valid_stat_columns = layer.geom.valid_stat_columns(); + let valid_stat_columns = layer.geom.implicit_valid_stat_columns(); let supports_aggregate = layer.geom.supports_aggregate(); for stat_value in layer.remappings.aesthetics.values() { if let Some(stat_col) = stat_value.column_name() { @@ -3048,11 +3048,12 @@ mod tests { ) .unwrap(); - // Query missing required aesthetic 'y' - should show 'y' not 'pos2' + // Query missing required aesthetic 'y' - should show 'y' not 'pos2'. + // Use line, which still requires both x and y (point's x is optional). 
let query = r#" SELECT * FROM test_data VISUALISE - DRAW point MAPPING a AS x + DRAW line MAPPING a AS x "#; let result = prepare_data_with_reader(query, &reader); diff --git a/src/plot/layer/geom/area.rs b/src/plot/layer/geom/area.rs index 6fc357063..266add133 100644 --- a/src/plot/layer/geom/area.rs +++ b/src/plot/layer/geom/area.rs @@ -60,10 +60,6 @@ impl GeomTrait for Area { Some(&["pos1"]) } - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - true - } - fn apply_stat_transform( &self, query: &str, diff --git a/src/plot/layer/geom/bar.rs b/src/plot/layer/geom/bar.rs index 211e89a08..b1441cbbd 100644 --- a/src/plot/layer/geom/bar.rs +++ b/src/plot/layer/geom/bar.rs @@ -4,7 +4,7 @@ use std::collections::HashMap; use std::collections::HashSet; use super::stat_aggregate; -use super::types::{get_column_name, POSITION_VALUES}; +use super::types::{get_column_name, wrap_stat_with_dummy_pos1, POSITION_VALUES}; use super::{ has_aggregate_param, DefaultAesthetics, DefaultParamValue, GeomTrait, GeomType, ParamConstraint, ParamDefinition, StatResult, @@ -35,8 +35,8 @@ impl GeomTrait for Bar { // if we ever want to make 'width' an aesthetic, we'd probably need to // translate it to 'size'. 
defaults: &[ - ("pos1", DefaultAestheticValue::Null), // Optional - stat may provide - ("pos2", DefaultAestheticValue::Null), // Optional - stat may compute + ("pos1", DefaultAestheticValue::Dummy), // Optional - stat synthesises a dummy if omitted + ("pos2", DefaultAestheticValue::Null), // Optional - stat computes count when omitted ("pos2end", DefaultAestheticValue::Delayed), ("weight", DefaultAestheticValue::Null), ("fill", DefaultAestheticValue::String("black")), @@ -50,14 +50,13 @@ impl GeomTrait for Bar { DefaultAesthetics { defaults: &[ ("pos2", DefaultAestheticValue::Column("count")), - ("pos1", DefaultAestheticValue::Column("pos1")), ("pos2end", DefaultAestheticValue::Number(0.0)), ], } } fn valid_stat_columns(&self) -> &'static [&'static str] { - &["count", "pos1", "proportion"] + &["count", "proportion"] } fn default_params(&self) -> &'static [ParamDefinition] { @@ -85,10 +84,6 @@ impl GeomTrait for Bar { Some(&[]) } - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - true // Bar stat decides COUNT vs identity based on y mapping - } - fn apply_stat_transform( &self, query: &str, @@ -100,8 +95,8 @@ impl GeomTrait for Bar { dialect: &dyn SqlDialect, aesthetic_ctx: &crate::plot::aesthetic::AestheticContext, ) -> Result<StatResult> { - if has_aggregate_param(parameters) { - return stat_aggregate::apply( + let inner = if has_aggregate_param(parameters) { + stat_aggregate::apply( query, schema, aesthetics, @@ -110,9 +105,20 @@ impl GeomTrait for Bar { dialect, aesthetic_ctx, self.aggregate_domain_aesthetics().unwrap_or(&[]), - ); + )? + } else { + stat_bar_count(query, schema, aesthetics, group_by)? + }; + // When the user omits the categorical axis, post-wrap with the dummy + // pos1 column so the writer suppresses the one-tick axis. Composes + // with both the aggregate and identity-path outputs (the `count` + // branch of stat_bar_count already injects its own dummy column — + // wrap_stat_with_dummy_pos1's idempotency keeps that path correct). 
+ if get_column_name(aesthetics, "pos1").is_none() { + Ok(wrap_stat_with_dummy_pos1(query, inner)) + } else { + Ok(inner) } - stat_bar_count(query, schema, aesthetics, group_by) } } diff --git a/src/plot/layer/geom/boxplot.rs b/src/plot/layer/geom/boxplot.rs index 1d8b832f2..e49599eda 100644 --- a/src/plot/layer/geom/boxplot.rs +++ b/src/plot/layer/geom/boxplot.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; -use super::types::{POSITION_VALUES, SIDE_VALUES}; +use super::types::{wrap_with_dummy_axis, POSITION_VALUES, SIDE_VALUES}; use super::{DefaultAesthetics, GeomTrait, GeomType}; use crate::{ naming, @@ -26,7 +26,10 @@ impl GeomTrait for Boxplot { fn aesthetics(&self) -> DefaultAesthetics { DefaultAesthetics { defaults: &[ - ("pos1", DefaultAestheticValue::Required), + // pos1 is dummy-able. `stat_boxplot` handles the synthesis + // itself by pre-wrapping the input so the existing GROUP BY + // collapses to a single boxplot of the whole pos2 distribution. + ("pos1", DefaultAestheticValue::Dummy), ("pos2", DefaultAestheticValue::Required), ("stroke", DefaultAestheticValue::String("black")), ("fill", DefaultAestheticValue::String("white")), @@ -46,10 +49,6 @@ impl GeomTrait for Boxplot { &["pos2"] } - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - true - } - fn default_params(&self) -> &'static [super::ParamDefinition] { const PARAMS: &[ParamDefinition] = &[ ParamDefinition { @@ -122,9 +121,17 @@ fn stat_boxplot( let y = get_column_name(aesthetics, "pos2").ok_or_else(|| { GgsqlError::ValidationError("Boxplot requires 'y' aesthetic mapping".to_string()) })?; - let x = get_column_name(aesthetics, "pos1").ok_or_else(|| { - GgsqlError::ValidationError("Boxplot requires 'x' aesthetic mapping".to_string()) - })?; + + // pos1 is optional. When the user omits it, wrap the input query with a + // synthetic dummy categorical column and group by that column, so the + // existing GROUP BY / summary pipeline collapses to a single boxplot. 
+ let (working_query, x, use_dummy) = match get_column_name(aesthetics, "pos1") { + Some(col) => (query.to_string(), col, false), + None => { + let dummy_col = naming::stat_column("pos1"); + (wrap_with_dummy_axis(query, "pos1"), dummy_col, true) + } + }; // Get coef parameter (validated by ParamConstraint::number_min) let ParameterValue::Number(coef) = parameters.get("coef").unwrap() else { @@ -153,17 +160,25 @@ fn stat_boxplot( } // Query for boxplot summary statistics - let summary = boxplot_sql_compute_summary(query, &groups, &value_col, coef, dialect); - let stats_query = boxplot_sql_append_outliers(&summary, &groups, &value_col, query, outliers); + let summary = boxplot_sql_compute_summary(&working_query, &groups, &value_col, coef, dialect); + let stats_query = + boxplot_sql_append_outliers(&summary, &groups, &value_col, &working_query, outliers); + + let mut stat_columns = vec![ + "type".to_string(), + "value".to_string(), + "value2".to_string(), + ]; + let mut dummy_columns: Vec<String> = vec![]; + if use_dummy { + stat_columns.push("pos1".to_string()); + dummy_columns.push("pos1".to_string()); + } Ok(StatResult::Transformed { query: stats_query, - stat_columns: vec![ - "type".to_string(), - "value".to_string(), - "value2".to_string(), - ], - dummy_columns: vec![], + stat_columns, + dummy_columns, consumed_aesthetics: vec!["pos2".to_string()], }) } @@ -522,9 +537,10 @@ mod tests { let boxplot = Boxplot; let aes = boxplot.aesthetics(); - assert!(aes.is_required("pos1")); + // pos1 is optional (omit → dummy categorical axis); pos2 is required. + assert!(!aes.is_required("pos1")); assert!(aes.is_required("pos2")); - assert_eq!(aes.required().len(), 2); + assert_eq!(aes.required(), vec!["pos2"]); } #[test] @@ -587,6 +603,8 @@ mod tests { let boxplot = Boxplot; let remappings = boxplot.default_remappings(); + // pos1 is `Dummy` in aesthetics() so the `Geom` wrapper auto-derives + // its remapping. The trait method returns only the explicit entries. 
assert_eq!(remappings.defaults.len(), 3); assert!(remappings .defaults @@ -599,6 +617,48 @@ mod tests { .contains(&("type", DefaultAestheticValue::Column("type")))); } + #[test] + fn test_boxplot_dummy_pos1_when_unmapped() { + use crate::plot::AestheticValue; + let mut aesthetics = Mappings::new(); + aesthetics.insert( + "pos2".to_string(), + AestheticValue::standard_column("value".to_string()), + ); + let mut parameters: HashMap<String, ParameterValue> = HashMap::new(); + parameters.insert("coef".to_string(), ParameterValue::Number(1.5)); + parameters.insert("outliers".to_string(), ParameterValue::Boolean(true)); + + let result = stat_boxplot( + "SELECT * FROM data", + &aesthetics, + &[], + &parameters, + &AnsiDialect, + ) + .expect("stat_boxplot should succeed without pos1"); + + match result { + StatResult::Transformed { + query, + stat_columns, + dummy_columns, + consumed_aesthetics, + } => { + // The wrapped input introduces a synthetic pos1 column that the + // GROUP BY then collapses to a single boxplot. + assert!(query.contains("__ggsql_stat_dummy")); + assert!(query.contains("__ggsql_stat_pos1")); + assert!(stat_columns.contains(&"pos1".to_string())); + assert!(stat_columns.contains(&"type".to_string())); + assert!(stat_columns.contains(&"value".to_string())); + assert_eq!(dummy_columns, vec!["pos1".to_string()]); + assert_eq!(consumed_aesthetics, vec!["pos2".to_string()]); + } + _ => panic!("expected Transformed"), + } + } + #[test] fn test_boxplot_stat_consumed_aesthetics() { let boxplot = Boxplot; @@ -608,13 +668,6 @@ mod tests { assert_eq!(consumed[0], "pos2"); } - #[test] - fn test_boxplot_needs_stat_transform() { - let boxplot = Boxplot; - let aesthetics = Mappings::new(); - assert!(boxplot.needs_stat_transform(&aesthetics)); - } - #[test] fn test_boxplot_display() { let boxplot = Boxplot; diff --git a/src/plot/layer/geom/density.rs b/src/plot/layer/geom/density.rs index 3fe62f9af..8032f5f47 100644 --- a/src/plot/layer/geom/density.rs +++ b/src/plot/layer/geom/density.rs @@ 
-54,10 +54,6 @@ impl GeomTrait for Density { } } - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - true - } - fn default_params(&self) -> &'static [ParamDefinition] { const PARAMS: &[ParamDefinition] = &[ ParamDefinition { diff --git a/src/plot/layer/geom/histogram.rs b/src/plot/layer/geom/histogram.rs index bfb800502..fc37c10f6 100644 --- a/src/plot/layer/geom/histogram.rs +++ b/src/plot/layer/geom/histogram.rs @@ -84,10 +84,6 @@ impl GeomTrait for Histogram { &["pos1"] } - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - true - } - fn apply_stat_transform( &self, query: &str, diff --git a/src/plot/layer/geom/line.rs b/src/plot/layer/geom/line.rs index 624034586..b40b975f5 100644 --- a/src/plot/layer/geom/line.rs +++ b/src/plot/layer/geom/line.rs @@ -48,10 +48,6 @@ impl GeomTrait for Line { Some(&["pos1"]) } - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - true - } - fn apply_stat_transform( &self, query: &str, diff --git a/src/plot/layer/geom/mod.rs b/src/plot/layer/geom/mod.rs index 74004da5c..8a0ab55d8 100644 --- a/src/plot/layer/geom/mod.rs +++ b/src/plot/layer/geom/mod.rs @@ -20,6 +20,7 @@ //! assert!(point.aesthetics().is_required("pos1")); //! ``` +use crate::plot::types::DefaultAestheticValue; use crate::{DataFrame, Mappings, Result}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; @@ -193,11 +194,6 @@ pub trait GeomTrait: std::fmt::Debug + std::fmt::Display + Send + Sync { &[] } - /// Check if this geom requires a statistical transformation - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - false - } - /// Whether the Aggregate stat applies to this geom, and which aesthetics /// stay as group keys when it does. /// @@ -224,9 +220,16 @@ pub trait GeomTrait: std::fmt::Debug + std::fmt::Display + Send + Sync { /// Apply statistical transformation to the layer query. 
/// - /// The default implementation dispatches to the Aggregate stat when - /// `supports_aggregate()` is true and the `aggregate` parameter is set; - /// otherwise returns identity (no transformation). + /// The default implementation: + /// 1. Dispatches to the Aggregate stat when `supports_aggregate()` is + /// true and the `aggregate` parameter is set. + /// 2. For each position axis declared as `Dummy` in `aesthetics()`, + /// post-wraps the result with a synthetic categorical column when + /// *no* aesthetic in the axis's family (e.g. `pos1`, `pos1min`, + /// `pos1max`, …) is mapped. The writer then suppresses the + /// (otherwise one-tick) axis. Geoms whose bespoke stat already + /// synthesises positions (`bar`, `boxplot`, `violin`, `histogram`, + /// …) override `apply_stat_transform` and are unaffected. #[allow(clippy::too_many_arguments)] fn apply_stat_transform( &self, @@ -239,11 +242,11 @@ pub trait GeomTrait: std::fmt::Debug + std::fmt::Display + Send + Sync { dialect: &dyn SqlDialect, aesthetic_ctx: &AestheticContext, ) -> Result<StatResult> { - if let (Some(domain), true) = ( + let mut result = if let (Some(domain), true) = ( self.aggregate_domain_aesthetics(), has_aggregate_param(parameters), ) { - return stat_aggregate::apply( + stat_aggregate::apply( query, schema, aesthetics, @@ -252,9 +255,19 @@ pub trait GeomTrait: std::fmt::Debug + std::fmt::Display + Send + Sync { dialect, aesthetic_ctx, domain, - ); + )? + } else { + StatResult::Identity + }; + + let aes = self.aesthetics(); + for axis in aes.dummy_axes() { + if !types::axis_family_has_mapping(aesthetics, axis) { + result = types::wrap_stat_with_dummy_axis(query, result, axis); + } } - Ok(StatResult::Identity) + + Ok(result) } /// Post-process the DataFrame after stat query execution. @@ -446,16 +459,48 @@ impl Geom { self.0.aesthetics() } - /// Get default remappings + /// Get default remappings as explicitly declared by the geom. 
+ /// + /// Most callers want [`implicit_default_remappings`], which also + /// includes auto-derived entries for `Dummy` axes. pub fn default_remappings(&self) -> DefaultAesthetics { self.0.default_remappings() } - /// Get valid stat columns + /// Default remappings merged with auto-derived `(axis, Column(axis))` + /// entries for every aesthetic declared as `Dummy` that isn't already + /// covered by an explicit remapping. The merged list is what should be + /// fed to the executor's rename pass. + pub fn implicit_default_remappings(&self) -> Vec<(&'static str, DefaultAestheticValue)> { + let explicit = self.0.default_remappings(); + let mut out: Vec<(&'static str, DefaultAestheticValue)> = explicit.defaults.to_vec(); + for axis in self.0.aesthetics().dummy_axes() { + if !out.iter().any(|(name, _)| *name == axis) { + out.push((axis, DefaultAestheticValue::Column(axis))); + } + } + out + } + + /// Get valid stat columns as explicitly declared by the geom. pub fn valid_stat_columns(&self) -> &'static [&'static str] { self.0.valid_stat_columns() } + /// Valid stat columns merged with the axis names of every `Dummy` + /// aesthetic declared by the geom. The executor uses this to validate + /// REMAPPING targets. 
+ pub fn implicit_valid_stat_columns(&self) -> Vec<&'static str> { + let explicit = self.0.valid_stat_columns(); + let mut out: Vec<&'static str> = explicit.to_vec(); + for axis in self.0.aesthetics().dummy_axes() { + if !out.contains(&axis) { + out.push(axis); + } + } + out + } + /// Get default parameters pub fn default_params(&self) -> &'static [ParamDefinition] { self.0.default_params() @@ -466,11 +511,6 @@ impl Geom { self.0.stat_consumed_aesthetics() } - /// Check if stat transform is needed - pub fn needs_stat_transform(&self, aesthetics: &Mappings) -> bool { - self.0.needs_stat_transform(aesthetics) - } - /// Apply stat transform #[allow(clippy::too_many_arguments)] pub fn apply_stat_transform( @@ -621,8 +661,9 @@ mod tests { fn test_geom_aesthetics() { let point = Geom::point(); let aes = point.aesthetics(); - assert!(aes.is_required("pos1")); - assert!(aes.is_required("pos2")); + // Both axes are optional - omitted axes become dummy categorical axes. + assert!(!aes.is_required("pos1")); + assert!(!aes.is_required("pos2")); } #[test] diff --git a/src/plot/layer/geom/point.rs b/src/plot/layer/geom/point.rs index f6b454c9e..3e9c55a6c 100644 --- a/src/plot/layer/geom/point.rs +++ b/src/plot/layer/geom/point.rs @@ -18,8 +18,13 @@ impl GeomTrait for Point { fn aesthetics(&self) -> DefaultAesthetics { DefaultAesthetics { defaults: &[ - ("pos1", DefaultAestheticValue::Required), - ("pos2", DefaultAestheticValue::Required), + // Both axes are dummy-able. Whichever the user omits is + // synthesised as a dummy categorical column by the default + // `apply_stat_transform`; the writer then hides that axis. + // Mapping neither degrades to all points overlapping at a + // single dummy spot — useful only with `aggregate`. 
+ ("pos1", DefaultAestheticValue::Dummy), + ("pos2", DefaultAestheticValue::Dummy), ("size", DefaultAestheticValue::Number(3.0)), ("stroke", DefaultAestheticValue::String("black")), ("fill", DefaultAestheticValue::String("black")), diff --git a/src/plot/layer/geom/range.rs b/src/plot/layer/geom/range.rs index d547187b6..0dd23c207 100644 --- a/src/plot/layer/geom/range.rs +++ b/src/plot/layer/geom/range.rs @@ -18,7 +18,10 @@ impl GeomTrait for Range { fn aesthetics(&self) -> DefaultAesthetics { DefaultAesthetics { defaults: &[ - ("pos1", DefaultAestheticValue::Required), + // pos1 is dummy-able - if no aesthetic in the pos1 family + // is mapped, the default `apply_stat_transform` synthesises + // a dummy categorical axis and the writer hides it. + ("pos1", DefaultAestheticValue::Dummy), ("pos2min", DefaultAestheticValue::Required), ("pos2max", DefaultAestheticValue::Required), ("stroke", DefaultAestheticValue::String("black")), diff --git a/src/plot/layer/geom/ribbon.rs b/src/plot/layer/geom/ribbon.rs index 5b3ca13a3..47f9bc26d 100644 --- a/src/plot/layer/geom/ribbon.rs +++ b/src/plot/layer/geom/ribbon.rs @@ -47,10 +47,6 @@ impl GeomTrait for Ribbon { Some(&["pos1"]) } - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - true - } - fn apply_stat_transform( &self, query: &str, diff --git a/src/plot/layer/geom/smooth.rs b/src/plot/layer/geom/smooth.rs index c523201a4..d032ec099 100644 --- a/src/plot/layer/geom/smooth.rs +++ b/src/plot/layer/geom/smooth.rs @@ -78,10 +78,6 @@ impl GeomTrait for Smooth { PARAMS } - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - true - } - fn default_remappings(&self) -> DefaultAesthetics { DefaultAesthetics { defaults: &[ diff --git a/src/plot/layer/geom/spatial.rs b/src/plot/layer/geom/spatial.rs index 3ce1df9a4..97766ce92 100644 --- a/src/plot/layer/geom/spatial.rs +++ b/src/plot/layer/geom/spatial.rs @@ -23,10 +23,6 @@ impl GeomTrait for Spatial { } } - fn needs_stat_transform(&self, _aesthetics: 
&Mappings) -> bool { - true - } - fn apply_stat_transform( &self, query: &str, diff --git a/src/plot/layer/geom/tile.rs b/src/plot/layer/geom/tile.rs index b4a639022..b166a6222 100644 --- a/src/plot/layer/geom/tile.rs +++ b/src/plot/layer/geom/tile.rs @@ -97,11 +97,6 @@ impl GeomTrait for Tile { ] } - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - // Always apply stat transform to validate and consolidate parameters - true - } - /// Every spatial slot is pinned as a group key — the rectangle's position /// and size *define* the group, they are never the thing being summarised. /// Material aesthetics (fill, stroke, opacity, …) pass through to the diff --git a/src/plot/layer/geom/types.rs b/src/plot/layer/geom/types.rs index f0ff1591e..a11fc2041 100644 --- a/src/plot/layer/geom/types.rs +++ b/src/plot/layer/geom/types.rs @@ -158,6 +158,23 @@ impl DefaultAesthetics { .find(|(n, _)| *n == name) .map(|(_, value)| value) } + + /// Names of aesthetics declared as `Dummy` — i.e. position aesthetics + /// the default `apply_stat_transform` should fill in with a synthetic + /// categorical column when the user leaves the whole axis family + /// unmapped. + pub fn dummy_axes(&self) -> Vec<&'static str> { + self.defaults + .iter() + .filter_map(|(name, value)| { + if matches!(value, DefaultAestheticValue::Dummy) { + Some(*name) + } else { + None + } + }) + .collect() + } } /// Result of a statistical transformation @@ -228,6 +245,102 @@ pub fn wrap_with_order_by(input_query: &str, result: StatResult, aesthetic: &str } } +/// Wrap `query` so it produces a literal categorical column carrying the +/// dummy-axis sentinel value for `axis` (`"pos1"` or `"pos2"`). Used by +/// geoms that should still render sensibly when the user omits a position +/// aesthetic. 
+/// +/// The wrapped query has shape: +/// ```sql +/// SELECT '__ggsql_stat_dummy' AS "__ggsql_stat_<axis>", * +/// FROM (<query>) AS "__ggsql_dummy_src__" +/// ``` +/// +/// This composes with any stat: pre-wrap the input (when the geom's stat +/// groups by the dummied axis, e.g. boxplot/violin) so the existing +/// `GROUP BY` collapses to a single group, or post-wrap a stat output +/// (aggregate / identity) so the dummy column is just decoration. +pub fn wrap_with_dummy_axis(query: &str, axis: &str) -> String { + let stat_col = naming::stat_column(axis); + let dummy_v = naming::stat_column("dummy"); + format!( + "SELECT '{val}' AS {col}, * FROM ({q}) AS \"__ggsql_dummy_src__\"", + val = dummy_v, + col = naming::quote_ident(&stat_col), + q = query, + ) +} + +/// Post-wrap a `StatResult` to add a dummy column for `axis` (`"pos1"` or +/// `"pos2"`). +/// +/// Wraps the inner query via [`wrap_with_dummy_axis`] (turning `Identity` +/// into a `Transformed` over the original input) and appends `axis` to +/// both `stat_columns` and `dummy_columns` so `execute/layer.rs` flips +/// `is_dummy: true` on the resulting aesthetic. +pub fn wrap_stat_with_dummy_axis(input_query: &str, result: StatResult, axis: &str) -> StatResult { + match result { + StatResult::Identity => StatResult::Transformed { + query: wrap_with_dummy_axis(input_query, axis), + stat_columns: vec![axis.to_string()], + dummy_columns: vec![axis.to_string()], + consumed_aesthetics: vec![], + }, + StatResult::Transformed { + query, + mut stat_columns, + mut dummy_columns, + consumed_aesthetics, + } => { + // Idempotent: a stat that already produced a dummy for this axis + // must not be re-wrapped — the SQL would gain a duplicate column. 
+ let already_dummied = dummy_columns.iter().any(|s| s == axis); + let wrapped = if already_dummied { + query + } else { + wrap_with_dummy_axis(&query, axis) + }; + if !stat_columns.iter().any(|s| s == axis) { + stat_columns.push(axis.to_string()); + } + if !already_dummied { + dummy_columns.push(axis.to_string()); + } + StatResult::Transformed { + query: wrapped, + stat_columns, + dummy_columns, + consumed_aesthetics, + } + } + } +} + +/// Convenience wrapper for the common case of dummying `pos1`. +pub fn wrap_stat_with_dummy_pos1(input_query: &str, result: StatResult) -> StatResult { + wrap_stat_with_dummy_axis(input_query, result, "pos1") +} + +/// Returns true when at least one aesthetic in the same axis family as +/// `axis` (e.g. `pos1`, `pos1min`, `pos1max`, `pos1end`, `pos1offset`) is +/// mapped to a column. +/// +/// Used by the default `apply_stat_transform` to decide whether to fill in +/// a dummy categorical column for an unmapped axis. +pub fn axis_family_has_mapping(aesthetics: &Mappings, axis: &str) -> bool { + use crate::plot::aesthetic::parse_position; + let Some((target_slot, _)) = parse_position(axis) else { + return false; + }; + aesthetics + .aesthetics + .iter() + .any(|(name, value)| match parse_position(name) { + Some((slot, _)) => slot == target_slot && value.column_name().is_some(), + None => false, + }) +} + /// Helper to extract column name from aesthetic value pub fn get_column_name(aesthetics: &Mappings, aesthetic: &str) -> Option<String> { use crate::AestheticValue; @@ -363,6 +476,87 @@ mod tests { } } + #[test] + fn wrap_with_dummy_pos1_produces_expected_sql() { + let wrapped = wrap_with_dummy_axis("SELECT * FROM t", "pos1"); + assert_eq!( + wrapped, + "SELECT '__ggsql_stat_dummy' AS \"__ggsql_stat_pos1\", * FROM (SELECT * FROM t) AS \"__ggsql_dummy_src__\"" + ); + } + + #[test] + fn wrap_stat_with_dummy_pos1_promotes_identity() { + let result = wrap_stat_with_dummy_pos1("SELECT * FROM raw", StatResult::Identity); + match result { + 
StatResult::Transformed { + query, + stat_columns, + dummy_columns, + consumed_aesthetics, + } => { + assert!(query.contains("__ggsql_stat_dummy")); + assert!(query.contains("__ggsql_stat_pos1")); + assert!(query.contains("SELECT * FROM raw")); + assert_eq!(stat_columns, vec!["pos1".to_string()]); + assert_eq!(dummy_columns, vec!["pos1".to_string()]); + assert!(consumed_aesthetics.is_empty()); + } + _ => panic!("expected Transformed"), + } + } + + #[test] + fn wrap_stat_with_dummy_pos1_extends_transformed_metadata() { + let inner = StatResult::Transformed { + query: "SELECT 1 AS x".to_string(), + stat_columns: vec!["count".to_string()], + dummy_columns: vec![], + consumed_aesthetics: vec!["weight".to_string()], + }; + let result = wrap_stat_with_dummy_pos1("SELECT * FROM raw", inner); + match result { + StatResult::Transformed { + query, + stat_columns, + dummy_columns, + consumed_aesthetics, + } => { + assert!(query.contains("__ggsql_stat_dummy")); + assert!(query.contains("__ggsql_stat_pos1")); + assert!(query.contains("SELECT 1 AS x")); + assert_eq!(stat_columns, vec!["count".to_string(), "pos1".to_string()]); + assert_eq!(dummy_columns, vec!["pos1".to_string()]); + assert_eq!(consumed_aesthetics, vec!["weight".to_string()]); + } + _ => panic!("expected Transformed"), + } + } + + #[test] + fn wrap_stat_with_dummy_pos1_idempotent_on_pos1() { + // Caller already had pos1 in stat_columns/dummy_columns; helper must + // not duplicate. + let inner = StatResult::Transformed { + query: "SELECT 1".to_string(), + stat_columns: vec!["pos1".to_string()], + dummy_columns: vec!["pos1".to_string()], + consumed_aesthetics: vec![], + }; + let result = wrap_stat_with_dummy_pos1("SELECT *", inner); + match result { + StatResult::Transformed { + stat_columns, + dummy_columns, + .. 
+ } => { + assert_eq!(stat_columns, vec!["pos1".to_string()]); + assert_eq!(dummy_columns, vec!["pos1".to_string()]); + } + _ => panic!("expected Transformed"), + } + } + #[test] fn test_color_alias_requires_stroke_or_fill() { // Geom with neither stroke nor fill: color alias should NOT be supported diff --git a/src/plot/layer/geom/violin.rs b/src/plot/layer/geom/violin.rs index 4e9cb20a3..d3ae8c914 100644 --- a/src/plot/layer/geom/violin.rs +++ b/src/plot/layer/geom/violin.rs @@ -1,6 +1,6 @@ //! Violin geom implementation -use super::types::{POSITION_VALUES, SIDE_VALUES}; +use super::types::{wrap_with_dummy_axis, POSITION_VALUES, SIDE_VALUES}; use super::{DefaultAesthetics, GeomTrait, GeomType, StatResult}; use crate::{ naming, @@ -36,7 +36,11 @@ impl GeomTrait for Violin { fn aesthetics(&self) -> DefaultAesthetics { DefaultAesthetics { defaults: &[ - ("pos1", DefaultAestheticValue::Required), + // pos1 is dummy-able. `stat_violin` handles the synthesis + // itself by pre-wrapping the source query, so the density + // grouping collapses to a single violin of the whole pos2 + // distribution. + ("pos1", DefaultAestheticValue::Dummy), ("pos2", DefaultAestheticValue::Required), ("weight", DefaultAestheticValue::Null), ("fill", DefaultAestheticValue::String("black")), @@ -49,10 +53,6 @@ impl GeomTrait for Violin { } } - fn needs_stat_transform(&self, _aesthetics: &Mappings) -> bool { - true - } - fn default_params(&self) -> &'static [ParamDefinition] { const PARAMS: &[ParamDefinition] = &[ ParamDefinition { @@ -210,28 +210,64 @@ fn stat_violin( )); } + // pos1 is optional. When the user omits it, wrap the source with a + // synthetic dummy categorical column and group by that column so the + // density stat collapses to a single violin spanning the whole dataset. 
let mut group_by = group_by.to_vec(); - if let Some(x_col) = get_column_name(aesthetics, "pos1") { - // We want to ensure x is included as a grouping - if !group_by.contains(&x_col) { - group_by.push(x_col); + let (working_query, use_dummy) = match get_column_name(aesthetics, "pos1") { + Some(x_col) => { + if !group_by.contains(&x_col) { + group_by.push(x_col); + } + (query.to_string(), false) } - } else { - return Err(GgsqlError::ValidationError( - "Violin requires 'x' aesthetic mapping (categorical)".to_string(), - )); - } + None => { + let dummy_col = naming::stat_column("pos1"); + group_by.push(dummy_col); + (wrap_with_dummy_axis(query, "pos1"), true) + } + }; // Violin uses tails parameter from user (default 3.0 set in default_params) - super::density::stat_density( - query, + let inner = super::density::stat_density( + &working_query, aesthetics, "pos2", None, group_by.as_slice(), parameters, dialect, - ) + )?; + + if !use_dummy { + return Ok(inner); + } + + // Density returned its own Transformed result; tag it with the dummy + // column metadata so execute/layer.rs marks the resulting pos1 aesthetic + // as a dummy and the writer suppresses the axis. + match inner { + StatResult::Identity => unreachable!("stat_density always returns Transformed"), + StatResult::Transformed { + query, + mut stat_columns, + mut dummy_columns, + consumed_aesthetics, + } => { + if !stat_columns.iter().any(|s| s == "pos1") { + stat_columns.push("pos1".to_string()); + } + if !dummy_columns.iter().any(|s| s == "pos1") { + dummy_columns.push("pos1".to_string()); + } + Ok(StatResult::Transformed { + query, + stat_columns, + dummy_columns, + consumed_aesthetics, + }) + } + } } #[cfg(test)] @@ -579,6 +615,55 @@ mod tests { assert!((values[2] - 0.3).abs() < 1e-6, "1.0 should become 0.3"); } + #[test] + fn test_violin_dummy_pos1_when_unmapped() { + // pos2 only - pos1 omitted should produce a single violin via dummy x. 
+ let query = "SELECT flipper_length FROM penguins"; + let mut aesthetics = Mappings::new(); + aesthetics.insert( + "pos2".to_string(), + AestheticValue::standard_column("flipper_length".to_string()), + ); + let groups: Vec<String> = vec![]; + let mut parameters = HashMap::new(); + parameters.insert("bandwidth".to_string(), ParameterValue::Number(5.0)); + parameters.insert( + "kernel".to_string(), + ParameterValue::String("gaussian".to_string()), + ); + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let setup_sql = "CREATE TABLE penguins AS SELECT * FROM (VALUES + (181.0), (186.0), (195.0), (217.0), (221.0), (230.0), (192.0) + ) AS t(flipper_length)"; + reader.execute_sql(setup_sql).unwrap(); + let execute = |sql: &str| reader.execute_sql(sql); + + let result = stat_violin(query, &aesthetics, &groups, &parameters, &AnsiDialect) + .expect("stat_violin should succeed without pos1"); + + match result { + StatResult::Transformed { + query: stat_query, + stat_columns, + dummy_columns, + .. 
- For single-axis geoms (histogram, density): if pos2 has a scale but pos1 doesn't → "transposed" -use super::geom::GeomType; +use super::geom::{Geom, GeomType}; use super::Layer; use crate::plot::aesthetic::{is_position_aesthetic, AestheticContext}; use crate::plot::scale::ScaleTypeKind; -use crate::plot::{AestheticValue, Mappings, Scale}; +use crate::plot::{AestheticValue, DefaultAestheticValue, Mappings, Scale}; use crate::{naming, DataFrame}; /// Orientation value for aligned/vertical orientation. @@ -125,7 +125,7 @@ pub fn geom_has_implicit_orientation(geom: &GeomType) -> bool { /// 4. **Default**: Primary fn detect_from_scales( scales: &[Scale], - _geom: &GeomType, + geom: &GeomType, mappings: &Mappings, remappings: &Mappings, ) -> &'static str { @@ -152,12 +152,21 @@ fn detect_from_scales( let has_pos1 = pos1_scale.is_some(); let has_pos2 = pos2_scale.is_some(); - // Rule 1: Single scale present - that axis is primary + // Rule 1: Single scale present - that axis is primary. // Only apply when there are explicit position mappings; otherwise the user // is just customizing a scale (e.g., SCALE y SETTING expand) without intending // to change orientation. The geom's default_remappings will define orientation. + // + // If the geom declares `pos1` as `Dummy` and the user hasn't mapped it, + // pos1 *is* the (synthetic) primary axis — leave the layer aligned so the + // stat fills it in. Auto-transposing in that case would push the dummy + // onto the secondary axis, which is never what the user means. 
if has_pos1_mapping || has_pos2_mapping { - if has_pos2 && !has_pos1 { + let pos1_is_dummy = matches!( + Geom::from_type(*geom).aesthetics().get("pos1"), + Some(DefaultAestheticValue::Dummy) + ); + if has_pos2 && !has_pos1 && (!pos1_is_dummy || has_pos1_mapping) { return TRANSPOSED; } if has_pos1 && !has_pos2 { diff --git a/src/plot/main.rs b/src/plot/main.rs index 07018e0c8..4535c0bbf 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -366,10 +366,11 @@ mod tests { assert!(valid_point.validate_mapping(&None, false).is_ok()); - let invalid_point = Layer::new(Geom::point()) + // Line still requires both pos1 and pos2 - mapping only one fails. + let invalid_line = Layer::new(Geom::line()) .with_aesthetic("pos1".to_string(), AestheticValue::standard_column("x")); - assert!(invalid_point.validate_mapping(&None, false).is_err()); + assert!(invalid_line.validate_mapping(&None, false).is_err()); let valid_ribbon = Layer::new(Geom::ribbon()) .with_aesthetic("pos1".to_string(), AestheticValue::standard_column("x")) @@ -488,7 +489,9 @@ mod tests { assert!(point.is_supported("size")); assert!(point.is_supported("shape")); assert!(!point.is_supported("linetype")); - assert_eq!(point.required(), &["pos1", "pos2"]); + // Both axes are optional - omitted axes become dummy categorical + // axes (strip plot, or single dot when both omitted + aggregate). + assert!(point.required().is_empty()); // Line geom let line = Geom::line().aesthetics(); @@ -526,10 +529,10 @@ mod tests { &["pos1", "pos2", "pos1end", "pos2end"] ); - // Range requires pos1, pos2min, pos2max + // Range requires pos2min, pos2max; pos1 is optional (omit → dummy axis). 
assert_eq!( Geom::range().aesthetics().required(), - &["pos1", "pos2min", "pos2max"] + &["pos2min", "pos2max"] ); } diff --git a/src/plot/types.rs b/src/plot/types.rs index 534dc1027..ba69fc8f1 100644 --- a/src/plot/types.rs +++ b/src/plot/types.rs @@ -303,6 +303,15 @@ pub enum DefaultAestheticValue { Required, /// Delayed aesthetic (produced by stat transform, valid for REMAPPING only, not MAPPING) Delayed, + /// Optional position aesthetic that, when the user leaves the whole + /// axis family unmapped, is filled with a synthetic dummy categorical + /// column by the default `apply_stat_transform`. The writer hides the + /// resulting one-tick axis. Use only on `pos1`/`pos2`. + /// + /// The `Geom` wrapper auto-augments `default_remappings()` and + /// `valid_stat_columns()` with the appropriate entries, so a geom that + /// declares this variant doesn't need to spell those out. + Dummy, } impl DefaultAestheticValue { @@ -316,7 +325,9 @@ impl DefaultAestheticValue { Self::String(s) => ParameterValue::String(s.to_string()), Self::Number(n) => ParameterValue::Number(*n), Self::Boolean(b) => ParameterValue::Boolean(*b), - Self::Column(_) | Self::Null | Self::Required | Self::Delayed => ParameterValue::Null, + Self::Column(_) | Self::Null | Self::Required | Self::Delayed | Self::Dummy => { + ParameterValue::Null + } } } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 01ecc5481..84a54f2da 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -1344,6 +1344,180 @@ mod tests { ); } + #[test] + fn test_boxplot_dummy_x() { + // Boxplot with only y mapped: should render a single boxplot of the + // whole distribution and suppress the categorical x axis. 
+ let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + VISUALISE FROM ggsql:penguins + DRAW boxplot MAPPING bill_len AS y + "#; + + let spec = reader.execute(query).unwrap(); + let writer = VegaLiteWriter::new(); + let result = writer.render(&spec).unwrap(); + + let json: serde_json::Value = serde_json::from_str(&result).unwrap(); + // Boxplot is a composite renderer (multiple sub-layers). Check that + // the first layer's x encoding suppresses its axis. + let layer = data_layer(&json, 0); + let encoding = &layer["encoding"]; + assert!( + encoding["x"]["axis"].is_null(), + "Boxplot dummy x should have axis: null. Encoding: {}", + serde_json::to_string_pretty(encoding).unwrap() + ); + } + + #[test] + fn test_violin_dummy_x() { + // Violin with only y mapped: single violin spanning the whole dataset. + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + VISUALISE FROM ggsql:penguins + DRAW violin MAPPING bill_len AS y + "#; + + let spec = reader.execute(query).unwrap(); + let writer = VegaLiteWriter::new(); + let result = writer.render(&spec).unwrap(); + + let json: serde_json::Value = serde_json::from_str(&result).unwrap(); + let layer = data_layer(&json, 0); + let encoding = &layer["encoding"]; + assert!( + encoding["x"]["axis"].is_null(), + "Violin dummy x should have axis: null. Encoding: {}", + serde_json::to_string_pretty(encoding).unwrap() + ); + } + + #[test] + fn test_point_dummy_x() { + // Point with only y mapped: strip plot at a single dummy x position. 
+ let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + VISUALISE FROM ggsql:penguins + DRAW point MAPPING bill_len AS y + "#; + + let spec = reader.execute(query).unwrap(); + let writer = VegaLiteWriter::new(); + let result = writer.render(&spec).unwrap(); + + let json: serde_json::Value = serde_json::from_str(&result).unwrap(); + let layer = data_layer(&json, 0); + let encoding = &layer["encoding"]; + assert!( + encoding["x"]["axis"].is_null(), + "Point dummy x should have axis: null. Encoding: {}", + serde_json::to_string_pretty(encoding).unwrap() + ); + } + + #[test] + fn test_range_dummy_x() { + // Range with only ymin/ymax mapped: a single vertical interval. + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + SELECT 10.0 AS lo, 20.0 AS hi + VISUALISE + DRAW range MAPPING lo AS ymin, hi AS ymax + "#; + + let spec = reader.execute(query).unwrap(); + let writer = VegaLiteWriter::new(); + let result = writer.render(&spec).unwrap(); + + let json: serde_json::Value = serde_json::from_str(&result).unwrap(); + let layer = data_layer(&json, 0); + let encoding = &layer["encoding"]; + assert!( + encoding["x"]["axis"].is_null(), + "Range dummy x should have axis: null. Encoding: {}", + serde_json::to_string_pretty(encoding).unwrap() + ); + } + + #[test] + fn test_point_dummy_y() { + // Symmetric to test_point_dummy_x: only x mapped means dummy y. + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + VISUALISE FROM ggsql:penguins + DRAW point MAPPING bill_len AS x + "#; + + let spec = reader.execute(query).unwrap(); + let writer = VegaLiteWriter::new(); + let result = writer.render(&spec).unwrap(); + + let json: serde_json::Value = serde_json::from_str(&result).unwrap(); + let layer = data_layer(&json, 0); + let encoding = &layer["encoding"]; + assert!( + encoding["y"]["axis"].is_null(), + "Point dummy y should have axis: null. 
Encoding: {}", + serde_json::to_string_pretty(encoding).unwrap() + ); + } + + #[test] + fn test_point_dummy_both_with_aggregate() { + // Both axes omitted, but aggregate gives the single point meaning: + // a count of all rows at the dummy x/y intersection. + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + VISUALISE FROM ggsql:penguins + DRAW point MAPPING bill_len AS size + SETTING aggregate => 'size:count' + "#; + + let spec = reader.execute(query).unwrap(); + let writer = VegaLiteWriter::new(); + let result = writer.render(&spec).unwrap(); + + let json: serde_json::Value = serde_json::from_str(&result).unwrap(); + let layer = data_layer(&json, 0); + let encoding = &layer["encoding"]; + assert!( + encoding["x"]["axis"].is_null(), + "Both-dummy point should hide x axis. Encoding: {}", + serde_json::to_string_pretty(encoding).unwrap() + ); + assert!( + encoding["y"]["axis"].is_null(), + "Both-dummy point should hide y axis. Encoding: {}", + serde_json::to_string_pretty(encoding).unwrap() + ); + } + + #[test] + fn test_point_dummy_x_with_aggregate() { + // Point with aggregate SETTING and no x mapping: should aggregate the + // whole dataset to a single point and suppress the dummy x axis. + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + VISUALISE FROM ggsql:penguins + DRAW point MAPPING bill_len AS y + SETTING aggregate => 'mean' + "#; + + let spec = reader.execute(query).unwrap(); + let writer = VegaLiteWriter::new(); + let result = writer.render(&spec).unwrap(); + + let json: serde_json::Value = serde_json::from_str(&result).unwrap(); + let layer = data_layer(&json, 0); + let encoding = &layer["encoding"]; + assert!( + encoding["x"]["axis"].is_null(), + "Aggregated point with dummy x should have axis: null. 
Encoding: {}", + serde_json::to_string_pretty(encoding).unwrap() + ); + } + #[test] fn test_bar_chart_with_expand_setting() { // Test bar chart with SCALE y SETTING expand - should work even when y is stat-derived diff --git a/src/validate.rs b/src/validate.rs index 8c7e715c6..d4e2245bf 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -315,9 +315,9 @@ mod tests { #[test] fn test_validate_missing_required_aesthetic() { - // Point requires x and y, but we only provide x + // Line requires both x and y; mapping only x is invalid. let validated = - validate("SELECT 1 as x, 2 as y VISUALISE DRAW point MAPPING x AS x").unwrap(); + validate("SELECT 1 as x, 2 as y VISUALISE DRAW line MAPPING x AS x").unwrap(); assert!(!validated.valid()); assert!(!validated.errors().is_empty()); assert!(validated.errors()[0].message.contains("y"));