Skip to content

Commit de092b9

Browse files
committed
Use portable SQL
1 parent fd8affb commit de092b9

6 files changed

Lines changed: 423 additions & 119 deletions

File tree

src/naming.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,16 @@ pub const SOURCE_COLUMN: &str = concatcp!(GGSQL_PREFIX, "source", GGSQL_SUFFIX);
8888
/// Alias for schema extraction queries
8989
pub const SCHEMA_ALIAS: &str = concatcp!(GGSQL_SUFFIX, "schema", GGSQL_SUFFIX);
9090

91+
// ============================================================================
92+
// Internal SQL Aliases
93+
// ============================================================================
94+
95+
/// CTE name for recursive base counter in generate_series: `__ggsql_base__`
96+
pub const SERIES_BASE: &str = concatcp!(GGSQL_PREFIX, "base", GGSQL_SUFFIX);
97+
98+
/// CTE name for sequence output from generate_series: `__ggsql_seq__`
99+
pub const SERIES_SEQ: &str = concatcp!(GGSQL_PREFIX, "seq", GGSQL_SUFFIX);
100+
91101
// ============================================================================
92102
// Constructor Functions
93103
// ============================================================================

src/plot/layer/geom/boxplot.rs

Lines changed: 54 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use crate::{
99
geom::types::get_column_name, DefaultAestheticValue, DefaultParam, DefaultParamValue,
1010
ParameterValue, StatResult,
1111
},
12-
utils::{scalar_max, scalar_min},
12+
utils::{sql_greatest, sql_least, sql_percentile},
1313
DataFrame, GgsqlError, Mappings, Result,
1414
};
1515

@@ -164,8 +164,11 @@ fn stat_boxplot(
164164

165165
fn boxplot_sql_compute_summary(from: &str, groups: &[String], value: &str, coef: &f64) -> String {
166166
let groups_str = groups.join(", ");
167-
let lower_expr = scalar_max(&[&format!("q1 - {coef} * (q3 - q1)"), "min"]);
168-
let upper_expr = scalar_min(&[&format!("q3 + {coef} * (q3 - q1)"), "max"]);
167+
let lower_expr = sql_greatest(&[&format!("q1 - {coef} * (q3 - q1)"), "min"]);
168+
let upper_expr = sql_least(&[&format!("q3 + {coef} * (q3 - q1)"), "max"]);
169+
let q1 = sql_percentile(value, 0.25, from, groups);
170+
let median = sql_percentile(value, 0.50, from, groups);
171+
let q3 = sql_percentile(value, 0.75, from, groups);
169172
format!(
170173
"SELECT
171174
*,
@@ -176,18 +179,21 @@ fn boxplot_sql_compute_summary(from: &str, groups: &[String], value: &str, coef:
176179
{groups},
177180
MIN({value}) AS min,
178181
MAX({value}) AS max,
179-
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY {value}) AS q1,
180-
PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY {value}) AS median,
181-
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY {value}) AS q3
182-
FROM ({from}) AS __ggsql_qt__
182+
{q1} AS q1,
183+
{median} AS median,
184+
{q3} AS q3
185+
FROM ({from}) AS _qt
183186
WHERE {value} IS NOT NULL
184187
GROUP BY {groups}
185-
) AS __ggsql_fn__",
188+
) AS _fn",
186189
lower_expr = lower_expr,
187190
upper_expr = upper_expr,
188191
groups = groups_str,
189192
value = value,
190-
from = from
193+
from = from,
194+
q1 = q1,
195+
median = median,
196+
q3 = q3,
191197
)
192198
}
193199

@@ -309,32 +315,37 @@ mod tests {
309315
fn test_sql_compute_summary_basic() {
310316
let groups = vec!["category".to_string()];
311317
let result = boxplot_sql_compute_summary("data", &groups, "value", &1.5);
312-
assert!(result.contains("PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY value)"));
313-
assert!(result.contains("PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY value)"));
314-
assert!(result.contains("PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY value)"));
318+
assert!(result.contains("NTILE(4) OVER (ORDER BY value)"));
319+
assert!(result.contains("AS q1"));
320+
assert!(result.contains("AS median"));
321+
assert!(result.contains("AS q3"));
315322
assert!(result.contains("MIN(value) AS min"));
316323
assert!(result.contains("MAX(value) AS max"));
317324
assert!(result.contains("WHERE value IS NOT NULL"));
318325
assert!(result.contains("GROUP BY category"));
319-
assert!(result.contains("SELECT MAX(v) FROM (VALUES (q1 - 1.5"));
320-
assert!(result.contains("SELECT MIN(v) FROM (VALUES (q3 + 1.5"));
326+
assert!(result.contains("CASE WHEN (q1 - 1.5"));
327+
assert!(result.contains("CASE WHEN (q3 + 1.5"));
321328
}
322329

323330
#[test]
324331
fn test_sql_compute_summary_multiple_groups() {
325332
let groups = vec!["cat".to_string(), "region".to_string()];
326333
let result = boxplot_sql_compute_summary("tbl", &groups, "val", &1.5);
327334
assert!(result.contains("GROUP BY cat, region"));
328-
assert!(result.contains("PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY val)"));
335+
assert!(result.contains("NTILE(4) OVER (ORDER BY val)"));
329336
}
330337

331338
#[test]
332339
fn test_sql_compute_summary_custom_coef() {
333340
let groups = vec!["pos1".to_string()];
334341
let result = boxplot_sql_compute_summary("q", &groups, "pos2", &2.5);
335342
assert!(result.contains("2.5"));
336-
assert!(result.contains("SELECT MAX(v) FROM (VALUES (q1 - 2.5 * (q3 - q1)), (min)) AS t(v)"));
337-
assert!(result.contains("SELECT MIN(v) FROM (VALUES (q3 + 2.5 * (q3 - q1)), (max)) AS t(v)"));
343+
assert!(
344+
result.contains("(CASE WHEN (q1 - 2.5 * (q3 - q1)) >= (min) THEN (q1 - 2.5 * (q3 - q1)) ELSE (min) END)")
345+
);
346+
assert!(
347+
result.contains("(CASE WHEN (q3 + 2.5 * (q3 - q1)) <= (max) THEN (q3 + 2.5 * (q3 - q1)) ELSE (max) END)")
348+
);
338349
}
339350

340351
#[test]
@@ -355,22 +366,27 @@ mod tests {
355366
let groups = vec!["category".to_string()];
356367
let result = boxplot_sql_compute_summary("SELECT * FROM sales", &groups, "price", &1.5);
357368

358-
let expected = r#"SELECT
369+
let q1 = sql_percentile("price", 0.25, "SELECT * FROM sales", &groups);
370+
let median = sql_percentile("price", 0.50, "SELECT * FROM sales", &groups);
371+
let q3 = sql_percentile("price", 0.75, "SELECT * FROM sales", &groups);
372+
let expected = format!(
373+
r#"SELECT
359374
*,
360-
(SELECT MAX(v) FROM (VALUES (q1 - 1.5 * (q3 - q1)), (min)) AS t(v)) AS lower,
361-
(SELECT MIN(v) FROM (VALUES (q3 + 1.5 * (q3 - q1)), (max)) AS t(v)) AS upper
375+
(CASE WHEN (q1 - 1.5 * (q3 - q1)) >= (min) THEN (q1 - 1.5 * (q3 - q1)) ELSE (min) END) AS lower,
376+
(CASE WHEN (q3 + 1.5 * (q3 - q1)) <= (max) THEN (q3 + 1.5 * (q3 - q1)) ELSE (max) END) AS upper
362377
FROM (
363378
SELECT
364379
category,
365380
MIN(price) AS min,
366381
MAX(price) AS max,
367-
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY price) AS q1,
368-
PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY price) AS median,
369-
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY price) AS q3
370-
FROM (SELECT * FROM sales) AS __ggsql_qt__
382+
{q1} AS q1,
383+
{median} AS median,
384+
{q3} AS q3
385+
FROM (SELECT * FROM sales) AS _qt
371386
WHERE price IS NOT NULL
372387
GROUP BY category
373-
) AS __ggsql_fn__"#;
388+
) AS _fn"#
389+
);
374390

375391
assert_eq!(result, expected);
376392
}
@@ -380,22 +396,27 @@ mod tests {
380396
let groups = vec!["region".to_string(), "product".to_string()];
381397
let result = boxplot_sql_compute_summary("SELECT * FROM data", &groups, "revenue", &1.5);
382398

383-
let expected = r#"SELECT
399+
let q1 = sql_percentile("revenue", 0.25, "SELECT * FROM data", &groups);
400+
let median = sql_percentile("revenue", 0.50, "SELECT * FROM data", &groups);
401+
let q3 = sql_percentile("revenue", 0.75, "SELECT * FROM data", &groups);
402+
let expected = format!(
403+
r#"SELECT
384404
*,
385-
(SELECT MAX(v) FROM (VALUES (q1 - 1.5 * (q3 - q1)), (min)) AS t(v)) AS lower,
386-
(SELECT MIN(v) FROM (VALUES (q3 + 1.5 * (q3 - q1)), (max)) AS t(v)) AS upper
405+
(CASE WHEN (q1 - 1.5 * (q3 - q1)) >= (min) THEN (q1 - 1.5 * (q3 - q1)) ELSE (min) END) AS lower,
406+
(CASE WHEN (q3 + 1.5 * (q3 - q1)) <= (max) THEN (q3 + 1.5 * (q3 - q1)) ELSE (max) END) AS upper
387407
FROM (
388408
SELECT
389409
region, product,
390410
MIN(revenue) AS min,
391411
MAX(revenue) AS max,
392-
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY revenue) AS q1,
393-
PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY revenue) AS median,
394-
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY revenue) AS q3
395-
FROM (SELECT * FROM data) AS __ggsql_qt__
412+
{q1} AS q1,
413+
{median} AS median,
414+
{q3} AS q3
415+
FROM (SELECT * FROM data) AS _qt
396416
WHERE revenue IS NOT NULL
397417
GROUP BY region, product
398-
) AS __ggsql_fn__"#;
418+
) AS _fn"#
419+
);
399420

400421
assert_eq!(result, expected);
401422
}

0 commit comments

Comments
 (0)