@@ -9,7 +9,7 @@ use crate::{
99 geom:: types:: get_column_name, DefaultAestheticValue , DefaultParam , DefaultParamValue ,
1010 ParameterValue , StatResult ,
1111 } ,
12- utils:: { scalar_max , scalar_min } ,
12+ utils:: { sql_greatest , sql_least , sql_percentile } ,
1313 DataFrame , GgsqlError , Mappings , Result ,
1414} ;
1515
@@ -164,8 +164,11 @@ fn stat_boxplot(
164164
165165fn boxplot_sql_compute_summary ( from : & str , groups : & [ String ] , value : & str , coef : & f64 ) -> String {
166166 let groups_str = groups. join ( ", " ) ;
167- let lower_expr = scalar_max ( & [ & format ! ( "q1 - {coef} * (q3 - q1)" ) , "min" ] ) ;
168- let upper_expr = scalar_min ( & [ & format ! ( "q3 + {coef} * (q3 - q1)" ) , "max" ] ) ;
167+ let lower_expr = sql_greatest ( & [ & format ! ( "q1 - {coef} * (q3 - q1)" ) , "min" ] ) ;
168+ let upper_expr = sql_least ( & [ & format ! ( "q3 + {coef} * (q3 - q1)" ) , "max" ] ) ;
169+ let q1 = sql_percentile ( value, 0.25 , from, groups) ;
170+ let median = sql_percentile ( value, 0.50 , from, groups) ;
171+ let q3 = sql_percentile ( value, 0.75 , from, groups) ;
169172 format ! (
170173 "SELECT
171174 *,
@@ -176,18 +179,21 @@ fn boxplot_sql_compute_summary(from: &str, groups: &[String], value: &str, coef:
176179 {groups},
177180 MIN({value}) AS min,
178181 MAX({value}) AS max,
179- PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY {value}) AS q1,
180- PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY {value}) AS median,
181- PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY {value}) AS q3
182- FROM ({from}) AS __ggsql_qt__
182+ {q1} AS q1,
183+ {median} AS median,
184+ {q3} AS q3
185+ FROM ({from}) AS _qt
183186 WHERE {value} IS NOT NULL
184187 GROUP BY {groups}
185- ) AS __ggsql_fn__ " ,
188+ ) AS _fn " ,
186189 lower_expr = lower_expr,
187190 upper_expr = upper_expr,
188191 groups = groups_str,
189192 value = value,
190- from = from
193+ from = from,
194+ q1 = q1,
195+ median = median,
196+ q3 = q3,
191197 )
192198}
193199
@@ -309,32 +315,37 @@ mod tests {
309315 fn test_sql_compute_summary_basic ( ) {
310316 let groups = vec ! [ "category" . to_string( ) ] ;
311317 let result = boxplot_sql_compute_summary ( "data" , & groups, "value" , & 1.5 ) ;
312- assert ! ( result. contains( "PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY value)" ) ) ;
313- assert ! ( result. contains( "PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY value)" ) ) ;
314- assert ! ( result. contains( "PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY value)" ) ) ;
318+ assert ! ( result. contains( "NTILE(4) OVER (ORDER BY value)" ) ) ;
319+ assert ! ( result. contains( "AS q1" ) ) ;
320+ assert ! ( result. contains( "AS median" ) ) ;
321+ assert ! ( result. contains( "AS q3" ) ) ;
315322 assert ! ( result. contains( "MIN(value) AS min" ) ) ;
316323 assert ! ( result. contains( "MAX(value) AS max" ) ) ;
317324 assert ! ( result. contains( "WHERE value IS NOT NULL" ) ) ;
318325 assert ! ( result. contains( "GROUP BY category" ) ) ;
319- assert ! ( result. contains( "SELECT MAX(v) FROM (VALUES (q1 - 1.5" ) ) ;
320- assert ! ( result. contains( "SELECT MIN(v) FROM (VALUES (q3 + 1.5" ) ) ;
326+ assert ! ( result. contains( "CASE WHEN (q1 - 1.5" ) ) ;
327+ assert ! ( result. contains( "CASE WHEN (q3 + 1.5" ) ) ;
321328 }
322329
323330 #[ test]
324331 fn test_sql_compute_summary_multiple_groups ( ) {
325332 let groups = vec ! [ "cat" . to_string( ) , "region" . to_string( ) ] ;
326333 let result = boxplot_sql_compute_summary ( "tbl" , & groups, "val" , & 1.5 ) ;
327334 assert ! ( result. contains( "GROUP BY cat, region" ) ) ;
328- assert ! ( result. contains( "PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY val)" ) ) ;
335+ assert ! ( result. contains( "NTILE(4) OVER (ORDER BY val)" ) ) ;
329336 }
330337
331338 #[ test]
332339 fn test_sql_compute_summary_custom_coef ( ) {
333340 let groups = vec ! [ "pos1" . to_string( ) ] ;
334341 let result = boxplot_sql_compute_summary ( "q" , & groups, "pos2" , & 2.5 ) ;
335342 assert ! ( result. contains( "2.5" ) ) ;
336- assert ! ( result. contains( "SELECT MAX(v) FROM (VALUES (q1 - 2.5 * (q3 - q1)), (min)) AS t(v)" ) ) ;
337- assert ! ( result. contains( "SELECT MIN(v) FROM (VALUES (q3 + 2.5 * (q3 - q1)), (max)) AS t(v)" ) ) ;
343+ assert ! (
344+ result. contains( "(CASE WHEN (q1 - 2.5 * (q3 - q1)) >= (min) THEN (q1 - 2.5 * (q3 - q1)) ELSE (min) END)" )
345+ ) ;
346+ assert ! (
347+ result. contains( "(CASE WHEN (q3 + 2.5 * (q3 - q1)) <= (max) THEN (q3 + 2.5 * (q3 - q1)) ELSE (max) END)" )
348+ ) ;
338349 }
339350
340351 #[ test]
@@ -355,22 +366,27 @@ mod tests {
355366 let groups = vec ! [ "category" . to_string( ) ] ;
356367 let result = boxplot_sql_compute_summary ( "SELECT * FROM sales" , & groups, "price" , & 1.5 ) ;
357368
358- let expected = r#"SELECT
369+ let q1 = sql_percentile ( "price" , 0.25 , "SELECT * FROM sales" , & groups) ;
370+ let median = sql_percentile ( "price" , 0.50 , "SELECT * FROM sales" , & groups) ;
371+ let q3 = sql_percentile ( "price" , 0.75 , "SELECT * FROM sales" , & groups) ;
372+ let expected = format ! (
373+ r#"SELECT
359374 *,
360- (SELECT MAX(v) FROM (VALUES (q1 - 1.5 * (q3 - q1)), (min)) AS t(v) ) AS lower,
361- (SELECT MIN(v) FROM (VALUES (q3 + 1.5 * (q3 - q1)), (max)) AS t(v) ) AS upper
375+ (CASE WHEN (q1 - 1.5 * (q3 - q1)) >= (min) THEN (q1 - 1.5 * (q3 - q1)) ELSE (min) END ) AS lower,
376+ (CASE WHEN (q3 + 1.5 * (q3 - q1)) <= (max) THEN (q3 + 1.5 * (q3 - q1)) ELSE (max) END ) AS upper
362377 FROM (
363378 SELECT
364379 category,
365380 MIN(price) AS min,
366381 MAX(price) AS max,
367- PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY price) AS q1,
368- PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY price) AS median,
369- PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY price) AS q3
370- FROM (SELECT * FROM sales) AS __ggsql_qt__
382+ {q1} AS q1,
383+ {median} AS median,
384+ {q3} AS q3
385+ FROM (SELECT * FROM sales) AS _qt
371386 WHERE price IS NOT NULL
372387 GROUP BY category
373- ) AS __ggsql_fn__"# ;
388+ ) AS _fn"#
389+ ) ;
374390
375391 assert_eq ! ( result, expected) ;
376392 }
@@ -380,22 +396,27 @@ mod tests {
380396 let groups = vec ! [ "region" . to_string( ) , "product" . to_string( ) ] ;
381397 let result = boxplot_sql_compute_summary ( "SELECT * FROM data" , & groups, "revenue" , & 1.5 ) ;
382398
383- let expected = r#"SELECT
399+ let q1 = sql_percentile ( "revenue" , 0.25 , "SELECT * FROM data" , & groups) ;
400+ let median = sql_percentile ( "revenue" , 0.50 , "SELECT * FROM data" , & groups) ;
401+ let q3 = sql_percentile ( "revenue" , 0.75 , "SELECT * FROM data" , & groups) ;
402+ let expected = format ! (
403+ r#"SELECT
384404 *,
385- (SELECT MAX(v) FROM (VALUES (q1 - 1.5 * (q3 - q1)), (min)) AS t(v) ) AS lower,
386- (SELECT MIN(v) FROM (VALUES (q3 + 1.5 * (q3 - q1)), (max)) AS t(v) ) AS upper
405+ (CASE WHEN (q1 - 1.5 * (q3 - q1)) >= (min) THEN (q1 - 1.5 * (q3 - q1)) ELSE (min) END ) AS lower,
406+ (CASE WHEN (q3 + 1.5 * (q3 - q1)) <= (max) THEN (q3 + 1.5 * (q3 - q1)) ELSE (max) END ) AS upper
387407 FROM (
388408 SELECT
389409 region, product,
390410 MIN(revenue) AS min,
391411 MAX(revenue) AS max,
392- PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY revenue) AS q1,
393- PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY revenue) AS median,
394- PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY revenue) AS q3
395- FROM (SELECT * FROM data) AS __ggsql_qt__
412+ {q1} AS q1,
413+ {median} AS median,
414+ {q3} AS q3
415+ FROM (SELECT * FROM data) AS _qt
396416 WHERE revenue IS NOT NULL
397417 GROUP BY region, product
398- ) AS __ggsql_fn__"# ;
418+ ) AS _fn"#
419+ ) ;
399420
400421 assert_eq ! ( result, expected) ;
401422 }
0 commit comments