From 4310ec85837a7b430bc9a85ce427d37bbcd464d3 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 3 Mar 2026 05:48:44 -0500 Subject: [PATCH 1/2] Add tests for simplifying multiple aggregate expressions --- .../test_files/aggregates_simplify.slt | 344 ++++++++++++++++++ 1 file changed, 344 insertions(+) create mode 100644 datafusion/sqllogictest/test_files/aggregates_simplify.slt diff --git a/datafusion/sqllogictest/test_files/aggregates_simplify.slt b/datafusion/sqllogictest/test_files/aggregates_simplify.slt new file mode 100644 index 000000000000..cc2e40540b98 --- /dev/null +++ b/datafusion/sqllogictest/test_files/aggregates_simplify.slt @@ -0,0 +1,344 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +####### +# Tests for aggregate optimizations / simplifications +####### + +statement ok +CREATE TABLE sum_simplify_t AS VALUES (1, 100), (1, 200), (2, 100), (NULL, NULL); + +# Baseline SUM of an expression +query I +SELECT SUM(column1 + 1) FROM sum_simplify_t; +---- +7 + +query TT +EXPLAIN SELECT SUM(column1 + 1) FROM sum_simplify_t; +---- +logical_plan +01)Aggregate: groupBy=[[]], aggr=[[sum(sum_simplify_t.column1 + Int64(1))]] +02)--TableScan: sum_simplify_t projection=[column1] +physical_plan +01)AggregateExec: mode=Single, gby=[], aggr=[sum(sum_simplify_t.column1 + Int64(1))] +02)--DataSourceExec: partitions=1, partition_sizes=[1] + + +# Mixed aggregate expressions with type validation +query TI +SELECT arrow_typeof(SUM(column1)), SUM(column1 + 1) FROM sum_simplify_t; +---- +Int64 7 + +query TT +EXPLAIN SELECT arrow_typeof(SUM(column1)), SUM(column1), SUM(column1 + 1) FROM sum_simplify_t; +---- +logical_plan +01)Projection: arrow_typeof(sum(sum_simplify_t.column1)), sum(sum_simplify_t.column1), sum(sum_simplify_t.column1 + Int64(1)) +02)--Aggregate: groupBy=[[]], aggr=[[sum(sum_simplify_t.column1), sum(sum_simplify_t.column1 + Int64(1))]] +03)----TableScan: sum_simplify_t projection=[column1] +physical_plan +01)ProjectionExec: expr=[arrow_typeof(sum(sum_simplify_t.column1)@0) as arrow_typeof(sum(sum_simplify_t.column1)), sum(sum_simplify_t.column1)@0 as sum(sum_simplify_t.column1), sum(sum_simplify_t.column1 + Int64(1))@1 as sum(sum_simplify_t.column1 + Int64(1))] +02)--AggregateExec: mode=Single, gby=[], aggr=[sum(sum_simplify_t.column1), sum(sum_simplify_t.column1 + Int64(1))] +03)----DataSourceExec: partitions=1, partition_sizes=[1] + +# Duplicate aggregate expressions +query II +SELECT SUM(column1 + 1) AS sum_plus_1_a, SUM(column1 + 1) AS sum_plus_1_b FROM sum_simplify_t; +---- +7 7 + +query TT +EXPLAIN SELECT SUM(column1 + 1) AS sum_plus_1_a, SUM(column1 + 1) AS sum_plus_1_b FROM sum_simplify_t; +---- +logical_plan +01)Projection: sum(sum_simplify_t.column1 + Int64(1)) AS sum_plus_1_a, sum(sum_simplify_t.column1 + Int64(1)) AS sum_plus_1_b +02)--Aggregate: groupBy=[[]], aggr=[[sum(sum_simplify_t.column1 + Int64(1))]] +03)----TableScan: sum_simplify_t projection=[column1] +physical_plan +01)ProjectionExec: expr=[sum(sum_simplify_t.column1 + Int64(1))@0 as sum_plus_1_a, sum(sum_simplify_t.column1 + Int64(1))@0 as sum_plus_1_b] +02)--AggregateExec: mode=Single, gby=[], aggr=[sum(sum_simplify_t.column1 + Int64(1))] +03)----DataSourceExec: partitions=1, partition_sizes=[1] + + +# constant aggregate expressions +query II +SELECT SUM(2+1), SUM(3) FROM sum_simplify_t; +---- +12 12 + +query TT +EXPLAIN SELECT SUM(2+1), SUM(3) FROM sum_simplify_t; +---- +logical_plan +01)Projection: __common_expr_1 AS sum(Int64(2) + Int64(1)), __common_expr_1 AS sum(Int64(3)) +02)--Aggregate: groupBy=[[]], aggr=[[sum(Int64(3)) AS __common_expr_1]] +03)----TableScan: sum_simplify_t projection=[] +physical_plan +01)ProjectionExec: expr=[__common_expr_1@0 as sum(Int64(2) + Int64(1)), __common_expr_1@0 as sum(Int64(3))] +02)--AggregateExec: mode=Single, gby=[], aggr=[__common_expr_1] +03)----DataSourceExec: partitions=1, partition_sizes=[1] + + +# Duplicated expression across multiple aggregate arguments. +query II +SELECT SUM(column1 + 1), SUM(column1 + 2) FROM sum_simplify_t; +---- +7 10 + + +query TT +EXPLAIN SELECT SUM(column1 + 1), SUM(column1 + 2) FROM sum_simplify_t; +---- +logical_plan +01)Aggregate: groupBy=[[]], aggr=[[sum(sum_simplify_t.column1 + Int64(1)), sum(sum_simplify_t.column1 + Int64(2))]] +02)--TableScan: sum_simplify_t projection=[column1] +physical_plan +01)AggregateExec: mode=Single, gby=[], aggr=[sum(sum_simplify_t.column1 + Int64(1)), sum(sum_simplify_t.column1 + Int64(2))] +02)--DataSourceExec: partitions=1, partition_sizes=[1] + +# Reordered expressions that still compute the same thing +query II +SELECT SUM(1 + column1), SUM(column1 + 2) FROM sum_simplify_t; +---- +7 10 + +query TT +EXPLAIN SELECT SUM(1 + column1), SUM(column1 + 2) FROM sum_simplify_t; +---- +logical_plan +01)Aggregate: groupBy=[[]], aggr=[[sum(Int64(1) + sum_simplify_t.column1), sum(sum_simplify_t.column1 + Int64(2))]] +02)--TableScan: sum_simplify_t projection=[column1] +physical_plan +01)AggregateExec: mode=Single, gby=[], aggr=[sum(Int64(1) + sum_simplify_t.column1), sum(sum_simplify_t.column1 + Int64(2))] +02)--DataSourceExec: partitions=1, partition_sizes=[1] + +# DISTINCT aggregates with different arguments +query II +SELECT SUM(DISTINCT column1 + 1), SUM(DISTINCT column1 + 2) FROM sum_simplify_t; +---- +5 7 + +query TT +EXPLAIN SELECT SUM(DISTINCT column1 + 1), SUM(DISTINCT column1 + 2) FROM sum_simplify_t; +---- +logical_plan +01)Aggregate: groupBy=[[]], aggr=[[sum(DISTINCT sum_simplify_t.column1 + Int64(1)), sum(DISTINCT sum_simplify_t.column1 + Int64(2))]] +02)--TableScan: sum_simplify_t projection=[column1] +physical_plan +01)AggregateExec: mode=Single, gby=[], aggr=[sum(DISTINCT sum_simplify_t.column1 + Int64(1)), sum(DISTINCT sum_simplify_t.column1 + Int64(2))] +02)--DataSourceExec: partitions=1, partition_sizes=[1] + +# DISTINCT and non-DISTINCT aggregates +query II +SELECT SUM(DISTINCT column1 + 1), SUM(column1 + 1) FROM sum_simplify_t; +---- +5 7 + +query TT +EXPLAIN SELECT SUM(DISTINCT column1 + 1), SUM(column1 + 1) FROM sum_simplify_t; +---- +logical_plan +01)Projection: sum(alias1) AS sum(DISTINCT sum_simplify_t.column1 + Int64(1)), sum(alias2) AS sum(sum_simplify_t.column1 + Int64(1)) +02)--Aggregate: groupBy=[[]], aggr=[[sum(alias1), sum(alias2)]] +03)----Aggregate: groupBy=[[__common_expr_1 AS alias1]], aggr=[[sum(__common_expr_1) AS alias2]] +04)------Projection: sum_simplify_t.column1 + Int64(1) AS __common_expr_1 +05)--------TableScan: sum_simplify_t projection=[column1] +physical_plan +01)ProjectionExec: expr=[sum(alias1)@0 as sum(DISTINCT sum_simplify_t.column1 + Int64(1)), sum(alias2)@1 as sum(sum_simplify_t.column1 + Int64(1))] +02)--AggregateExec: mode=Final, gby=[], aggr=[sum(alias1), sum(alias2)] +03)----CoalescePartitionsExec +04)------AggregateExec: mode=Partial, gby=[], aggr=[sum(alias1), sum(alias2)] +05)--------AggregateExec: mode=FinalPartitioned, gby=[alias1@0 as alias1], aggr=[alias2] +06)----------RepartitionExec: partitioning=Hash([alias1@0], 4), input_partitions=1 +07)------------AggregateExec: mode=Partial, gby=[__common_expr_1@0 as alias1], aggr=[alias2] +08)--------------ProjectionExec: expr=[column1@0 + 1 as __common_expr_1] +09)----------------DataSourceExec: partitions=1, partition_sizes=[1] + +# FILTER clauses with different aggregate arguments +query II +SELECT SUM(column1 + 1) FILTER (WHERE column1 > 1), SUM(column1 + 2) FILTER (WHERE column1 > 2) FROM sum_simplify_t; +---- +3 NULL + +query TT +EXPLAIN SELECT SUM(column1 + 1) FILTER (WHERE column1 > 1), SUM(column1 + 2) FILTER (WHERE column1 > 2) FROM sum_simplify_t; +---- +logical_plan +01)Aggregate: groupBy=[[]], aggr=[[sum(sum_simplify_t.column1 + Int64(1)) FILTER (WHERE sum_simplify_t.column1 > Int64(1)), sum(sum_simplify_t.column1 + Int64(2)) FILTER (WHERE sum_simplify_t.column1 > Int64(2))]] +02)--TableScan: sum_simplify_t projection=[column1] +physical_plan +01)AggregateExec: mode=Single, gby=[], aggr=[sum(sum_simplify_t.column1 + Int64(1)) FILTER (WHERE sum_simplify_t.column1 > Int64(1)), sum(sum_simplify_t.column1 + Int64(2)) FILTER (WHERE sum_simplify_t.column1 > Int64(2))] +02)--DataSourceExec: partitions=1, partition_sizes=[1] + +# FILTER clauses with the same aggregate argument +query II +SELECT + SUM(column1 + 1) FILTER (WHERE column1 > 1) AS filtered_sum_a, + SUM(column1 + 1) FILTER (WHERE column1 > 1) AS filtered_sum_b +FROM sum_simplify_t; +---- +3 3 + +query TT +EXPLAIN SELECT + SUM(column1 + 1) FILTER (WHERE column1 > 1) AS filtered_sum_a, + SUM(column1 + 1) FILTER (WHERE column1 > 1) AS filtered_sum_b +FROM sum_simplify_t; +---- +logical_plan +01)Projection: sum(sum_simplify_t.column1 + Int64(1)) FILTER (WHERE sum_simplify_t.column1 > Int64(1)) AS filtered_sum_a, sum(sum_simplify_t.column1 + Int64(1)) FILTER (WHERE sum_simplify_t.column1 > Int64(1)) AS filtered_sum_b +02)--Aggregate: groupBy=[[]], aggr=[[sum(sum_simplify_t.column1 + Int64(1)) FILTER (WHERE sum_simplify_t.column1 > Int64(1))]] +03)----TableScan: sum_simplify_t projection=[column1] +physical_plan +01)ProjectionExec: expr=[sum(sum_simplify_t.column1 + Int64(1)) FILTER (WHERE sum_simplify_t.column1 > Int64(1))@0 as filtered_sum_a, sum(sum_simplify_t.column1 + Int64(1)) FILTER (WHERE sum_simplify_t.column1 > Int64(1))@0 as filtered_sum_b] +02)--AggregateExec: mode=Single, gby=[], aggr=[sum(sum_simplify_t.column1 + Int64(1)) FILTER (WHERE sum_simplify_t.column1 > Int64(1))] +03)----DataSourceExec: partitions=1, partition_sizes=[1] + +# Same aggregate argument with different FILTER predicates +query II +SELECT SUM(column1 + 1) FILTER (WHERE column1 > 1), SUM(column1 + 1) FILTER (WHERE column1 > 0) FROM sum_simplify_t; +---- +3 7 + +query TT +EXPLAIN SELECT SUM(column1 + 1) FILTER (WHERE column1 > 1), SUM(column1 + 1) FILTER (WHERE column1 > 0) FROM sum_simplify_t; +---- +logical_plan +01)Aggregate: groupBy=[[]], aggr=[[sum(__common_expr_1 AS sum_simplify_t.column1 + Int64(1)) FILTER (WHERE sum_simplify_t.column1 > Int64(1)), sum(__common_expr_1 AS sum_simplify_t.column1 + Int64(1)) FILTER (WHERE sum_simplify_t.column1 > Int64(0))]] +02)--Projection: sum_simplify_t.column1 + Int64(1) AS __common_expr_1, sum_simplify_t.column1 +03)----TableScan: sum_simplify_t projection=[column1] +physical_plan +01)AggregateExec: mode=Single, gby=[], aggr=[sum(sum_simplify_t.column1 + Int64(1)) FILTER (WHERE sum_simplify_t.column1 > Int64(1)), sum(sum_simplify_t.column1 + Int64(1)) FILTER (WHERE sum_simplify_t.column1 > Int64(0))] +02)--ProjectionExec: expr=[column1@0 + 1 as __common_expr_1, column1@0 as column1] +03)----DataSourceExec: partitions=1, partition_sizes=[1] + +# volatile aggregate arguments +query B +SELECT SUM(random() + 1) < SUM(random() + 2) FROM sum_simplify_t; +---- +true + +query TT +EXPLAIN SELECT SUM(random() + 1) < SUM(random() + 2) FROM sum_simplify_t; +---- +logical_plan +01)Projection: sum(random() + Int64(2)) > sum(random() + Int64(1)) AS sum(random() + Int64(1)) < sum(random() + Int64(2)) +02)--Aggregate: groupBy=[[]], aggr=[[sum(random() + Float64(1)) AS sum(random() + Int64(1)), sum(random() + Float64(2)) AS sum(random() + Int64(2))]] +03)----TableScan: sum_simplify_t projection=[] +physical_plan +01)ProjectionExec: expr=[sum(random() + Int64(2))@1 > sum(random() + Int64(1))@0 as sum(random() + Int64(1)) < sum(random() + Int64(2))] +02)--AggregateExec: mode=Single, gby=[], aggr=[sum(random() + Int64(1)), sum(random() + Int64(2))] +03)----DataSourceExec: partitions=1, partition_sizes=[1] + +# Checks grouped aggregates with explicit ORDER BY return deterministic row order. +query III +SELECT column2, SUM(column1 + 1), SUM(column1 + 2) FROM sum_simplify_t GROUP BY column2 ORDER BY column2 DESC NULLS LAST; +---- +200 2 3 +100 5 7 +NULL NULL NULL + +query TT +EXPLAIN SELECT column2, SUM(column1 + 1), SUM(column1 + 2) FROM sum_simplify_t GROUP BY column2 ORDER BY column2 DESC NULLS LAST; +---- +logical_plan +01)Sort: sum_simplify_t.column2 DESC NULLS LAST +02)--Aggregate: groupBy=[[sum_simplify_t.column2]], aggr=[[sum(sum_simplify_t.column1 + Int64(1)), sum(sum_simplify_t.column1 + Int64(2))]] +03)----TableScan: sum_simplify_t projection=[column1, column2] +physical_plan +01)SortPreservingMergeExec: [column2@0 DESC NULLS LAST] +02)--SortExec: expr=[column2@0 DESC NULLS LAST], preserve_partitioning=[true] +03)----AggregateExec: mode=FinalPartitioned, gby=[column2@0 as column2], aggr=[sum(sum_simplify_t.column1 + Int64(1)), sum(sum_simplify_t.column1 + Int64(2))] +04)------RepartitionExec: partitioning=Hash([column2@0], 4), input_partitions=1 +05)--------AggregateExec: mode=Partial, gby=[column2@1 as column2], aggr=[sum(sum_simplify_t.column1 + Int64(1)), sum(sum_simplify_t.column1 + Int64(2))] +06)----------DataSourceExec: partitions=1, partition_sizes=[1] + +# Checks commutative forms of equivalent aggregate arguments are simplified consistently. +query II +SELECT SUM(1 + column1), SUM(column1 + 1) FROM sum_simplify_t; +---- +7 7 + +query TT +EXPLAIN SELECT SUM(1 + column1), SUM(column1 + 1) FROM sum_simplify_t; +---- +logical_plan +01)Aggregate: groupBy=[[]], aggr=[[sum(__common_expr_1 AS Int64(1) + sum_simplify_t.column1), sum(__common_expr_1 AS sum_simplify_t.column1 + Int64(1))]] +02)--Projection: Int64(1) + sum_simplify_t.column1 AS __common_expr_1 +03)----TableScan: sum_simplify_t projection=[column1] +physical_plan +01)AggregateExec: mode=Single, gby=[], aggr=[sum(Int64(1) + sum_simplify_t.column1), sum(sum_simplify_t.column1 + Int64(1))] +02)--ProjectionExec: expr=[1 + column1@0 as __common_expr_1] +03)----DataSourceExec: partitions=1, partition_sizes=[1] + +# Checks unsigned overflow edge case from PR discussion using transformed SUM arguments. +statement ok +CREATE TABLE IF NOT EXISTS tbl (val INTEGER UNSIGNED); + +statement ok +INSERT INTO tbl VALUES (4294967295); + +statement ok +INSERT INTO tbl VALUES (4294967295); + +# Checks transformed SUM results for unsigned max values are preserved. +query TII +SELECT arrow_typeof(SUM(val + 1)), SUM(val + 1), SUM(val + 2) FROM tbl; +---- +Int64 8589934592 8589934594 + +query TT +EXPLAIN SELECT arrow_typeof(SUM(val + 1)), SUM(val + 1), SUM(val + 2) FROM tbl; +---- +logical_plan +01)Projection: arrow_typeof(sum(tbl.val + Int64(1))), sum(tbl.val + Int64(1)), sum(tbl.val + Int64(2)) +02)--Aggregate: groupBy=[[]], aggr=[[sum(__common_expr_1 AS tbl.val + Int64(1)), sum(__common_expr_1 AS tbl.val + Int64(2))]] +03)----Projection: CAST(tbl.val AS Int64) AS __common_expr_1 +04)------TableScan: tbl projection=[val] +physical_plan +01)ProjectionExec: expr=[arrow_typeof(sum(tbl.val + Int64(1))@0) as arrow_typeof(sum(tbl.val + Int64(1))), sum(tbl.val + Int64(1))@0 as sum(tbl.val + Int64(1)), sum(tbl.val + Int64(2))@1 as sum(tbl.val + Int64(2))] +02)--AggregateExec: mode=Single, gby=[], aggr=[sum(tbl.val + Int64(1)), sum(tbl.val + Int64(2))] +03)----ProjectionExec: expr=[CAST(val@0 AS Int64) as __common_expr_1] +04)------DataSourceExec: partitions=1, partition_sizes=[2] + +# Checks equivalent rewritten form (SUM + COUNT terms) matches transformed SUM semantics. +query RR +SELECT SUM(val) + 1 * COUNT(val), SUM(val) + 2 * COUNT(val) FROM tbl; +---- +8589934592 8589934594 + +query TT +EXPLAIN SELECT SUM(val) + 1 * COUNT(val), SUM(val) + 2 * COUNT(val) FROM tbl; +---- +logical_plan +01)Projection: __common_expr_1 + CAST(count(tbl.val) AS Decimal128(20, 0)) AS sum(tbl.val) + Int64(1) * count(tbl.val), __common_expr_1 AS sum(tbl.val) + CAST(Int64(2) * count(tbl.val) AS Decimal128(20, 0)) +02)--Projection: CAST(sum(tbl.val) AS Decimal128(20, 0)) AS __common_expr_1, count(tbl.val) +03)----Aggregate: groupBy=[[]], aggr=[[sum(CAST(tbl.val AS UInt64)), count(tbl.val)]] +04)------TableScan: tbl projection=[val] +physical_plan +01)ProjectionExec: expr=[__common_expr_1@0 + CAST(count(tbl.val)@1 AS Decimal128(20, 0)) as sum(tbl.val) + Int64(1) * count(tbl.val), __common_expr_1@0 + CAST(2 * count(tbl.val)@1 AS Decimal128(20, 0)) as sum(tbl.val) + Int64(2) * count(tbl.val)] +02)--ProjectionExec: expr=[CAST(sum(tbl.val)@0 AS Decimal128(20, 0)) as __common_expr_1, count(tbl.val)@1 as count(tbl.val)] +03)----AggregateExec: mode=Single, gby=[], aggr=[sum(tbl.val), count(tbl.val)] +04)------DataSourceExec: partitions=1, partition_sizes=[2] + +statement ok +DROP TABLE IF EXISTS tbl; + +statement ok +DROP TABLE sum_simplify_t; From 25350af01187695f757ad9ab2dcdc722cf19b3fc Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 3 Mar 2026 04:45:44 -0500 Subject: [PATCH 2/2] Add rewrite SUM(expr+C) --> SUM(expr) + COUNT(expr)*C --- datafusion/expr/src/expr.rs | 2 +- datafusion/expr/src/simplify.rs | 13 ++ datafusion/functions-aggregate/src/sum.rs | 156 +++++++++++++++++- .../simplify_expressions/simplify_exprs.rs | 137 ++++++++++++++- .../test_files/aggregates_simplify.slt | 80 +++++---- .../sqllogictest/test_files/clickbench.slt | 15 +- 6 files changed, 353 insertions(+), 50 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 5c6acd480e9e..12c879a51571 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -600,7 +600,7 @@ impl Alias { } } -/// Binary expression +/// Binary expression for [`Expr::BinaryExpr`] #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] pub struct BinaryExpr { /// Left-hand side of the expression diff --git a/datafusion/expr/src/simplify.rs b/datafusion/expr/src/simplify.rs index 8c68067a55a3..0794de14dd9b 100644 --- a/datafusion/expr/src/simplify.rs +++ b/datafusion/expr/src/simplify.rs @@ -38,6 +38,7 @@ pub struct SimplifyContext { schema: DFSchemaRef, query_execution_start_time: Option>, config_options: Arc, + aggregate_exprs: Option>>, } impl Default for SimplifyContext { @@ -46,6 +47,7 @@ impl Default for SimplifyContext { schema: Arc::new(DFSchema::empty()), query_execution_start_time: None, config_options: Arc::new(ConfigOptions::default()), + aggregate_exprs: None, } } } @@ -78,6 +80,12 @@ impl SimplifyContext { self } + /// Set aggregate expressions from the containing aggregate node, if any. + pub fn with_aggregate_exprs(mut self, aggregate_exprs: Arc>) -> Self { + self.aggregate_exprs = Some(aggregate_exprs); + self + } + /// Returns the schema pub fn schema(&self) -> &DFSchemaRef { &self.schema @@ -108,6 +116,11 @@ impl SimplifyContext { pub fn config_options(&self) -> &Arc { &self.config_options } + + /// Returns aggregate expressions from the containing aggregate node, if any. + pub fn aggregate_exprs(&self) -> Option<&[Expr]> { + self.aggregate_exprs.as_deref().map(Vec::as_slice) + } } /// Was the expression simplified? diff --git a/datafusion/functions-aggregate/src/sum.rs b/datafusion/functions-aggregate/src/sum.rs index 198ba54adfa2..2e2a51326d07 100644 --- a/datafusion/functions-aggregate/src/sum.rs +++ b/datafusion/functions-aggregate/src/sum.rs @@ -32,12 +32,17 @@ use datafusion_common::types::{ logical_int64, logical_uint8, logical_uint16, logical_uint32, logical_uint64, }; use datafusion_common::{HashMap, Result, ScalarValue, exec_err, not_impl_err}; -use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; +use datafusion_expr::expr::{AggregateFunction, AggregateFunctionParams}; +use datafusion_expr::expr_fn::cast; +use datafusion_expr::function::{ + AccumulatorArgs, AggregateFunctionSimplification, StateFieldsArgs, +}; +use datafusion_expr::simplify::SimplifyContext; use datafusion_expr::utils::{AggregateOrderSensitivity, format_state_name}; use datafusion_expr::{ - Accumulator, AggregateUDFImpl, Coercion, Documentation, Expr, GroupsAccumulator, - ReversedUDAF, SetMonotonicity, Signature, TypeSignature, TypeSignatureClass, - Volatility, + Accumulator, AggregateUDFImpl, BinaryExpr, Coercion, Documentation, Expr, + GroupsAccumulator, Operator, ReversedUDAF, SetMonotonicity, Signature, TypeSignature, + TypeSignatureClass, Volatility, }; use datafusion_functions_aggregate_common::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator; use datafusion_functions_aggregate_common::aggregate::sum_distinct::DistinctSumAccumulator; @@ -54,7 +59,7 @@ make_udaf_expr_and_func!( ); pub fn sum_distinct(expr: Expr) -> Expr { - Expr::AggregateFunction(datafusion_expr::expr::AggregateFunction::new_udf( + Expr::AggregateFunction(AggregateFunction::new_udf( sum_udaf(), vec![expr], true, @@ -346,6 +351,147 @@ impl AggregateUDFImpl for Sum { _ => SetMonotonicity::NotMonotonic, } } + + /// Simplification Rules + fn simplify(&self) -> Option { + Some(Box::new(sum_simplifier)) + } +} + +/// Implement ClickBench Q29 specific optimization: +/// `SUM(arg + constant)` --> `SUM(arg) + constant * COUNT(arg)` +/// +/// Backstory: TODO +/// +fn sum_simplifier(mut agg: AggregateFunction, info: &SimplifyContext) -> Result { + // Explicitly destructure to ensure we check all relevant fields + let AggregateFunctionParams { + args, + distinct, + filter, + order_by, + null_treatment, + } = &agg.params; + + if *distinct + || filter.is_some() + || !order_by.is_empty() + || null_treatment.is_some() + || args.len() != 1 + { + return Ok(Expr::AggregateFunction(agg)); + } + + // otherwise check the arguments if they are + + let (arg, lit) = match SplitResult::new(agg.params.args[0].clone()) { + SplitResult::Original => return Ok(Expr::AggregateFunction(agg)), + SplitResult::Split { arg, lit } => (arg, lit), + }; + + if !has_common_rewrite_arg(&arg, info) { + return Ok(Expr::AggregateFunction(agg)); + } + + let lit_type = match &lit { + Expr::Literal(value, _) => value.data_type(), + _ => unreachable!("SplitResult::Split guarantees literal side"), + }; + if lit_type == DataType::Null { + return Ok(Expr::AggregateFunction(agg)); + } + + // Rewrite to SUM(arg) + agg.params.args = vec![arg.clone()]; + let sum_agg = Expr::AggregateFunction(agg); + + let count_agg = cast(crate::count::count(arg), lit_type); + + // sum(arg) + scalar * COUNT(arg) + Ok(sum_agg + (lit * count_agg)) +} + +fn has_common_rewrite_arg(arg: &Expr, info: &SimplifyContext) -> bool { + let Some(aggregate_exprs) = info.aggregate_exprs() else { + // Only apply this rewrite in the context of an Aggregate node where + // sibling aggregate expressions are known. + return false; + }; + + aggregate_exprs + .iter() + .filter_map(sum_rewrite_candidate_arg) + .filter(|candidate_arg| candidate_arg == arg) + .take(2) + .count() + > 1 +} + +fn sum_rewrite_candidate_arg(expr: &Expr) -> Option { + let Expr::AggregateFunction(aggregate_fn) = expr.clone().unalias_nested().data else { + return None; + }; + if !aggregate_fn.func.name().eq_ignore_ascii_case("sum") { + return None; + } + + let AggregateFunctionParams { + args, + distinct, + filter, + order_by, + null_treatment, + } = &aggregate_fn.params; + + if *distinct + || filter.is_some() + || !order_by.is_empty() + || null_treatment.is_some() + || args.len() != 1 + { + return None; + } + + match SplitResult::new(args[0].clone()) { + SplitResult::Split { arg, .. } => Some(arg), + SplitResult::Original => None, + } +} + +/// Result of trying to split an expression into an arg and constant +#[expect(clippy::large_enum_variant)] +#[derive(Debug, Clone)] +enum SplitResult { + /// if the expression is either of + /// * ` ` + /// * ` ` + /// + /// When `op` is `+` + Split { arg: Expr, lit: Expr }, + /// If the expression is something else + Original, +} + +impl SplitResult { + fn new(expr: Expr) -> Self { + let Expr::BinaryExpr(BinaryExpr { left, op, right }) = expr else { + return Self::Original; + }; + if op != Operator::Plus { + return Self::Original; + } + + match (left.as_ref(), right.as_ref()) { + (Expr::Literal(..), _) => Self::Split { + arg: *right, + lit: *left, + }, + (_, Expr::Literal(..)) => Self::Split { + arg: *left, + lit: *right, + }, + _ => Self::Original, + } + } } /// This accumulator computes SUM incrementally diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs index f7f100015004..428d5966b000 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs @@ -20,11 +20,13 @@ use std::sync::Arc; use datafusion_common::tree_node::{Transformed, TreeNode}; -use datafusion_common::{DFSchema, DFSchemaRef, DataFusionError, Result}; +use datafusion_common::{Column, DFSchema, DFSchemaRef, DataFusionError, Result}; use datafusion_expr::Expr; -use datafusion_expr::logical_plan::LogicalPlan; +use datafusion_expr::logical_plan::{Aggregate, LogicalPlan, Projection}; use datafusion_expr::simplify::SimplifyContext; -use datafusion_expr::utils::merge_schema; +use datafusion_expr::utils::{ + columnize_expr, find_aggregate_exprs, grouping_set_to_exprlist, merge_schema, +}; use crate::optimizer::ApplyOrder; use crate::utils::NamePreserver; @@ -101,6 +103,12 @@ impl SimplifyExpressions { .with_config_options(config.options()) .with_query_execution_start_time(config.query_execution_start_time()); + let info = if let LogicalPlan::Aggregate(Aggregate { aggr_expr, .. }) = &plan { + info.with_aggregate_exprs(Arc::new(aggr_expr.clone())) + } else { + info + }; + // Inputs have already been rewritten (due to bottom-up traversal handled by Optimizer) // Just need to rewrite our own expressions @@ -130,14 +138,16 @@ impl SimplifyExpressions { )) }; - plan.map_expressions(|expr| { + let transformed = plan.map_expressions(|expr| { // Preserve the aliasing of grouping sets. if let Expr::GroupingSet(_) = &expr { expr.map_children(&mut rewrite_expr) } else { rewrite_expr(expr) } - }) + })?; + + transformed.transform_data(rewrite_aggregate_non_aggregate_aggr_expr) } } @@ -148,6 +158,75 @@ impl SimplifyExpressions { } } +/// Rewrites +/// `Aggregate(group_expr, aggr_expr=[non_agg_expr(sum(..), count(..), ..)])` +/// into: +/// `Projection(..)` over `Aggregate(group_expr, aggr_expr=[sum(..), count(..), ..])`. +/// +/// Aggregate planning requires each aggregate slot to be an aggregate function +/// (possibly aliased). Some UDAF simplifiers can return larger expressions that +/// reference multiple aggregate functions. +fn rewrite_aggregate_non_aggregate_aggr_expr( + plan: LogicalPlan, +) -> Result> { + let LogicalPlan::Aggregate(Aggregate { + input, + group_expr, + aggr_expr, + .. + }) = plan + else { + return Ok(Transformed::no(plan)); + }; + + if aggr_expr.iter().all(is_top_level_aggregate_expr) { + return Ok(Transformed::no(LogicalPlan::Aggregate(Aggregate::try_new( + input, group_expr, aggr_expr, + )?))); + } + + let inner_aggr_expr = find_aggregate_exprs(aggr_expr.iter()); + let inner_aggregate = LogicalPlan::Aggregate(Aggregate::try_new( + Arc::clone(&input), + group_expr.clone(), + inner_aggr_expr, + )?); + let inner_aggregate = Arc::new(inner_aggregate); + + let mut projection_exprs = aggregate_output_exprs(&group_expr)?; + projection_exprs.extend(aggr_expr); + let projection_exprs = projection_exprs + .into_iter() + .map(|expr| columnize_expr(expr, inner_aggregate.as_ref())) + .collect::>>()?; + + Ok(Transformed::yes(LogicalPlan::Projection( + Projection::try_new(projection_exprs, inner_aggregate)?, + ))) +} + +fn is_top_level_aggregate_expr(expr: &Expr) -> bool { + matches!( + expr.clone().unalias_nested().data, + Expr::AggregateFunction(_) + ) +} + +fn aggregate_output_exprs(group_expr: &[Expr]) -> Result> { + let mut output_exprs = grouping_set_to_exprlist(group_expr)? + .into_iter() + .cloned() + .collect::>(); + + if matches!(group_expr, [Expr::GroupingSet(_)]) { + output_exprs.push(Expr::Column(Column::from_name( + Aggregate::INTERNAL_GROUPING_ID, + ))); + } + + Ok(output_exprs) +} + #[cfg(test)] mod tests { use std::ops::Not; @@ -158,7 +237,7 @@ mod tests { use datafusion_expr::logical_plan::builder::table_scan_with_filters; use datafusion_expr::logical_plan::table_scan; use datafusion_expr::*; - use datafusion_functions_aggregate::expr_fn::{max, min}; + use datafusion_functions_aggregate::expr_fn::{max, min, sum}; use crate::OptimizerContext; use crate::assert_optimized_plan_eq_snapshot; @@ -258,6 +337,52 @@ mod tests { ) } + #[test] + fn test_simplify_udaf_to_non_aggregate_expr() -> Result<()> { + let schema = Schema::new(vec![Field::new("a", DataType::Int64, false)]); + let table_scan = table_scan(Some("test"), &schema, None)? + .build() + .expect("building scan"); + + let plan = LogicalPlanBuilder::from(table_scan) + .aggregate(Vec::::new(), vec![sum(col("a") + lit(2i64))])? + .build()?; + + assert_optimized_plan_equal!( + plan, + @r" + Aggregate: groupBy=[[]], aggr=[[sum(test.a + Int64(2))]] + TableScan: test + " + )?; + Ok(()) + } + + #[test] + fn test_simplify_udaf_to_non_aggregate_expr_with_common_sum_arg() -> Result<()> { + let schema = Schema::new(vec![Field::new("a", DataType::Int64, false)]); + let table_scan = table_scan(Some("test"), &schema, None)? + .build() + .expect("building scan"); + + let plan = LogicalPlanBuilder::from(table_scan) + .aggregate( + Vec::::new(), + vec![sum(col("a") + lit(2i64)), sum(col("a") + lit(3i64))], + )? + .build()?; + + assert_optimized_plan_equal!( + plan, + @r" + Projection: sum(test.a) + Int64(2) * CAST(count(test.a) AS Int64) AS sum(test.a + Int64(2)), sum(test.a) + Int64(3) * CAST(count(test.a) AS Int64) AS sum(test.a + Int64(3)) + Aggregate: groupBy=[[]], aggr=[[sum(test.a), count(test.a)]] + TableScan: test + " + )?; + Ok(()) + } + #[test] fn test_simplify_optimized_plan_with_or() -> Result<()> { let table_scan = test_table_scan(); diff --git a/datafusion/sqllogictest/test_files/aggregates_simplify.slt b/datafusion/sqllogictest/test_files/aggregates_simplify.slt index cc2e40540b98..9c49521cb4ba 100644 --- a/datafusion/sqllogictest/test_files/aggregates_simplify.slt +++ b/datafusion/sqllogictest/test_files/aggregates_simplify.slt @@ -106,11 +106,14 @@ query TT EXPLAIN SELECT SUM(column1 + 1), SUM(column1 + 2) FROM sum_simplify_t; ---- logical_plan -01)Aggregate: groupBy=[[]], aggr=[[sum(sum_simplify_t.column1 + Int64(1)), sum(sum_simplify_t.column1 + Int64(2))]] -02)--TableScan: sum_simplify_t projection=[column1] +01)Projection: sum(sum_simplify_t.column1) + __common_expr_1 AS sum(sum_simplify_t.column1 + Int64(1)), sum(sum_simplify_t.column1) + Int64(2) * __common_expr_1 AS sum(sum_simplify_t.column1 + Int64(2)) +02)--Projection: CAST(count(sum_simplify_t.column1) AS Int64) AS __common_expr_1, sum(sum_simplify_t.column1) +03)----Aggregate: groupBy=[[]], aggr=[[sum(sum_simplify_t.column1), count(sum_simplify_t.column1)]] +04)------TableScan: sum_simplify_t projection=[column1] physical_plan -01)AggregateExec: mode=Single, gby=[], aggr=[sum(sum_simplify_t.column1 + Int64(1)), sum(sum_simplify_t.column1 + Int64(2))] -02)--DataSourceExec: partitions=1, partition_sizes=[1] +01)ProjectionExec: expr=[sum(sum_simplify_t.column1)@0 + count(sum_simplify_t.column1)@1 as sum(sum_simplify_t.column1 + Int64(1)), sum(sum_simplify_t.column1)@0 + 2 * count(sum_simplify_t.column1)@1 as sum(sum_simplify_t.column1 + Int64(2))] +02)--AggregateExec: mode=Single, gby=[], aggr=[sum(sum_simplify_t.column1), count(sum_simplify_t.column1)] +03)----DataSourceExec: partitions=1, partition_sizes=[1] # Reordered expressions that still compute the same thing query II @@ -122,11 +125,14 @@ query TT EXPLAIN SELECT SUM(1 + column1), SUM(column1 + 2) FROM sum_simplify_t; ---- logical_plan -01)Aggregate: groupBy=[[]], aggr=[[sum(Int64(1) + sum_simplify_t.column1), sum(sum_simplify_t.column1 + Int64(2))]] -02)--TableScan: sum_simplify_t projection=[column1] +01)Projection: sum(sum_simplify_t.column1) + __common_expr_1 AS sum(Int64(1) + sum_simplify_t.column1), sum(sum_simplify_t.column1) + Int64(2) * __common_expr_1 AS sum(sum_simplify_t.column1 + Int64(2)) +02)--Projection: CAST(count(sum_simplify_t.column1) AS Int64) AS __common_expr_1, sum(sum_simplify_t.column1) +03)----Aggregate: groupBy=[[]], aggr=[[sum(sum_simplify_t.column1), count(sum_simplify_t.column1)]] +04)------TableScan: sum_simplify_t projection=[column1] physical_plan -01)AggregateExec: mode=Single, gby=[], aggr=[sum(Int64(1) + sum_simplify_t.column1), sum(sum_simplify_t.column1 + Int64(2))] -02)--DataSourceExec: partitions=1, partition_sizes=[1] +01)ProjectionExec: expr=[sum(sum_simplify_t.column1)@0 + count(sum_simplify_t.column1)@1 as sum(Int64(1) + sum_simplify_t.column1), sum(sum_simplify_t.column1)@0 + 2 * count(sum_simplify_t.column1)@1 as sum(sum_simplify_t.column1 + Int64(2))] +02)--AggregateExec: mode=Single, gby=[], aggr=[sum(sum_simplify_t.column1), count(sum_simplify_t.column1)] +03)----DataSourceExec: partitions=1, partition_sizes=[1] # DISTINCT aggregates with different arguments query II @@ -238,13 +244,15 @@ query TT EXPLAIN SELECT SUM(random() + 1) < SUM(random() + 2) FROM sum_simplify_t; ---- logical_plan -01)Projection: sum(random() + Int64(2)) > sum(random() + Int64(1)) AS sum(random() + Int64(1)) < sum(random() + Int64(2)) -02)--Aggregate: groupBy=[[]], aggr=[[sum(random() + Float64(1)) AS sum(random() + Int64(1)), sum(random() + Float64(2)) AS sum(random() + Int64(2))]] -03)----TableScan: sum_simplify_t projection=[] +01)Projection: sum(random()) + Float64(2) * __common_expr_1 > sum(random()) + __common_expr_1 AS sum(random() + Int64(1)) < sum(random() + Int64(2)) +02)--Projection: CAST(count(random()) AS Float64) AS __common_expr_1, sum(random()) +03)----Aggregate: groupBy=[[]], aggr=[[sum(random()), count(random())]] +04)------TableScan: sum_simplify_t projection=[] physical_plan -01)ProjectionExec: expr=[sum(random() + Int64(2))@1 > sum(random() + Int64(1))@0 as sum(random() + Int64(1)) < sum(random() + Int64(2))] -02)--AggregateExec: mode=Single, gby=[], aggr=[sum(random() + Int64(1)), sum(random() + Int64(2))] -03)----DataSourceExec: partitions=1, partition_sizes=[1] +01)ProjectionExec: expr=[sum(random())@1 + 2 * __common_expr_1@0 > sum(random())@1 + __common_expr_1@0 as sum(random() + Int64(1)) < sum(random() + Int64(2))] +02)--ProjectionExec: expr=[CAST(count(random())@1 AS Float64) as __common_expr_1, sum(random())@0 as sum(random())] +03)----AggregateExec: mode=Single, gby=[], aggr=[sum(random()), count(random())] +04)------DataSourceExec: partitions=1, partition_sizes=[1] # Checks grouped aggregates with explicit ORDER BY return deterministic row order. query III @@ -259,15 +267,18 @@ EXPLAIN SELECT column2, SUM(column1 + 1), SUM(column1 + 2) FROM sum_simplify_t G ---- logical_plan 01)Sort: sum_simplify_t.column2 DESC NULLS LAST -02)--Aggregate: groupBy=[[sum_simplify_t.column2]], aggr=[[sum(sum_simplify_t.column1 + Int64(1)), sum(sum_simplify_t.column1 + Int64(2))]] -03)----TableScan: sum_simplify_t projection=[column1, column2] +02)--Projection: sum_simplify_t.column2, sum(sum_simplify_t.column1) + __common_expr_1 AS sum(sum_simplify_t.column1 + Int64(1)), sum(sum_simplify_t.column1) + Int64(2) * __common_expr_1 AS sum(sum_simplify_t.column1 + Int64(2)) +03)----Projection: CAST(count(sum_simplify_t.column1) AS Int64) AS __common_expr_1, sum_simplify_t.column2, sum(sum_simplify_t.column1) +04)------Aggregate: groupBy=[[sum_simplify_t.column2]], aggr=[[sum(sum_simplify_t.column1), count(sum_simplify_t.column1)]] +05)--------TableScan: sum_simplify_t projection=[column1, column2] physical_plan 01)SortPreservingMergeExec: [column2@0 DESC NULLS LAST] 02)--SortExec: expr=[column2@0 DESC NULLS LAST], preserve_partitioning=[true] -03)----AggregateExec: mode=FinalPartitioned, gby=[column2@0 as column2], aggr=[sum(sum_simplify_t.column1 + Int64(1)), sum(sum_simplify_t.column1 + Int64(2))] -04)------RepartitionExec: partitioning=Hash([column2@0], 4), input_partitions=1 -05)--------AggregateExec: mode=Partial, gby=[column2@1 as column2], aggr=[sum(sum_simplify_t.column1 + Int64(1)), sum(sum_simplify_t.column1 + Int64(2))] -06)----------DataSourceExec: partitions=1, partition_sizes=[1] +03)----ProjectionExec: expr=[column2@0 as column2, sum(sum_simplify_t.column1)@1 + count(sum_simplify_t.column1)@2 as sum(sum_simplify_t.column1 + Int64(1)), sum(sum_simplify_t.column1)@1 + 2 * count(sum_simplify_t.column1)@2 as sum(sum_simplify_t.column1 + Int64(2))] +04)------AggregateExec: mode=FinalPartitioned, gby=[column2@0 as column2], aggr=[sum(sum_simplify_t.column1), count(sum_simplify_t.column1)] +05)--------RepartitionExec: partitioning=Hash([column2@0], 4), input_partitions=1 +06)----------AggregateExec: mode=Partial, gby=[column2@1 as column2], aggr=[sum(sum_simplify_t.column1), count(sum_simplify_t.column1)] +07)------------DataSourceExec: partitions=1, partition_sizes=[1] # Checks commutative forms of equivalent aggregate arguments are simplified consistently. query II @@ -279,13 +290,15 @@ query TT EXPLAIN SELECT SUM(1 + column1), SUM(column1 + 1) FROM sum_simplify_t; ---- logical_plan -01)Aggregate: groupBy=[[]], aggr=[[sum(__common_expr_1 AS Int64(1) + sum_simplify_t.column1), sum(__common_expr_1 AS sum_simplify_t.column1 + Int64(1))]] -02)--Projection: Int64(1) + sum_simplify_t.column1 AS __common_expr_1 -03)----TableScan: sum_simplify_t projection=[column1] +01)Projection: __common_expr_1 AS sum(Int64(1) + sum_simplify_t.column1), __common_expr_1 AS sum(sum_simplify_t.column1 + Int64(1)) +02)--Projection: sum(sum_simplify_t.column1) + CAST(count(sum_simplify_t.column1) AS Int64) AS __common_expr_1 +03)----Aggregate: groupBy=[[]], aggr=[[sum(sum_simplify_t.column1), count(sum_simplify_t.column1)]] +04)------TableScan: sum_simplify_t projection=[column1] physical_plan -01)AggregateExec: mode=Single, gby=[], aggr=[sum(Int64(1) + sum_simplify_t.column1), sum(sum_simplify_t.column1 + Int64(1))] -02)--ProjectionExec: expr=[1 + column1@0 as __common_expr_1] -03)----DataSourceExec: partitions=1, partition_sizes=[1] +01)ProjectionExec: expr=[__common_expr_1@0 as sum(Int64(1) + sum_simplify_t.column1), __common_expr_1@0 as sum(sum_simplify_t.column1 + Int64(1))] +02)--ProjectionExec: expr=[sum(sum_simplify_t.column1)@0 + count(sum_simplify_t.column1)@1 as __common_expr_1] +03)----AggregateExec: mode=Single, gby=[], aggr=[sum(sum_simplify_t.column1), count(sum_simplify_t.column1)] +04)------DataSourceExec: partitions=1, partition_sizes=[1] # Checks unsigned overflow edge case from PR discussion using transformed SUM arguments. statement ok @@ -308,14 +321,17 @@ EXPLAIN SELECT arrow_typeof(SUM(val + 1)), SUM(val + 1), SUM(val + 2) FROM tbl; ---- logical_plan 01)Projection: arrow_typeof(sum(tbl.val + Int64(1))), sum(tbl.val + Int64(1)), sum(tbl.val + Int64(2)) -02)--Aggregate: groupBy=[[]], aggr=[[sum(__common_expr_1 AS tbl.val + Int64(1)), sum(__common_expr_1 AS tbl.val + Int64(2))]] -03)----Projection: CAST(tbl.val AS Int64) AS __common_expr_1 -04)------TableScan: tbl projection=[val] +02)--Projection: sum(tbl.val) + __common_expr_1 AS sum(tbl.val + Int64(1)), sum(tbl.val) + Int64(2) * __common_expr_1 AS sum(tbl.val + Int64(2)) +03)----Projection: CAST(count(tbl.val) AS Int64) AS __common_expr_1, sum(tbl.val) +04)------Aggregate: groupBy=[[]], aggr=[[sum(__common_expr_2 AS tbl.val), count(__common_expr_2 AS tbl.val)]] +05)--------Projection: CAST(tbl.val AS Int64) AS __common_expr_2 +06)----------TableScan: tbl projection=[val] physical_plan 01)ProjectionExec: expr=[arrow_typeof(sum(tbl.val + Int64(1))@0) as arrow_typeof(sum(tbl.val + Int64(1))), sum(tbl.val + Int64(1))@0 as sum(tbl.val + Int64(1)), sum(tbl.val + Int64(2))@1 as sum(tbl.val + Int64(2))] -02)--AggregateExec: mode=Single, gby=[], aggr=[sum(tbl.val + Int64(1)), sum(tbl.val + Int64(2))] -03)----ProjectionExec: expr=[CAST(val@0 AS Int64) as __common_expr_1] -04)------DataSourceExec: partitions=1, partition_sizes=[2] +02)--ProjectionExec: expr=[sum(tbl.val)@0 + count(tbl.val)@1 as sum(tbl.val + Int64(1)), sum(tbl.val)@0 + 2 * count(tbl.val)@1 as sum(tbl.val + Int64(2))] +03)----AggregateExec: mode=Single, gby=[], aggr=[sum(tbl.val), count(tbl.val)] +04)------ProjectionExec: expr=[CAST(val@0 AS Int64) as __common_expr_2] +05)--------DataSourceExec: partitions=1, partition_sizes=[2] # Checks equivalent rewritten form (SUM + COUNT terms) matches transformed SUM semantics. query RR diff --git a/datafusion/sqllogictest/test_files/clickbench.slt b/datafusion/sqllogictest/test_files/clickbench.slt index dd558a4f36f9..d04c940cebe7 100644 --- a/datafusion/sqllogictest/test_files/clickbench.slt +++ b/datafusion/sqllogictest/test_files/clickbench.slt @@ -795,13 +795,16 @@ query TT EXPLAIN SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" + 4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6), SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" + 9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11), SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth" + 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16), SUM("ResolutionWidth" + 17), SUM("ResolutionWidth" + 18), SUM("ResolutionWidth" + 19), SUM("ResolutionWidth" + 20), SUM("ResolutionWidth" + 21), SUM("ResolutionWidth" + 22), SUM("ResolutionWidth" + 23), SUM("ResolutionWidth" + 24), SUM("ResolutionWidth" + 25), SUM("ResolutionWidth" + 26), SUM("ResolutionWidth" + 27), SUM("ResolutionWidth" + 28), SUM("ResolutionWidth" + 29), SUM("ResolutionWidth" + 30), SUM("ResolutionWidth" + 31), SUM("ResolutionWidth" + 32), SUM("ResolutionWidth" + 33), SUM("ResolutionWidth" + 34), SUM("ResolutionWidth" + 35), SUM("ResolutionWidth" + 36), SUM("ResolutionWidth" + 37), SUM("ResolutionWidth" + 38), SUM("ResolutionWidth" + 39), SUM("ResolutionWidth" + 40), SUM("ResolutionWidth" + 41), SUM("ResolutionWidth" + 42), SUM("ResolutionWidth" + 43), SUM("ResolutionWidth" + 44), SUM("ResolutionWidth" + 45), SUM("ResolutionWidth" + 46), SUM("ResolutionWidth" + 47), SUM("ResolutionWidth" + 48), SUM("ResolutionWidth" + 49), SUM("ResolutionWidth" + 50), SUM("ResolutionWidth" + 51), SUM("ResolutionWidth" + 52), SUM("ResolutionWidth" + 53), SUM("ResolutionWidth" + 54), SUM("ResolutionWidth" + 55), SUM("ResolutionWidth" + 56), SUM("ResolutionWidth" + 57), SUM("ResolutionWidth" + 58), SUM("ResolutionWidth" + 59), SUM("ResolutionWidth" + 60), SUM("ResolutionWidth" + 61), SUM("ResolutionWidth" + 62), SUM("ResolutionWidth" + 63), SUM("ResolutionWidth" + 64), SUM("ResolutionWidth" + 65), SUM("ResolutionWidth" + 66), SUM("ResolutionWidth" + 67), SUM("ResolutionWidth" + 68), SUM("ResolutionWidth" + 69), SUM("ResolutionWidth" + 70), SUM("ResolutionWidth" + 71), SUM("ResolutionWidth" + 72), SUM("ResolutionWidth" + 73), SUM("ResolutionWidth" + 74), SUM("ResolutionWidth" + 75), SUM("ResolutionWidth" + 76), SUM("ResolutionWidth" + 77), SUM("ResolutionWidth" + 78), SUM("ResolutionWidth" + 79), SUM("ResolutionWidth" + 80), SUM("ResolutionWidth" + 81), SUM("ResolutionWidth" + 82), SUM("ResolutionWidth" + 83), SUM("ResolutionWidth" + 84), SUM("ResolutionWidth" + 85), SUM("ResolutionWidth" + 86), SUM("ResolutionWidth" + 87), SUM("ResolutionWidth" + 88), SUM("ResolutionWidth" + 89) FROM hits; ---- logical_plan -01)Aggregate: groupBy=[[]], aggr=[[sum(__common_expr_1 AS hits.ResolutionWidth), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(1)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(2)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(3)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(4)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(5)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(6)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(7)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(8)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(9)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(10)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(11)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(12)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(13)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(14)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(15)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(16)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(17)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(18)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(19)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(20)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(21)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(22)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(23)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(24)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(25)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(26)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(27)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(28)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(29)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(30)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(31)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(32)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(33)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(34)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(35)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(36)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(37)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(38)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(39)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(40)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(41)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(42)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(43)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(44)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(45)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(46)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(47)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(48)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(49)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(50)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(51)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(52)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(53)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(54)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(55)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(56)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(57)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(58)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(59)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(60)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(61)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(62)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(63)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(64)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(65)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(66)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(67)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(68)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(69)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(70)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(71)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(72)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(73)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(74)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(75)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(76)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(77)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(78)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(79)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(80)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(81)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(82)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(83)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(84)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(85)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(86)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(87)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(88)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(89))]] -02)--Projection: CAST(hits.ResolutionWidth AS Int64) AS __common_expr_1 -03)----SubqueryAlias: hits -04)------TableScan: hits_raw projection=[ResolutionWidth] +01)Projection: sum(hits.ResolutionWidth), sum(hits.ResolutionWidth) + __common_expr_1 AS sum(hits.ResolutionWidth + Int64(1)), sum(hits.ResolutionWidth) + Int64(2) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(2)), sum(hits.ResolutionWidth) + Int64(3) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(3)), sum(hits.ResolutionWidth) + Int64(4) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(4)), sum(hits.ResolutionWidth) + Int64(5) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(5)), sum(hits.ResolutionWidth) + Int64(6) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(6)), sum(hits.ResolutionWidth) + Int64(7) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(7)), sum(hits.ResolutionWidth) + Int64(8) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(8)), sum(hits.ResolutionWidth) + Int64(9) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(9)), sum(hits.ResolutionWidth) + Int64(10) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(10)), sum(hits.ResolutionWidth) + Int64(11) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(11)), sum(hits.ResolutionWidth) + Int64(12) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(12)), sum(hits.ResolutionWidth) + Int64(13) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(13)), sum(hits.ResolutionWidth) + Int64(14) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(14)), sum(hits.ResolutionWidth) + Int64(15) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(15)), sum(hits.ResolutionWidth) + Int64(16) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(16)), sum(hits.ResolutionWidth) + Int64(17) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(17)), sum(hits.ResolutionWidth) + Int64(18) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(18)), sum(hits.ResolutionWidth) + Int64(19) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(19)), sum(hits.ResolutionWidth) + Int64(20) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(20)), sum(hits.ResolutionWidth) + Int64(21) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(21)), sum(hits.ResolutionWidth) + Int64(22) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(22)), sum(hits.ResolutionWidth) + Int64(23) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(23)), sum(hits.ResolutionWidth) + Int64(24) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(24)), sum(hits.ResolutionWidth) + Int64(25) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(25)), sum(hits.ResolutionWidth) + Int64(26) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(26)), sum(hits.ResolutionWidth) + Int64(27) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(27)), sum(hits.ResolutionWidth) + Int64(28) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(28)), sum(hits.ResolutionWidth) + Int64(29) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(29)), sum(hits.ResolutionWidth) + Int64(30) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(30)), sum(hits.ResolutionWidth) + Int64(31) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(31)), sum(hits.ResolutionWidth) + Int64(32) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(32)), sum(hits.ResolutionWidth) + Int64(33) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(33)), sum(hits.ResolutionWidth) + Int64(34) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(34)), sum(hits.ResolutionWidth) + Int64(35) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(35)), sum(hits.ResolutionWidth) + Int64(36) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(36)), sum(hits.ResolutionWidth) + Int64(37) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(37)), sum(hits.ResolutionWidth) + Int64(38) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(38)), sum(hits.ResolutionWidth) + Int64(39) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(39)), sum(hits.ResolutionWidth) + Int64(40) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(40)), sum(hits.ResolutionWidth) + Int64(41) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(41)), sum(hits.ResolutionWidth) + Int64(42) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(42)), sum(hits.ResolutionWidth) + Int64(43) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(43)), sum(hits.ResolutionWidth) + Int64(44) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(44)), sum(hits.ResolutionWidth) + Int64(45) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(45)), sum(hits.ResolutionWidth) + Int64(46) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(46)), sum(hits.ResolutionWidth) + Int64(47) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(47)), sum(hits.ResolutionWidth) + Int64(48) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(48)), sum(hits.ResolutionWidth) + Int64(49) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(49)), sum(hits.ResolutionWidth) + Int64(50) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(50)), sum(hits.ResolutionWidth) + Int64(51) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(51)), sum(hits.ResolutionWidth) + Int64(52) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(52)), sum(hits.ResolutionWidth) + Int64(53) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(53)), sum(hits.ResolutionWidth) + Int64(54) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(54)), sum(hits.ResolutionWidth) + Int64(55) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(55)), sum(hits.ResolutionWidth) + Int64(56) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(56)), sum(hits.ResolutionWidth) + Int64(57) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(57)), sum(hits.ResolutionWidth) + Int64(58) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(58)), sum(hits.ResolutionWidth) + Int64(59) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(59)), sum(hits.ResolutionWidth) + Int64(60) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(60)), sum(hits.ResolutionWidth) + Int64(61) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(61)), sum(hits.ResolutionWidth) + Int64(62) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(62)), sum(hits.ResolutionWidth) + Int64(63) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(63)), sum(hits.ResolutionWidth) + Int64(64) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(64)), sum(hits.ResolutionWidth) + Int64(65) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(65)), sum(hits.ResolutionWidth) + Int64(66) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(66)), sum(hits.ResolutionWidth) + Int64(67) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(67)), sum(hits.ResolutionWidth) + Int64(68) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(68)), sum(hits.ResolutionWidth) + Int64(69) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(69)), sum(hits.ResolutionWidth) + Int64(70) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(70)), sum(hits.ResolutionWidth) + Int64(71) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(71)), sum(hits.ResolutionWidth) + Int64(72) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(72)), sum(hits.ResolutionWidth) + Int64(73) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(73)), sum(hits.ResolutionWidth) + Int64(74) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(74)), sum(hits.ResolutionWidth) + Int64(75) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(75)), sum(hits.ResolutionWidth) + Int64(76) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(76)), sum(hits.ResolutionWidth) + Int64(77) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(77)), sum(hits.ResolutionWidth) + Int64(78) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(78)), sum(hits.ResolutionWidth) + Int64(79) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(79)), sum(hits.ResolutionWidth) + Int64(80) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(80)), sum(hits.ResolutionWidth) + Int64(81) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(81)), sum(hits.ResolutionWidth) + Int64(82) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(82)), sum(hits.ResolutionWidth) + Int64(83) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(83)), sum(hits.ResolutionWidth) + Int64(84) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(84)), sum(hits.ResolutionWidth) + Int64(85) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(85)), sum(hits.ResolutionWidth) + Int64(86) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(86)), sum(hits.ResolutionWidth) + Int64(87) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(87)), sum(hits.ResolutionWidth) + Int64(88) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(88)), sum(hits.ResolutionWidth) + Int64(89) * __common_expr_1 AS sum(hits.ResolutionWidth + Int64(89)) +02)--Projection: CAST(count(hits.ResolutionWidth) AS Int64) AS __common_expr_1, sum(hits.ResolutionWidth) +03)----Aggregate: groupBy=[[]], aggr=[[sum(__common_expr_2 AS hits.ResolutionWidth), count(__common_expr_2 AS hits.ResolutionWidth)]] +04)------Projection: CAST(hits.ResolutionWidth AS Int64) AS __common_expr_2 +05)--------SubqueryAlias: hits +06)----------TableScan: hits_raw projection=[ResolutionWidth] physical_plan -01)AggregateExec: mode=Single, gby=[], aggr=[sum(hits.ResolutionWidth), sum(hits.ResolutionWidth + Int64(1)), sum(hits.ResolutionWidth + Int64(2)), sum(hits.ResolutionWidth + Int64(3)), sum(hits.ResolutionWidth + Int64(4)), sum(hits.ResolutionWidth + Int64(5)), sum(hits.ResolutionWidth + Int64(6)), sum(hits.ResolutionWidth + Int64(7)), sum(hits.ResolutionWidth + Int64(8)), sum(hits.ResolutionWidth + Int64(9)), sum(hits.ResolutionWidth + Int64(10)), sum(hits.ResolutionWidth + Int64(11)), sum(hits.ResolutionWidth + Int64(12)), sum(hits.ResolutionWidth + Int64(13)), sum(hits.ResolutionWidth + Int64(14)), sum(hits.ResolutionWidth + Int64(15)), sum(hits.ResolutionWidth + Int64(16)), sum(hits.ResolutionWidth + Int64(17)), sum(hits.ResolutionWidth + Int64(18)), sum(hits.ResolutionWidth + Int64(19)), sum(hits.ResolutionWidth + Int64(20)), sum(hits.ResolutionWidth + Int64(21)), sum(hits.ResolutionWidth + Int64(22)), sum(hits.ResolutionWidth + Int64(23)), sum(hits.ResolutionWidth + Int64(24)), sum(hits.ResolutionWidth + Int64(25)), sum(hits.ResolutionWidth + Int64(26)), sum(hits.ResolutionWidth + Int64(27)), sum(hits.ResolutionWidth + Int64(28)), sum(hits.ResolutionWidth + Int64(29)), sum(hits.ResolutionWidth + Int64(30)), sum(hits.ResolutionWidth + Int64(31)), sum(hits.ResolutionWidth + Int64(32)), sum(hits.ResolutionWidth + Int64(33)), sum(hits.ResolutionWidth + Int64(34)), sum(hits.ResolutionWidth + Int64(35)), sum(hits.ResolutionWidth + Int64(36)), sum(hits.ResolutionWidth + Int64(37)), sum(hits.ResolutionWidth + Int64(38)), sum(hits.ResolutionWidth + Int64(39)), sum(hits.ResolutionWidth + Int64(40)), sum(hits.ResolutionWidth + Int64(41)), sum(hits.ResolutionWidth + Int64(42)), sum(hits.ResolutionWidth + Int64(43)), sum(hits.ResolutionWidth + Int64(44)), sum(hits.ResolutionWidth + Int64(45)), sum(hits.ResolutionWidth + Int64(46)), sum(hits.ResolutionWidth + Int64(47)), sum(hits.ResolutionWidth + Int64(48)), sum(hits.ResolutionWidth + Int64(49)), sum(hits.ResolutionWidth + Int64(50)), sum(hits.ResolutionWidth + Int64(51)), sum(hits.ResolutionWidth + Int64(52)), sum(hits.ResolutionWidth + Int64(53)), sum(hits.ResolutionWidth + Int64(54)), sum(hits.ResolutionWidth + Int64(55)), sum(hits.ResolutionWidth + Int64(56)), sum(hits.ResolutionWidth + Int64(57)), sum(hits.ResolutionWidth + Int64(58)), sum(hits.ResolutionWidth + Int64(59)), sum(hits.ResolutionWidth + Int64(60)), sum(hits.ResolutionWidth + Int64(61)), sum(hits.ResolutionWidth + Int64(62)), sum(hits.ResolutionWidth + Int64(63)), sum(hits.ResolutionWidth + Int64(64)), sum(hits.ResolutionWidth + Int64(65)), sum(hits.ResolutionWidth + Int64(66)), sum(hits.ResolutionWidth + Int64(67)), sum(hits.ResolutionWidth + Int64(68)), sum(hits.ResolutionWidth + Int64(69)), sum(hits.ResolutionWidth + Int64(70)), sum(hits.ResolutionWidth + Int64(71)), sum(hits.ResolutionWidth + Int64(72)), sum(hits.ResolutionWidth + Int64(73)), sum(hits.ResolutionWidth + Int64(74)), sum(hits.ResolutionWidth + Int64(75)), sum(hits.ResolutionWidth + Int64(76)), sum(hits.ResolutionWidth + Int64(77)), sum(hits.ResolutionWidth + Int64(78)), sum(hits.ResolutionWidth + Int64(79)), sum(hits.ResolutionWidth + Int64(80)), sum(hits.ResolutionWidth + Int64(81)), sum(hits.ResolutionWidth + Int64(82)), sum(hits.ResolutionWidth + Int64(83)), sum(hits.ResolutionWidth + Int64(84)), sum(hits.ResolutionWidth + Int64(85)), sum(hits.ResolutionWidth + Int64(86)), sum(hits.ResolutionWidth + Int64(87)), sum(hits.ResolutionWidth + Int64(88)), sum(hits.ResolutionWidth + Int64(89))] -02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[CAST(ResolutionWidth@20 AS Int64) as __common_expr_1], file_type=parquet +01)ProjectionExec: expr=[sum(hits.ResolutionWidth)@0 as sum(hits.ResolutionWidth), sum(hits.ResolutionWidth)@0 + count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(1)), sum(hits.ResolutionWidth)@0 + 2 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(2)), sum(hits.ResolutionWidth)@0 + 3 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(3)), sum(hits.ResolutionWidth)@0 + 4 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(4)), sum(hits.ResolutionWidth)@0 + 5 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(5)), sum(hits.ResolutionWidth)@0 + 6 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(6)), sum(hits.ResolutionWidth)@0 + 7 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(7)), sum(hits.ResolutionWidth)@0 + 8 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(8)), sum(hits.ResolutionWidth)@0 + 9 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(9)), sum(hits.ResolutionWidth)@0 + 10 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(10)), sum(hits.ResolutionWidth)@0 + 11 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(11)), sum(hits.ResolutionWidth)@0 + 12 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(12)), sum(hits.ResolutionWidth)@0 + 13 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(13)), sum(hits.ResolutionWidth)@0 + 14 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(14)), sum(hits.ResolutionWidth)@0 + 15 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(15)), sum(hits.ResolutionWidth)@0 + 16 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(16)), sum(hits.ResolutionWidth)@0 + 17 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(17)), sum(hits.ResolutionWidth)@0 + 18 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(18)), sum(hits.ResolutionWidth)@0 + 19 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(19)), sum(hits.ResolutionWidth)@0 + 20 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(20)), sum(hits.ResolutionWidth)@0 + 21 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(21)), sum(hits.ResolutionWidth)@0 + 22 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(22)), sum(hits.ResolutionWidth)@0 + 23 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(23)), sum(hits.ResolutionWidth)@0 + 24 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(24)), sum(hits.ResolutionWidth)@0 + 25 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(25)), sum(hits.ResolutionWidth)@0 + 26 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(26)), sum(hits.ResolutionWidth)@0 + 27 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(27)), sum(hits.ResolutionWidth)@0 + 28 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(28)), sum(hits.ResolutionWidth)@0 + 29 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(29)), sum(hits.ResolutionWidth)@0 + 30 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(30)), sum(hits.ResolutionWidth)@0 + 31 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(31)), sum(hits.ResolutionWidth)@0 + 32 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(32)), sum(hits.ResolutionWidth)@0 + 33 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(33)), sum(hits.ResolutionWidth)@0 + 34 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(34)), sum(hits.ResolutionWidth)@0 + 35 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(35)), sum(hits.ResolutionWidth)@0 + 36 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(36)), sum(hits.ResolutionWidth)@0 + 37 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(37)), sum(hits.ResolutionWidth)@0 + 38 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(38)), sum(hits.ResolutionWidth)@0 + 39 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(39)), sum(hits.ResolutionWidth)@0 + 40 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(40)), sum(hits.ResolutionWidth)@0 + 41 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(41)), sum(hits.ResolutionWidth)@0 + 42 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(42)), sum(hits.ResolutionWidth)@0 + 43 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(43)), sum(hits.ResolutionWidth)@0 + 44 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(44)), sum(hits.ResolutionWidth)@0 + 45 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(45)), sum(hits.ResolutionWidth)@0 + 46 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(46)), sum(hits.ResolutionWidth)@0 + 47 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(47)), sum(hits.ResolutionWidth)@0 + 48 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(48)), sum(hits.ResolutionWidth)@0 + 49 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(49)), sum(hits.ResolutionWidth)@0 + 50 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(50)), sum(hits.ResolutionWidth)@0 + 51 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(51)), sum(hits.ResolutionWidth)@0 + 52 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(52)), sum(hits.ResolutionWidth)@0 + 53 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(53)), sum(hits.ResolutionWidth)@0 + 54 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(54)), sum(hits.ResolutionWidth)@0 + 55 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(55)), sum(hits.ResolutionWidth)@0 + 56 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(56)), sum(hits.ResolutionWidth)@0 + 57 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(57)), sum(hits.ResolutionWidth)@0 + 58 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(58)), sum(hits.ResolutionWidth)@0 + 59 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(59)), sum(hits.ResolutionWidth)@0 + 60 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(60)), sum(hits.ResolutionWidth)@0 + 61 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(61)), sum(hits.ResolutionWidth)@0 + 62 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(62)), sum(hits.ResolutionWidth)@0 + 63 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(63)), sum(hits.ResolutionWidth)@0 + 64 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(64)), sum(hits.ResolutionWidth)@0 + 65 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(65)), sum(hits.ResolutionWidth)@0 + 66 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(66)), sum(hits.ResolutionWidth)@0 + 67 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(67)), sum(hits.ResolutionWidth)@0 + 68 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(68)), sum(hits.ResolutionWidth)@0 + 69 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(69)), sum(hits.ResolutionWidth)@0 + 70 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(70)), sum(hits.ResolutionWidth)@0 + 71 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(71)), sum(hits.ResolutionWidth)@0 + 72 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(72)), sum(hits.ResolutionWidth)@0 + 73 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(73)), sum(hits.ResolutionWidth)@0 + 74 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(74)), sum(hits.ResolutionWidth)@0 + 75 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(75)), sum(hits.ResolutionWidth)@0 + 76 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(76)), sum(hits.ResolutionWidth)@0 + 77 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(77)), sum(hits.ResolutionWidth)@0 + 78 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(78)), sum(hits.ResolutionWidth)@0 + 79 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(79)), sum(hits.ResolutionWidth)@0 + 80 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(80)), sum(hits.ResolutionWidth)@0 + 81 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(81)), sum(hits.ResolutionWidth)@0 + 82 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(82)), sum(hits.ResolutionWidth)@0 + 83 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(83)), sum(hits.ResolutionWidth)@0 + 84 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(84)), sum(hits.ResolutionWidth)@0 + 85 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(85)), sum(hits.ResolutionWidth)@0 + 86 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(86)), sum(hits.ResolutionWidth)@0 + 87 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(87)), sum(hits.ResolutionWidth)@0 + 88 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(88)), sum(hits.ResolutionWidth)@0 + 89 * count(hits.ResolutionWidth)@1 as sum(hits.ResolutionWidth + Int64(89))] +02)--AggregateExec: mode=Single, gby=[], aggr=[sum(hits.ResolutionWidth), count(hits.ResolutionWidth)] +03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[CAST(ResolutionWidth@20 AS Int64) as __common_expr_2], file_type=parquet query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" + 4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6), SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" + 9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11), SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth" + 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16), SUM("ResolutionWidth" + 17), SUM("ResolutionWidth" + 18), SUM("ResolutionWidth" + 19), SUM("ResolutionWidth" + 20), SUM("ResolutionWidth" + 21), SUM("ResolutionWidth" + 22), SUM("ResolutionWidth" + 23), SUM("ResolutionWidth" + 24), SUM("ResolutionWidth" + 25), SUM("ResolutionWidth" + 26), SUM("ResolutionWidth" + 27), SUM("ResolutionWidth" + 28), SUM("ResolutionWidth" + 29), SUM("ResolutionWidth" + 30), SUM("ResolutionWidth" + 31), SUM("ResolutionWidth" + 32), SUM("ResolutionWidth" + 33), SUM("ResolutionWidth" + 34), SUM("ResolutionWidth" + 35), SUM("ResolutionWidth" + 36), SUM("ResolutionWidth" + 37), SUM("ResolutionWidth" + 38), SUM("ResolutionWidth" + 39), SUM("ResolutionWidth" + 40), SUM("ResolutionWidth" + 41), SUM("ResolutionWidth" + 42), SUM("ResolutionWidth" + 43), SUM("ResolutionWidth" + 44), SUM("ResolutionWidth" + 45), SUM("ResolutionWidth" + 46), SUM("ResolutionWidth" + 47), SUM("ResolutionWidth" + 48), SUM("ResolutionWidth" + 49), SUM("ResolutionWidth" + 50), SUM("ResolutionWidth" + 51), SUM("ResolutionWidth" + 52), SUM("ResolutionWidth" + 53), SUM("ResolutionWidth" + 54), SUM("ResolutionWidth" + 55), SUM("ResolutionWidth" + 56), SUM("ResolutionWidth" + 57), SUM("ResolutionWidth" + 58), SUM("ResolutionWidth" + 59), SUM("ResolutionWidth" + 60), SUM("ResolutionWidth" + 61), SUM("ResolutionWidth" + 62), SUM("ResolutionWidth" + 63), SUM("ResolutionWidth" + 64), SUM("ResolutionWidth" + 65), SUM("ResolutionWidth" + 66), SUM("ResolutionWidth" + 67), SUM("ResolutionWidth" + 68), SUM("ResolutionWidth" + 69), SUM("ResolutionWidth" + 70), SUM("ResolutionWidth" + 71), SUM("ResolutionWidth" + 72), SUM("ResolutionWidth" + 73), SUM("ResolutionWidth" + 74), SUM("ResolutionWidth" + 75), SUM("ResolutionWidth" + 76), SUM("ResolutionWidth" + 77), SUM("ResolutionWidth" + 78), SUM("ResolutionWidth" + 79), SUM("ResolutionWidth" + 80), SUM("ResolutionWidth" + 81), SUM("ResolutionWidth" + 82), SUM("ResolutionWidth" + 83), SUM("ResolutionWidth" + 84), SUM("ResolutionWidth" + 85), SUM("ResolutionWidth" + 86), SUM("ResolutionWidth" + 87), SUM("ResolutionWidth" + 88), SUM("ResolutionWidth" + 89) FROM hits;