From 1626cf7d8cc862b79b365158bc4ba255424eebe4 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 4 Mar 2026 18:14:17 -0500 Subject: [PATCH 1/2] Improve documentation for AggregateUdfImpl::simplify --- datafusion/expr/src/expr.rs | 4 ++-- datafusion/expr/src/function.rs | 40 +++++++++++++++------------------ datafusion/expr/src/udaf.rs | 31 +++++++++++++------------ datafusion/expr/src/udf.rs | 2 +- datafusion/expr/src/udwf.rs | 27 ++++++++++++---------- 5 files changed, 53 insertions(+), 51 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index ef0187a8782d7..5c6acd480e9ef 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1010,7 +1010,7 @@ impl WindowFunctionDefinition { } } - /// Return the inner window simplification function, if any + /// Returns this window function's simplification hook, if any. /// /// See [`WindowFunctionSimplification`] for more information pub fn simplify(&self) -> Option { @@ -1097,7 +1097,7 @@ impl WindowFunction { } } - /// Return the inner window simplification function, if any + /// Returns this window function's simplification hook, if any. /// /// See [`WindowFunctionSimplification`] for more information pub fn simplify(&self) -> Option { diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs index 68d2c9073241b..68865cbe1ca54 100644 --- a/datafusion/expr/src/function.rs +++ b/datafusion/expr/src/function.rs @@ -27,6 +27,8 @@ pub use datafusion_functions_aggregate_common::accumulator::{ AccumulatorArgs, AccumulatorFactoryFunction, StateFieldsArgs, }; +use crate::expr::{AggregateFunction, WindowFunction}; +use crate::simplify::SimplifyContext; pub use datafusion_functions_window_common::expr::ExpressionArgs; pub use datafusion_functions_window_common::field::WindowUDFFieldArgs; pub use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; @@ -64,28 +66,22 @@ pub type PartitionEvaluatorFactory = pub type StateTypeFunction = Arc Result>> + Send + Sync>; -/// [crate::udaf::AggregateUDFImpl::simplify] simplifier closure -/// A closure with two arguments: -/// * 'aggregate_function': [crate::expr::AggregateFunction] for which simplified has been invoked -/// * 'info': [crate::simplify::SimplifyContext] +/// Type alias for [crate::udaf::AggregateUDFImpl::simplify]. /// -/// Closure returns simplified [Expr] or an error. -pub type AggregateFunctionSimplification = Box< - dyn Fn( - crate::expr::AggregateFunction, - &crate::simplify::SimplifyContext, - ) -> Result, ->; +/// This closure is invoked with: +/// * `aggregate_function`: [AggregateFunction] with already simplified arguments +/// * `info`: [SimplifyContext] +/// +/// It returns a simplified [Expr] or an error. +pub type AggregateFunctionSimplification = + Box Result>; -/// [crate::udwf::WindowUDFImpl::simplify] simplifier closure -/// A closure with two arguments: -/// * 'window_function': [crate::expr::WindowFunction] for which simplified has been invoked -/// * 'info': [crate::simplify::SimplifyContext] +/// Type alias for [crate::udwf::WindowUDFImpl::simplify]. +/// +/// This closure is invoked with: +/// * `window_function`: [WindowFunction] with already simplified arguments +/// * `info`: [SimplifyContext] /// -/// Closure returns simplified [Expr] or an error. -pub type WindowFunctionSimplification = Box< - dyn Fn( - crate::expr::WindowFunction, - &crate::simplify::SimplifyContext, - ) -> Result, ->; +/// It returns a simplified [Expr] or an error. +pub type WindowFunctionSimplification = + Box Result>; diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index ee38077dbf304..adc7d701a1fc5 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -294,7 +294,7 @@ impl AggregateUDF { self.inner.reverse_expr() } - /// Do the function rewrite + /// Returns this aggregate function's simplification hook, if any. /// /// See [`AggregateUDFImpl::simplify`] for more details. pub fn simplify(&self) -> Option { @@ -651,26 +651,29 @@ pub trait AggregateUDFImpl: Debug + DynEq + DynHash + Send + Sync { AggregateOrderSensitivity::HardRequirement } - /// Optionally apply per-UDaF simplification / rewrite rules. + /// Returns an optional hook for simplifying this user-defined aggregate. /// - /// This can be used to apply function specific simplification rules during - /// optimization (e.g. `arrow_cast` --> `Expr::Cast`). The default - /// implementation does nothing. + /// Use this hook to apply function-specific rewrites during optimization. + /// The default implementation returns `None`. /// - /// Note that DataFusion handles simplifying arguments and "constant - /// folding" (replacing a function call with constant arguments such as - /// `my_add(1,2) --> 3` ). Thus, there is no need to implement such - /// optimizations manually for specific UDFs. + /// For example, `percentile_cont(x, 0.0)` and `percentile_cont(x, 1.0)` can + /// be rewritten to `MIN(x)` or `MAX(x)` depending on the `ORDER BY` + /// direction. + /// + /// DataFusion already simplifies arguments and performs constant folding + /// (for example, `my_add(1, 2) -> 3`), so there is usually no need to + /// implement those optimizations manually for specific UDFs. /// /// # Returns /// - /// [None] if simplify is not defined or, + /// `None` if simplify is not defined. /// - /// Or, a closure with two arguments: - /// * 'aggregate_function': [AggregateFunction] for which simplified has been invoked - /// * 'info': [crate::simplify::SimplifyContext] + /// Or, a closure ([`AggregateFunctionSimplification`]) invoked with: + /// * `aggregate_function`: [AggregateFunction] with already simplified + /// arguments + /// * `info`: [crate::simplify::SimplifyContext] /// - /// closure returns simplified [Expr] or an error. + /// The closure returns a simplified [Expr] or an error. /// /// # Notes /// diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 9705006e41afc..9286dd30b11a7 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -217,7 +217,7 @@ impl ScalarUDF { self.inner.return_field_from_args(args) } - /// Do the function rewrite + /// Returns this scalar function's simplification result. /// /// See [`ScalarUDFImpl::simplify`] for more details. pub fn simplify( diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 8f2b8a0d9bfe5..7f86a69f8712c 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -157,7 +157,7 @@ impl WindowUDF { self.inner.signature() } - /// Do the function rewrite + /// Returns this window function's simplification hook, if any. /// /// See [`WindowUDFImpl::simplify`] for more details. pub fn simplify(&self) -> Option { @@ -344,25 +344,28 @@ pub trait WindowUDFImpl: Debug + DynEq + DynHash + Send + Sync { partition_evaluator_args: PartitionEvaluatorArgs, ) -> Result>; - /// Optionally apply per-UDWF simplification / rewrite rules. + /// Returns an optional hook for simplifying this user-defined window + /// function. /// - /// This can be used to apply function specific simplification rules during - /// optimization. The default implementation does nothing. + /// Use this hook to apply function-specific rewrites during optimization. + /// The default implementation returns `None`. /// - /// Note that DataFusion handles simplifying arguments and "constant - /// folding" (replacing a function call with constant arguments such as - /// `my_add(1,2) --> 3` ). Thus, there is no need to implement such - /// optimizations manually for specific UDFs. + /// DataFusion already simplifies arguments and performs constant folding + /// (for example, `my_add(1, 2) -> 3`), so there is usually no need to + /// implement those optimizations manually for specific UDFs. /// /// Example: /// `advanced_udwf.rs`: /// /// # Returns - /// [None] if simplify is not defined or, + /// `None` if simplify is not defined. /// - /// Or, a closure with two arguments: - /// * 'window_function': [crate::expr::WindowFunction] for which simplified has been invoked - /// * 'info': [crate::simplify::SimplifyContext] + /// Or, a closure ([`WindowFunctionSimplification`]) invoked with: + /// * `window_function`: [WindowFunction] with already simplified + /// arguments + /// * `info`: [crate::simplify::SimplifyContext] + /// + /// The closure returns a simplified [Expr] or an error. /// /// # Notes /// The returned expression must have the same schema as the original From b3f72f2c4730f205bc026cda2c354a9b617eef92 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 5 Mar 2026 09:14:12 -0500 Subject: [PATCH 2/2] Update datafusion/expr/src/udaf.rs Co-authored-by: Yongting You <2010youy01@gmail.com> --- datafusion/expr/src/udaf.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index adc7d701a1fc5..f2e2c53cdbbde 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -661,8 +661,13 @@ pub trait AggregateUDFImpl: Debug + DynEq + DynHash + Send + Sync { /// direction. /// /// DataFusion already simplifies arguments and performs constant folding - /// (for example, `my_add(1, 2) -> 3`), so there is usually no need to - /// implement those optimizations manually for specific UDFs. + /// (for example, `my_add(1, 2) -> 3`). For nested expressions, the optimizer + /// runs simplification in multiple passes, so arguments are typically + /// simplified before this hook is invoked. As a result, UDF implementations + /// usually do not need to handle argument simplification themselves. + /// + /// See configuration `datafusion.optimizer.max_passes` for details on how many + /// optimization passes may be applied. /// /// # Returns ///