Skip to content

Commit 59dd951

Browse files
committed
RankExec to optimize ROW_NUMBER <= K
1 parent 832c278 commit 59dd951

8 files changed

Lines changed: 912 additions & 0 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/common/src/config.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,11 @@ config_namespace! {
737737
/// past window functions, if possible
738738
pub enable_window_limits: bool, default = true
739739

740+
/// When set to true, the physical optimizer will rewrite `ROW_NUMBER() OVER
741+
/// (PARTITION BY .. ORDER BY ..) <= K` into a dedicated rank/TopN-per-partition
742+
/// operator that can spill instead of using a full window aggregate.
743+
pub enable_rank_rewrite: bool, default = true
744+
740745
/// When set to true attempts to push down dynamic filters generated by operators into the file scan phase.
741746
/// For example, for a query such as `SELECT * FROM t ORDER BY timestamp DESC LIMIT 10`, the optimizer
742747
/// will attempt to push down the current top 10 timestamps that the TopK operator references into the file scans.

datafusion/physical-optimizer/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ recursive = { workspace = true, optional = true }
5353

5454
[dev-dependencies]
5555
datafusion-expr = { workspace = true }
56+
datafusion-functions-window = { workspace = true }
5657
datafusion-functions-nested = { workspace = true }
5758
insta = { workspace = true }
5859
tokio = { workspace = true }

datafusion/physical-optimizer/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ pub mod limited_distinct_aggregation;
3838
pub mod optimizer;
3939
pub mod output_requirements;
4040
pub mod projection_pushdown;
41+
pub mod rank_topn;
4142
pub use datafusion_pruning as pruning;
4243
mod limit_pushdown_past_window;
4344
pub mod sanity_checker;

datafusion/physical-optimizer/src/optimizer.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ use crate::limit_pushdown::LimitPushdown;
3232
use crate::limited_distinct_aggregation::LimitedDistinctAggregation;
3333
use crate::output_requirements::OutputRequirements;
3434
use crate::projection_pushdown::ProjectionPushdown;
35+
use crate::rank_topn::RankTopNPerPartition;
3536
use crate::sanity_checker::SanityCheckPlan;
3637
use crate::topk_aggregation::TopKAggregation;
3738
use crate::update_aggr_exprs::OptimizeAggregateOrder;
@@ -104,6 +105,7 @@ impl PhysicalOptimizer {
104105
// those are handled by the later `FilterPushdown` rule.
105106
// See `FilterPushdownPhase` for more details.
106107
Arc::new(FilterPushdown::new()),
108+
Arc::new(RankTopNPerPartition::new()),
107109
// The EnforceDistribution rule is for adding essential repartitioning to satisfy distribution
108110
// requirements. Please make sure that the whole plan tree is determined before this rule.
109111
// This rule increases parallelism if doing so is beneficial to the physical plan; i.e. at

0 commit comments

Comments
 (0)