From 083684218728c5330b67e94313dc0f749efc6d39 Mon Sep 17 00:00:00 2001 From: Minni Mittal Date: Tue, 26 May 2026 10:46:01 +0000 Subject: [PATCH 1/5] [CORE] Route bitmap_construct_agg to native Velox execution Register bitmap_construct_agg as a supported aggregate expression in the Velox backend, allowing it to be executed natively instead of falling back to vanilla Spark. Changes: - Add BITMAP_CONSTRUCT_AGG constant to ExpressionNames - Register Sig[BitmapConstructAgg] in Spark 3.5, 4.0, and 4.1 shims - Add bitmap_construct_agg to C++ plan validator allowed list Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc | 3 ++- .../scala/org/apache/gluten/expression/ExpressionNames.scala | 1 + .../org/apache/gluten/sql/shims/spark35/Spark35Shims.scala | 3 ++- .../org/apache/gluten/sql/shims/spark40/Spark40Shims.scala | 3 ++- .../org/apache/gluten/sql/shims/spark41/Spark41Shims.scala | 3 ++- 5 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc index 3ea800b60ce..1b1b2cbe854 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc @@ -1323,7 +1323,8 @@ bool SubstraitToVeloxPlanValidator::validate(const ::substrait::AggregateRel& ag "regr_slope", "regr_intercept", "regr_sxy", - "regr_replacement"}; + "regr_replacement", + "bitmap_construct_agg"}; auto udafFuncs = UdfLoader::getInstance()->getRegisteredUdafNames(); diff --git a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala index d4afb7ff739..edd285b0f84 100644 --- a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala +++ b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala @@ -32,6 +32,7 @@ object ExpressionNames { final val COLLECT_LIST = "collect_list" final val COLLECT_SET = "collect_set" final val BLOOM_FILTER_AGG = "bloom_filter_agg" + final val BITMAP_CONSTRUCT_AGG = "bitmap_construct_agg" final val VAR_SAMP = "var_samp" final val VAR_POP = "var_pop" final val BIT_AND_AGG = "bit_and" diff --git a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala index 8be204816a8..28c1bb177a8 100644 --- a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala +++ b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala @@ -80,7 +80,8 @@ class Spark35Shims extends SparkShims { Sig[RegrSlope](ExpressionNames.REGR_SLOPE), Sig[RegrIntercept](ExpressionNames.REGR_INTERCEPT), Sig[RegrSXY](ExpressionNames.REGR_SXY), - Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT) + Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT), + Sig[BitmapConstructAgg](ExpressionNames.BITMAP_CONSTRUCT_AGG) ) } diff --git a/shims/spark40/src/main/scala/org/apache/gluten/sql/shims/spark40/Spark40Shims.scala b/shims/spark40/src/main/scala/org/apache/gluten/sql/shims/spark40/Spark40Shims.scala index fb38af30609..6363be33038 100644 --- a/shims/spark40/src/main/scala/org/apache/gluten/sql/shims/spark40/Spark40Shims.scala +++ b/shims/spark40/src/main/scala/org/apache/gluten/sql/shims/spark40/Spark40Shims.scala @@ -85,7 +85,8 @@ class Spark40Shims extends SparkShims { Sig[RegrSlope](ExpressionNames.REGR_SLOPE), Sig[RegrIntercept](ExpressionNames.REGR_INTERCEPT), Sig[RegrSXY](ExpressionNames.REGR_SXY), - Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT) + Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT), + Sig[BitmapConstructAgg](ExpressionNames.BITMAP_CONSTRUCT_AGG) ) } diff --git a/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala b/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala index 238c8ded813..3aece98d861 100644 --- a/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala +++ b/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala @@ -84,7 +84,8 @@ class Spark41Shims extends SparkShims { Sig[RegrSlope](ExpressionNames.REGR_SLOPE), Sig[RegrIntercept](ExpressionNames.REGR_INTERCEPT), Sig[RegrSXY](ExpressionNames.REGR_SXY), - Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT) + Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT), + Sig[BitmapConstructAgg](ExpressionNames.BITMAP_CONSTRUCT_AGG) ) } From c6777817d845c48ab3f0344d07d6474726d5c8ab Mon Sep 17 00:00:00 2001 From: Minni Mittal Date: Tue, 26 May 2026 17:45:56 +0000 Subject: [PATCH 2/5] Add plan-shape assertion test for bitmap_construct_agg Adds a test verifying that bitmap_construct_agg routes to native Velox execution (HashAggregateExecBaseTransformer) rather than falling back to vanilla Spark. Test added for Spark 3.5, 4.0, and 4.1. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../GlutenBitmapExpressionsQuerySuite.scala | 21 ++++++++++++++++++- .../GlutenBitmapExpressionsQuerySuite.scala | 21 ++++++++++++++++++- .../GlutenBitmapExpressionsQuerySuite.scala | 21 ++++++++++++++++++- 3 files changed, 60 insertions(+), 3 deletions(-) diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala index e07821857a5..97072bfeff7 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala @@ -16,6 +16,25 @@ */ package org.apache.spark.sql +import org.apache.gluten.execution.HashAggregateExecBaseTransformer + +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper + class GlutenBitmapExpressionsQuerySuite extends BitmapExpressionsQuerySuite - with GlutenSQLTestsTrait {} + with GlutenSQLTestsTrait + with AdaptiveSparkPlanHelper { + + test("bitmap_construct_agg routes to native") { + val df = spark.sql( + "SELECT bitmap_construct_agg(bitmap_bit_position(col)) " + + "FROM values (1L), (2L), (3L) AS t(col)") + df.collect() + assert( + collectWithSubqueries(df.queryExecution.executedPlan) { + case h: HashAggregateExecBaseTransformer => h + }.nonEmpty, + "Expected native HashAggregateExecBaseTransformer in plan" + ) + } +} diff --git a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala index e07821857a5..97072bfeff7 100644 --- a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala +++ b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala @@ -16,6 +16,25 @@ */ package org.apache.spark.sql +import org.apache.gluten.execution.HashAggregateExecBaseTransformer + +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper + class GlutenBitmapExpressionsQuerySuite extends BitmapExpressionsQuerySuite - with GlutenSQLTestsTrait {} + with GlutenSQLTestsTrait + with AdaptiveSparkPlanHelper { + + test("bitmap_construct_agg routes to native") { + val df = spark.sql( + "SELECT bitmap_construct_agg(bitmap_bit_position(col)) " + + "FROM values (1L), (2L), (3L) AS t(col)") + df.collect() + assert( + collectWithSubqueries(df.queryExecution.executedPlan) { + case h: HashAggregateExecBaseTransformer => h + }.nonEmpty, + "Expected native HashAggregateExecBaseTransformer in plan" + ) + } +} diff --git a/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala b/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala index e07821857a5..97072bfeff7 100644 --- a/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala +++ b/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala @@ -16,6 +16,25 @@ */ package org.apache.spark.sql +import org.apache.gluten.execution.HashAggregateExecBaseTransformer + +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper + class GlutenBitmapExpressionsQuerySuite extends BitmapExpressionsQuerySuite - with GlutenSQLTestsTrait {} + with GlutenSQLTestsTrait + with AdaptiveSparkPlanHelper { + + test("bitmap_construct_agg routes to native") { + val df = spark.sql( + "SELECT bitmap_construct_agg(bitmap_bit_position(col)) " + + "FROM values (1L), (2L), (3L) AS t(col)") + df.collect() + assert( + collectWithSubqueries(df.queryExecution.executedPlan) { + case h: HashAggregateExecBaseTransformer => h + }.nonEmpty, + "Expected native HashAggregateExecBaseTransformer in plan" + ) + } +} From 1297a33f6b719442a979fc3f7c90ac45e6033e0c Mon Sep 17 00:00:00 2001 From: Minni Mittal Date: Wed, 27 May 2026 12:41:03 +0000 Subject: [PATCH 3/5] Exclude INVALID_BITMAP_POSITION error tests for native execution bitmap_construct_agg offloaded to Velox throws GlutenException instead of SparkArrayIndexOutOfBoundsException. Exclude these error-path tests following the established pattern for native execution error type mismatches. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../org/apache/gluten/utils/velox/VeloxTestSettings.scala | 4 ++++ .../org/apache/gluten/utils/velox/VeloxTestSettings.scala | 4 ++++ .../org/apache/gluten/utils/velox/VeloxTestSettings.scala | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 904bb5ef94f..0680080345f 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -85,6 +85,10 @@ class VeloxTestSettings extends BackendTestSettings { "INCONSISTENT_BEHAVIOR_CROSS_VERSION: compatibility with Spark 2.4/3.2 in reading/writing dates") // Doesn't support unhex with failOnError=true. .exclude("CONVERSION_INVALID_INPUT: to_binary conversion function hex") + // bitmap_construct_agg offloaded to Velox throws GlutenException instead of + // SparkArrayIndexOutOfBoundsException. + .exclude("INVALID_BITMAP_POSITION: position out of bounds") + .exclude("INVALID_BITMAP_POSITION: negative position") enableSuite[GlutenQueryParsingErrorsSuite] enableSuite[GlutenArithmeticExpressionSuite] .exclude("SPARK-45786: Decimal multiply, divide, remainder, quot") diff --git a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index f564324b141..3692322e6e9 100644 --- a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -282,6 +282,10 @@ class VeloxTestSettings extends BackendTestSettings { "INCONSISTENT_BEHAVIOR_CROSS_VERSION: compatibility with Spark 2.4/3.2 in reading/writing dates") // Doesn't support unhex with failOnError=true. .exclude("CONVERSION_INVALID_INPUT: to_binary conversion function hex") + // bitmap_construct_agg offloaded to Velox throws GlutenException instead of + // SparkArrayIndexOutOfBoundsException. + .exclude("INVALID_BITMAP_POSITION: position out of bounds") + .exclude("INVALID_BITMAP_POSITION: negative position") enableSuite[GlutenQueryParsingErrorsSuite] enableSuite[GlutenQueryContextSuite] enableSuite[GlutenQueryExecutionAnsiErrorsSuite] diff --git a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index d43199709c2..d0c0a144442 100644 --- a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -293,6 +293,10 @@ class VeloxTestSettings extends BackendTestSettings { "INCONSISTENT_BEHAVIOR_CROSS_VERSION: compatibility with Spark 2.4/3.2 in reading/writing dates") // Doesn't support unhex with failOnError=true. .exclude("CONVERSION_INVALID_INPUT: to_binary conversion function hex") + // bitmap_construct_agg offloaded to Velox throws GlutenException instead of + // SparkArrayIndexOutOfBoundsException. + .exclude("INVALID_BITMAP_POSITION: position out of bounds") + .exclude("INVALID_BITMAP_POSITION: negative position") enableSuite[GlutenQueryParsingErrorsSuite] enableSuite[GlutenQueryContextSuite] enableSuite[GlutenQueryExecutionAnsiErrorsSuite] From 63b3699a33fba0a3c2f8d41a3e66fe10cc76f8f7 Mon Sep 17 00:00:00 2001 From: Minni Mittal Date: Fri, 29 May 2026 05:30:12 +0000 Subject: [PATCH 4/5] Move BitmapConstructAgg registration to Velox-only extraExpressionMappings Move Sig[BitmapConstructAgg] from SparkXShims.aggregateExpressionMappings (all-backend) to VeloxSparkPlanExecApi.extraExpressionMappings (Velox-only). This prevents ClickHouse backend from attempting to push down bitmap_construct_agg, which it does not support. Follows the same pattern as BloomFilterAgg registration. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala | 1 + .../org/apache/gluten/sql/shims/spark35/Spark35Shims.scala | 3 +-- .../org/apache/gluten/sql/shims/spark40/Spark40Shims.scala | 3 +-- .../org/apache/gluten/sql/shims/spark41/Spark41Shims.scala | 3 +-- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala index b7a1e172b2c..92956daae4a 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala @@ -1130,6 +1130,7 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi with Logging { Sig[CollectSet](ExpressionNames.COLLECT_SET), Sig[VeloxBloomFilterMightContain](ExpressionNames.MIGHT_CONTAIN), Sig[VeloxBloomFilterAggregate](ExpressionNames.BLOOM_FILTER_AGG), + Sig[BitmapConstructAgg](ExpressionNames.BITMAP_CONSTRUCT_AGG), Sig[MapFilter](ExpressionNames.MAP_FILTER), Sig[AssertNotNull](ExpressionNames.ASSERT_NOT_NULL), // For test purpose. diff --git a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala index 28c1bb177a8..8be204816a8 100644 --- a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala +++ b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala @@ -80,8 +80,7 @@ class Spark35Shims extends SparkShims { Sig[RegrSlope](ExpressionNames.REGR_SLOPE), Sig[RegrIntercept](ExpressionNames.REGR_INTERCEPT), Sig[RegrSXY](ExpressionNames.REGR_SXY), - Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT), - Sig[BitmapConstructAgg](ExpressionNames.BITMAP_CONSTRUCT_AGG) + Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT) ) } diff --git a/shims/spark40/src/main/scala/org/apache/gluten/sql/shims/spark40/Spark40Shims.scala b/shims/spark40/src/main/scala/org/apache/gluten/sql/shims/spark40/Spark40Shims.scala index 6363be33038..fb38af30609 100644 --- a/shims/spark40/src/main/scala/org/apache/gluten/sql/shims/spark40/Spark40Shims.scala +++ b/shims/spark40/src/main/scala/org/apache/gluten/sql/shims/spark40/Spark40Shims.scala @@ -85,8 +85,7 @@ class Spark40Shims extends SparkShims { Sig[RegrSlope](ExpressionNames.REGR_SLOPE), Sig[RegrIntercept](ExpressionNames.REGR_INTERCEPT), Sig[RegrSXY](ExpressionNames.REGR_SXY), - Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT), - Sig[BitmapConstructAgg](ExpressionNames.BITMAP_CONSTRUCT_AGG) + Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT) ) } diff --git a/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala b/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala index 3aece98d861..238c8ded813 100644 --- a/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala +++ b/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala @@ -84,8 +84,7 @@ class Spark41Shims extends SparkShims { Sig[RegrSlope](ExpressionNames.REGR_SLOPE), Sig[RegrIntercept](ExpressionNames.REGR_INTERCEPT), Sig[RegrSXY](ExpressionNames.REGR_SXY), - Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT), - Sig[BitmapConstructAgg](ExpressionNames.BITMAP_CONSTRUCT_AGG) + Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT) ) } From f017bbd44b9214085997738146bd744f1f0850ce Mon Sep 17 00:00:00 2001 From: Minni Mittal Date: Fri, 29 May 2026 07:36:02 +0000 Subject: [PATCH 5/5] Use Class.forName for BitmapConstructAgg registration in extraExpressionMappings BitmapConstructAgg only exists in Spark 3.5+. Use runtime class loading with scala.util.Try to gracefully handle Spark 3.3/3.4 where the class is absent. This keeps the registration Velox-only (CH unaffected) and avoids compilation failures on older Spark versions. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala index 92956daae4a..556c036d041 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala @@ -1130,12 +1130,17 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi with Logging { Sig[CollectSet](ExpressionNames.COLLECT_SET), Sig[VeloxBloomFilterMightContain](ExpressionNames.MIGHT_CONTAIN), Sig[VeloxBloomFilterAggregate](ExpressionNames.BLOOM_FILTER_AGG), - Sig[BitmapConstructAgg](ExpressionNames.BITMAP_CONSTRUCT_AGG), Sig[MapFilter](ExpressionNames.MAP_FILTER), Sig[AssertNotNull](ExpressionNames.ASSERT_NOT_NULL), // For test purpose. Sig[VeloxDummyExpression](VeloxDummyExpression.VELOX_DUMMY_EXPRESSION) - ) + ) ++ scala.util.Try( + // scalastyle:off classforname + Sig( + Class.forName("org.apache.spark.sql.catalyst.expressions.BitmapConstructAgg"), + ExpressionNames.BITMAP_CONSTRUCT_AGG) + // scalastyle:on classforname + ).toOption.toSeq } override def rewriteSpillPath(path: String): String = {