From 0d84c07bdf490c1d1ed4f8ab8c8ce145ffc9d23d Mon Sep 17 00:00:00 2001 From: yew1eb Date: Thu, 29 Jan 2026 21:41:22 +0800 Subject: [PATCH] [AURON #1961] Fix Spark 4.0+: unit test catalyst codegen failure due to session artifact isolation --- .../org/apache/auron/AuronFunctionSuite.scala | 8 +------- .../scala/org/apache/auron/AuronQuerySuite.scala | 11 +---------- .../org/apache/auron/BaseAuronSQLSuite.scala | 15 +++++++++++---- 3 files changed, 13 insertions(+), 21 deletions(-) diff --git a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala index 5fc20370c..0a4c1203b 100644 --- a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala +++ b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala @@ -20,7 +20,7 @@ import java.text.SimpleDateFormat import org.apache.spark.sql.{AuronQueryTest, Row} -import org.apache.auron.util.{AuronTestUtils, SparkVersionUtil} +import org.apache.auron.util.AuronTestUtils class AuronFunctionSuite extends AuronQueryTest with BaseAuronSQLSuite { @@ -83,9 +83,6 @@ class AuronFunctionSuite extends AuronQueryTest with BaseAuronSQLSuite { } test("spark hash function") { - // TODO: Fix flaky codegen cache failures in SPARK-4.x, https://github.com/apache/auron/issues/1961 - assume(!SparkVersionUtil.isSparkV40OrGreater) - withTable("t1") { sql("create table t1 using parquet as select array(1, 2) as arr") val functions = @@ -97,9 +94,6 @@ class AuronFunctionSuite extends AuronQueryTest with BaseAuronSQLSuite { } test("expm1 function") { - // TODO: Fix flaky codegen cache failures in SPARK-4.x, https://github.com/apache/auron/issues/1961 - assume(!SparkVersionUtil.isSparkV40OrGreater) - withTable("t1") { sql("create table t1(c1 double) using parquet") sql("insert into t1 values(0.0), (1.1), (2.2)") diff --git 
a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronQuerySuite.scala b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronQuerySuite.scala index a73d17f24..e82eb78f3 100644 --- a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronQuerySuite.scala +++ b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronQuerySuite.scala @@ -20,7 +20,7 @@ import org.apache.spark.sql.{AuronQueryTest, Row} import org.apache.spark.sql.execution.joins.auron.plan.NativeBroadcastJoinExec import org.apache.auron.spark.configuration.SparkAuronConfiguration -import org.apache.auron.util.{AuronTestUtils, SparkVersionUtil} +import org.apache.auron.util.AuronTestUtils class AuronQuerySuite extends AuronQueryTest with BaseAuronSQLSuite with AuronSQLTestHelper { import testImplicits._ @@ -42,9 +42,6 @@ class AuronQuerySuite extends AuronQueryTest with BaseAuronSQLSuite with AuronSQ } test("test filter with year function") { - // TODO: Fix flaky codegen cache failures in SPARK-4.x, https://github.com/apache/auron/issues/1961 - assume(!SparkVersionUtil.isSparkV40OrGreater) - withTable("t1") { sql("create table t1 using parquet as select '2024-12-18' as event_time") checkSparkAnswerAndOperator(s""" @@ -57,9 +54,6 @@ class AuronQuerySuite extends AuronQueryTest with BaseAuronSQLSuite with AuronSQ } test("test select multiple spark ext functions with the same signature") { - // TODO: Fix flaky codegen cache failures in SPARK-4.x, https://github.com/apache/auron/issues/1961 - assume(!SparkVersionUtil.isSparkV40OrGreater) - withTable("t1") { sql("create table t1 using parquet as select '2024-12-18' as event_time") checkSparkAnswerAndOperator("select year(event_time), month(event_time) from t1") @@ -177,9 +171,6 @@ class AuronQuerySuite extends AuronQueryTest with BaseAuronSQLSuite with AuronSQ } test("floor function with long input") { - // TODO: Fix flaky codegen cache failures in SPARK-4.x, https://github.com/apache/auron/issues/1961 - 
assume(!SparkVersionUtil.isSparkV40OrGreater) - withTable("t1") { sql("create table t1 using parquet as select 1L as c1, 2.2 as c2") checkSparkAnswerAndOperator("select floor(c1), floor(c2) from t1") diff --git a/spark-extension-shims-spark/src/test/scala/org/apache/auron/BaseAuronSQLSuite.scala b/spark-extension-shims-spark/src/test/scala/org/apache/auron/BaseAuronSQLSuite.scala index 587d8f965..a8a9c2593 100644 --- a/spark-extension-shims-spark/src/test/scala/org/apache/auron/BaseAuronSQLSuite.scala +++ b/spark-extension-shims-spark/src/test/scala/org/apache/auron/BaseAuronSQLSuite.scala @@ -22,6 +22,8 @@ import org.apache.commons.io.FileUtils import org.apache.spark.SparkConf import org.apache.spark.sql.test.SharedSparkSession +import org.apache.auron.util.SparkVersionUtil + trait BaseAuronSQLSuite extends SharedSparkSession { protected val suiteWorkspace: String = getClass.getResource("/").getPath + "auron-tests-workdir" protected val warehouseDir: String = suiteWorkspace + "/spark-warehouse" @@ -49,7 +51,7 @@ trait BaseAuronSQLSuite extends SharedSparkSession { } override protected def sparkConf: SparkConf = { - super.sparkConf + val conf = super.sparkConf .set("spark.sql.extensions", "org.apache.spark.sql.auron.AuronSparkSessionExtension") .set( "spark.shuffle.manager", @@ -58,8 +60,13 @@ trait BaseAuronSQLSuite extends SharedSparkSession { .set("spark.auron.enable", "true") .set("spark.ui.enabled", "false") .set("spark.sql.warehouse.dir", warehouseDir) - // Avoid the code size overflow error in Spark code generation. 
- .set("spark.sql.codegen.wholeStage", "false") - .set("spark.sql.codegen.factoryMode", "NO_CODEGEN") + + if (SparkVersionUtil.isSparkV40OrGreater) { + // Spark 4.0+: disable session artifact isolation to align with Spark 3.x behavior. + // This fixes a Catalyst codegen failure in which the REPL classloader fails to look up + // org.apache.spark.sql.catalyst.expressions.Object in isolated dirs. + conf.set("spark.sql.artifact.isolation.enabled", "false") + } + conf } }