Add ArrowAllocatorLeakCheck to ArrowFileReadWriteSuite and improve scaladoc

garlandz-db · claude · garlandz-db · commit 34866bcd4290 · 2026-03-11T22:43:13.000Z
- Mix ArrowAllocatorLeakCheck into the existing ArrowFileReadWriteSuite so
  that the suite which directly exercises ArrowFileReadWrite.save/load also
  asserts that no Arrow memory is leaked once its own tests complete.
- Expand ArrowAllocatorLeakCheck scaladoc with a mixin-order warning and
  correct/incorrect usage examples, since wrong ordering causes
  false-positive leak failures.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowAllocatorLeakCheck.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowAllocatorLeakCheck.scala
@@ -23,8 +23,20 @@ import org.scalatest.Suite
 import org.apache.spark.sql.util.ArrowUtils
 
 /**
- * Mixin that asserts no memory remains allocated in the Arrow rootAllocator after all
- * tests complete. Mix into any suite that uses ArrowUtils.rootAllocator to catch leaks.
+ * Mixin that asserts no memory remains allocated in the Arrow rootAllocator after all tests
+ * complete. Mix into any suite that uses ArrowUtils.rootAllocator to catch leaks.
+ *
+ * '''Mixin order matters:''' this trait must appear to the RIGHT of any trait that allocates
+ * Arrow memory (e.g. `SharedSparkSession`) in the `extends`/`with` clause, so that
+ * `super.afterAll()` (which releases those resources) runs before the leak assertion.
+ *
+ * {{{
+ *   // Correct: SharedSparkSession released before the check
+ *   class MySuite extends QueryTest with SharedSparkSession with ArrowAllocatorLeakCheck
+ *
+ *   // Wrong: check runs before SharedSparkSession teardown
+ *   class MySuite extends QueryTest with ArrowAllocatorLeakCheck with SharedSparkSession
+ * }}}
  */
 trait ArrowAllocatorLeakCheck extends Suite with BeforeAndAfterAll {
   abstract override def afterAll(): Unit = {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowFileReadWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowFileReadWriteSuite.scala
@@ -23,7 +23,10 @@ import org.apache.spark.sql.functions._
 import org.apache.spark.sql.test.SharedSparkSession
 import org.apache.spark.util.Utils
 
-class ArrowFileReadWriteSuite extends QueryTest with SharedSparkSession {
+class ArrowFileReadWriteSuite
+    extends QueryTest
+    with SharedSparkSession
+    with ArrowAllocatorLeakCheck {
 
   private var tempDataPath: String = _
 
@@ -33,13 +36,15 @@ class ArrowFileReadWriteSuite extends QueryTest with SharedSparkSession {
   }
 
   test("simple") {
-    val df = spark.range(0, 100, 1, 10).select(
-      col("id"),
-      lit(1).alias("int"),
-      lit(2L).alias("long"),
-      lit(3.0).alias("double"),
-      lit("a string").alias("str"),
-      lit(Array(1.0, 2.0, Double.NaN, Double.NegativeInfinity)).alias("arr"))
+    val df = spark
+      .range(0, 100, 1, 10)
+      .select(
+        col("id"),
+        lit(1).alias("int"),
+        lit(2L).alias("long"),
+        lit(3.0).alias("double"),
+        lit("a string").alias("str"),
+        lit(Array(1.0, 2.0, Double.NaN, Double.NegativeInfinity)).alias("arr"))
 
     val path = new File(tempDataPath, "simple.arrowfile").toPath
     ArrowFileReadWrite.save(df, path)