Skip to content

Commit 34866bc

Browse files
garlandz-dbclaude
andcommitted
Add ArrowAllocatorLeakCheck to ArrowFileReadWriteSuite and improve scaladoc
- Add ArrowFileReadWriteSuite with ArrowAllocatorLeakCheck mixin so the suite that directly exercises ArrowFileReadWrite.save/load also asserts no Arrow memory leaks after its own tests complete. - Expand ArrowAllocatorLeakCheck scaladoc with a mixin-order warning and correct/incorrect usage examples, since wrong ordering causes false-positive leak failures. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 8d7b8b7 commit 34866bc

2 files changed

Lines changed: 27 additions & 10 deletions

File tree

sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowAllocatorLeakCheck.scala

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,20 @@ import org.scalatest.Suite
2323
import org.apache.spark.sql.util.ArrowUtils
2424

2525
/**
26-
* Mixin that asserts no memory remains allocated in the Arrow rootAllocator after all
27-
* tests complete. Mix into any suite that uses ArrowUtils.rootAllocator to catch leaks.
26+
* Mixin that asserts no memory remains allocated in the Arrow rootAllocator after all tests
27+
* complete. Mix into any suite that uses ArrowUtils.rootAllocator to catch leaks.
28+
*
29+
* '''Mixin order matters:''' this trait must appear to the RIGHT of any trait that allocates
30+
* Arrow memory (e.g. `SharedSparkSession`) in the `extends`/`with` clause, so that
31+
* `super.afterAll()` (which releases those resources) runs before the leak assertion.
32+
*
33+
* {{{
34+
* // Correct: SharedSparkSession released before the check
35+
* class MySuite extends QueryTest with SharedSparkSession with ArrowAllocatorLeakCheck
36+
*
37+
* // Wrong: check runs before SharedSparkSession teardown
38+
* class MySuite extends QueryTest with ArrowAllocatorLeakCheck with SharedSparkSession
39+
* }}}
2840
*/
2941
trait ArrowAllocatorLeakCheck extends Suite with BeforeAndAfterAll {
3042
abstract override def afterAll(): Unit = {

sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowFileReadWriteSuite.scala

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,10 @@ import org.apache.spark.sql.functions._
2323
import org.apache.spark.sql.test.SharedSparkSession
2424
import org.apache.spark.util.Utils
2525

26-
class ArrowFileReadWriteSuite extends QueryTest with SharedSparkSession {
26+
class ArrowFileReadWriteSuite
27+
extends QueryTest
28+
with SharedSparkSession
29+
with ArrowAllocatorLeakCheck {
2730

2831
private var tempDataPath: String = _
2932

@@ -33,13 +36,15 @@ class ArrowFileReadWriteSuite extends QueryTest with SharedSparkSession {
3336
}
3437

3538
test("simple") {
36-
val df = spark.range(0, 100, 1, 10).select(
37-
col("id"),
38-
lit(1).alias("int"),
39-
lit(2L).alias("long"),
40-
lit(3.0).alias("double"),
41-
lit("a string").alias("str"),
42-
lit(Array(1.0, 2.0, Double.NaN, Double.NegativeInfinity)).alias("arr"))
39+
val df = spark
40+
.range(0, 100, 1, 10)
41+
.select(
42+
col("id"),
43+
lit(1).alias("int"),
44+
lit(2L).alias("long"),
45+
lit(3.0).alias("double"),
46+
lit("a string").alias("str"),
47+
lit(Array(1.0, 2.0, Double.NaN, Double.NegativeInfinity)).alias("arr"))
4348

4449
val path = new File(tempDataPath, "simple.arrowfile").toPath
4550
ArrowFileReadWrite.save(df, path)

0 commit comments

Comments
 (0)