diff --git a/datafusion/physical-plan/src/spill/mod.rs b/datafusion/physical-plan/src/spill/mod.rs index 3c95a1da5b33c..00c9ac0631ab7 100644 --- a/datafusion/physical-plan/src/spill/mod.rs +++ b/datafusion/physical-plan/src/spill/mod.rs @@ -30,7 +30,7 @@ pub use spill_manager::SpillManager; use std::fs::File; use std::io::BufReader; -use std::path::{Path, PathBuf}; +use std::path::Path; use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; @@ -245,35 +245,6 @@ impl RecordBatchStream for SpillReaderStream { } } -/// Spill the `RecordBatch` to disk as smaller batches -/// split by `batch_size_rows` -#[deprecated( - since = "46.0.0", - note = "This method is deprecated. Use `SpillManager::spill_record_batch_by_size` instead." -)] -#[expect(clippy::needless_pass_by_value)] -pub fn spill_record_batch_by_size( - batch: &RecordBatch, - path: PathBuf, - schema: SchemaRef, - batch_size_rows: usize, -) -> Result<()> { - let mut offset = 0; - let total_rows = batch.num_rows(); - let mut writer = - IPCStreamWriter::new(&path, schema.as_ref(), SpillCompression::Uncompressed)?; - - while offset < total_rows { - let length = std::cmp::min(total_rows - offset, batch_size_rows); - let batch = batch.slice(offset, length); - offset += batch.num_rows(); - writer.write(&batch)?; - } - writer.finish()?; - - Ok(()) -} - /// Write in Arrow IPC Stream format to a file. /// /// Stream format is used for spill because it supports dictionary replacement, and the random diff --git a/docs/source/library-user-guide/upgrading/55.0.0.md b/docs/source/library-user-guide/upgrading/55.0.0.md index 6d1f834abfac0..d0778a3619c4e 100644 --- a/docs/source/library-user-guide/upgrading/55.0.0.md +++ b/docs/source/library-user-guide/upgrading/55.0.0.md @@ -30,6 +30,14 @@ to the main branch and are awaiting release in this version. `datafusion_common::config::Dialect::AVAILABLE` has been removed. Use `Dialect::available()` instead. +### `spill_record_batch_by_size` removed + +`datafusion_physical_plan::spill::spill_record_batch_by_size` has been removed. +This function was deprecated in DataFusion `46.0.0`. + +Use `datafusion_physical_plan::spill::SpillManager::spill_record_batch_by_size` +instead. + ### Decimal scalar formatting uses human-readable values Decimal scalar literals in `EXPLAIN` output, expression display strings, and