From 33db8f5378bb1dc2b601cb7ee648ecc30c7b28fd Mon Sep 17 00:00:00 2001 From: Dodothereal <129273127+Dodothereal@users.noreply.github.com> Date: Tue, 23 Jun 2026 20:12:45 +0200 Subject: [PATCH 1/2] chore(parquet): remove deprecated `coerce_file_schema_to_view_type` / `coerce_file_schema_to_string_type` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both free functions were deprecated in DataFusion 47.0.0 with the suggestion to use `apply_file_schema_type_coercions` instead. Per the API health deprecation guidelines, APIs deprecated in 47.0.0 are eligible for removal now that datafusion is on 55.x. A repo-wide grep confirms zero callers of either function — the only references are the function definitions themselves and two historical `pub use` re-export entries in `mod.rs` and `file_format.rs`, which are removed in this commit. The replacement `apply_file_schema_type_coercions` is in active use at `metadata.rs:403` and `opener/mod.rs:960`. The `#[expect(deprecated)]` attribute on the re-export lines is no longer needed once the deprecated symbols are gone. Closes #23080 (partial). --- .../datasource-parquet/src/file_format.rs | 4 +- datafusion/datasource-parquet/src/mod.rs | 4 +- .../datasource-parquet/src/schema_coercion.rs | 112 ------------------ 3 files changed, 2 insertions(+), 118 deletions(-) diff --git a/datafusion/datasource-parquet/src/file_format.rs b/datafusion/datasource-parquet/src/file_format.rs index 734ec6b536f69..3289fedcfe19c 100644 --- a/datafusion/datasource-parquet/src/file_format.rs +++ b/datafusion/datasource-parquet/src/file_format.rs @@ -23,10 +23,8 @@ use std::ops::Range; use std::sync::Arc; // Re-export so the historical `file_format::*` paths still resolve. 
-#[expect(deprecated)] pub use crate::schema_coercion::{ - Int96Coercer, apply_file_schema_type_coercions, coerce_file_schema_to_string_type, - coerce_file_schema_to_view_type, coerce_int96_to_resolution, + Int96Coercer, apply_file_schema_type_coercions, coerce_int96_to_resolution, transform_binary_to_string, transform_schema_to_view, }; pub use crate::sink::ParquetSink; diff --git a/datafusion/datasource-parquet/src/mod.rs b/datafusion/datasource-parquet/src/mod.rs index 260d6ee471c89..e0c535d0eb75c 100644 --- a/datafusion/datasource-parquet/src/mod.rs +++ b/datafusion/datasource-parquet/src/mod.rs @@ -54,10 +54,8 @@ pub use reader::*; // Expose so downstream crates can use it pub use row_filter::build_row_filter; pub use row_filter::can_expr_be_pushed_down_with_schemas; pub use row_group_filter::RowGroupAccessPlanFilter; -#[expect(deprecated)] pub use schema_coercion::{ - Int96Coercer, apply_file_schema_type_coercions, coerce_file_schema_to_string_type, - coerce_file_schema_to_view_type, coerce_int96_to_resolution, + Int96Coercer, apply_file_schema_type_coercions, coerce_int96_to_resolution, transform_binary_to_string, transform_schema_to_view, }; pub use sink::ParquetSink; diff --git a/datafusion/datasource-parquet/src/schema_coercion.rs b/datafusion/datasource-parquet/src/schema_coercion.rs index 4598bb525be32..30cd5d7e65948 100644 --- a/datafusion/datasource-parquet/src/schema_coercion.rs +++ b/datafusion/datasource-parquet/src/schema_coercion.rs @@ -418,118 +418,6 @@ fn coerce_int96_to_resolution_impl( Some(transformed_schema) } -/// Coerces the file schema if the table schema uses a view type. 
-#[deprecated( - since = "47.0.0", - note = "Use `apply_file_schema_type_coercions` instead" -)] -pub fn coerce_file_schema_to_view_type( - table_schema: &Schema, - file_schema: &Schema, -) -> Option<Schema> { - let mut transform = false; - let table_fields: HashMap<_, _> = table_schema - .fields - .iter() - .map(|f| { - let dt = f.data_type(); - if dt.equals_datatype(&DataType::Utf8View) - || dt.equals_datatype(&DataType::BinaryView) - { - transform = true; - } - (f.name(), dt) - }) - .collect(); - - if !transform { - return None; - } - - let transformed_fields: Vec<Arc<Field>> = file_schema - .fields - .iter() - .map( - |field| match (table_fields.get(field.name()), field.data_type()) { - (Some(DataType::Utf8View), DataType::Utf8 | DataType::LargeUtf8) => { - field_with_new_type(field, DataType::Utf8View) - } - ( - Some(DataType::BinaryView), - DataType::Binary | DataType::LargeBinary, - ) => field_with_new_type(field, DataType::BinaryView), - _ => Arc::clone(field), - }, - ) - .collect(); - - Some(Schema::new_with_metadata( - transformed_fields, - file_schema.metadata.clone(), - )) -} - -/// If the table schema uses a string type, coerce the file schema to use a string type. 
-/// -/// See [`ParquetFormat::binary_as_string`](crate::file_format::ParquetFormat::binary_as_string) for details -#[deprecated( - since = "47.0.0", - note = "Use `apply_file_schema_type_coercions` instead" -)] -pub fn coerce_file_schema_to_string_type( - table_schema: &Schema, - file_schema: &Schema, -) -> Option<Schema> { - let mut transform = false; - let table_fields: HashMap<_, _> = table_schema - .fields - .iter() - .map(|f| (f.name(), f.data_type())) - .collect(); - let transformed_fields: Vec<Arc<Field>> = file_schema - .fields - .iter() - .map( - |field| match (table_fields.get(field.name()), field.data_type()) { - // table schema uses string type, coerce the file schema to use string type - ( - Some(DataType::Utf8), - DataType::Binary | DataType::LargeBinary | DataType::BinaryView, - ) => { - transform = true; - field_with_new_type(field, DataType::Utf8) - } - // table schema uses large string type, coerce the file schema to use large string type - ( - Some(DataType::LargeUtf8), - DataType::Binary | DataType::LargeBinary | DataType::BinaryView, - ) => { - transform = true; - field_with_new_type(field, DataType::LargeUtf8) - } - // table schema uses string view type, coerce the file schema to use view type - ( - Some(DataType::Utf8View), - DataType::Binary | DataType::LargeBinary | DataType::BinaryView, - ) => { - transform = true; - field_with_new_type(field, DataType::Utf8View) - } - _ => Arc::clone(field), - }, - ) - .collect(); - - if !transform { - None - } else { - Some(Schema::new_with_metadata( - transformed_fields, - file_schema.metadata.clone(), - )) - } -} - /// Create a new field with the specified data type, copying the other /// properties from the input field fn field_with_new_type(field: &FieldRef, new_type: DataType) -> FieldRef { From 5e839f87af1da7c21532828f2f78e461111aea3e Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 23 Jun 2026 14:51:19 -0400 Subject: [PATCH 2/2] chore: fix clippy --- datafusion/datasource-parquet/src/file_format.rs | 7 +++++-- 
datafusion/datasource-parquet/src/mod.rs | 6 ++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/datafusion/datasource-parquet/src/file_format.rs b/datafusion/datasource-parquet/src/file_format.rs index 3289fedcfe19c..e89cff2aaf7c9 100644 --- a/datafusion/datasource-parquet/src/file_format.rs +++ b/datafusion/datasource-parquet/src/file_format.rs @@ -23,10 +23,13 @@ use std::ops::Range; use std::sync::Arc; // Re-export so the historical `file_format::*` paths still resolve. +#[expect(deprecated)] +pub use crate::schema_coercion::coerce_int96_to_resolution; pub use crate::schema_coercion::{ - Int96Coercer, apply_file_schema_type_coercions, coerce_int96_to_resolution, - transform_binary_to_string, transform_schema_to_view, + Int96Coercer, apply_file_schema_type_coercions, transform_binary_to_string, + transform_schema_to_view, }; + pub use crate::sink::ParquetSink; use arrow::datatypes::{Fields, Schema, SchemaRef}; diff --git a/datafusion/datasource-parquet/src/mod.rs b/datafusion/datasource-parquet/src/mod.rs index e0c535d0eb75c..250b36ad6d3c5 100644 --- a/datafusion/datasource-parquet/src/mod.rs +++ b/datafusion/datasource-parquet/src/mod.rs @@ -54,9 +54,11 @@ pub use reader::*; // Expose so downstream crates can use it pub use row_filter::build_row_filter; pub use row_filter::can_expr_be_pushed_down_with_schemas; pub use row_group_filter::RowGroupAccessPlanFilter; +#[expect(deprecated)] +pub use schema_coercion::coerce_int96_to_resolution; pub use schema_coercion::{ - Int96Coercer, apply_file_schema_type_coercions, coerce_int96_to_resolution, - transform_binary_to_string, transform_schema_to_view, + Int96Coercer, apply_file_schema_type_coercions, transform_binary_to_string, + transform_schema_to_view, }; pub use sink::ParquetSink; pub use virtual_column::ParquetVirtualColumn;