From 8639a10d57cb42d8b77a4372693732fa222c7ba4 Mon Sep 17 00:00:00 2001 From: fys Date: Thu, 11 Jun 2026 11:25:54 +0800 Subject: [PATCH 1/5] fix: TRY_CAST returns NULL for timestamp/date overflow --- datafusion/common/src/scalar/mod.rs | 44 ++++++++++++++++++- datafusion/expr-common/src/columnar_value.rs | 32 +++++++++++++- .../test_files/datetime/timestamps.slt | 11 +++++ 3 files changed, 85 insertions(+), 2 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index c9013af72619c..e4fd84fcbe123 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -4292,7 +4292,13 @@ impl ScalarValue { .or_else(|| timestamp_to_timestamp_multiplier(&source_type, target_type)) && let Some(value) = self.temporal_scalar_value_as_i64() { - ensure_timestamp_in_bounds(value, multiplier, &source_type, target_type)?; + if cast_options.safe { + if multiplier > 1 && value.checked_mul(multiplier).is_none() { + return ScalarValue::try_new_null(target_type); + } + } else { + ensure_timestamp_in_bounds(value, multiplier, &source_type, target_type)?; + } } let scalar_array = self.to_array()?; @@ -10190,6 +10196,24 @@ mod tests { ); } + #[test] + fn safe_cast_date_to_timestamp_overflow_returns_null() { + let scalar = ScalarValue::Date32(Some(i32::MAX)); + let safe_options = CastOptions { + safe: true, + ..DEFAULT_CAST_OPTIONS + }; + + let casted = scalar + .cast_to_with_options( + &DataType::Timestamp(TimeUnit::Nanosecond, None), + &safe_options, + ) + .expect("expected safe cast to return null"); + + assert_eq!(casted, ScalarValue::TimestampNanosecond(None, None)); + } + #[test] fn cast_timestamp_to_timestamp_overflow_returns_error() { let scalar = ScalarValue::TimestampSecond(Some(i64::MAX), None); @@ -10203,6 +10227,24 @@ mod tests { ); } + #[test] + fn safe_cast_timestamp_to_timestamp_overflow_returns_null() { + let scalar = ScalarValue::TimestampSecond(Some(i64::MAX), None); + let safe_options = CastOptions { + safe: true, + ..DEFAULT_CAST_OPTIONS + }; + + let casted = scalar + .cast_to_with_options( + &DataType::Timestamp(TimeUnit::Nanosecond, None), + &safe_options, + ) + .expect("expected safe cast to return null"); + + assert_eq!(casted, ScalarValue::TimestampNanosecond(None, None)); + } + #[test] fn null_dictionary_scalar_produces_null_dictionary_array() { let dictionary_scalar = ScalarValue::Dictionary( diff --git a/datafusion/expr-common/src/columnar_value.rs b/datafusion/expr-common/src/columnar_value.rs index caeb3f10da752..ef9192c3569d9 100644 --- a/datafusion/expr-common/src/columnar_value.rs +++ b/datafusion/expr-common/src/columnar_value.rs @@ -325,7 +325,9 @@ fn cast_array_by_name( ) { datafusion_common::nested_struct::cast_column(array, cast_type, cast_options) } else { - ensure_temporal_array_timestamp_bounds(array, cast_type)?; + if !cast_options.safe { + ensure_temporal_array_timestamp_bounds(array, cast_type)?; + } Ok(kernels::cast::cast_with_options( array, cast_type, @@ -766,4 +768,32 @@ mod tests { "unexpected error: {err}" ); } + + #[test] + fn safe_cast_timestamp_array_to_timestamp_overflow_returns_null() { + let overflow_value = i64::MAX / 1_000_000_000 + 1; + let array: ArrayRef = + Arc::new(TimestampSecondArray::from(vec![Some(overflow_value)])); + let value = ColumnarValue::Array(array); + let safe_options = CastOptions { + safe: true, + ..DEFAULT_CAST_OPTIONS + }; + + let casted = value + .cast_to( + &DataType::Timestamp(TimeUnit::Nanosecond, None), + Some(&safe_options), + ) + .expect("expected safe cast to return null"); + + let ColumnarValue::Array(array) = casted else { + panic!("expected array after cast"); + }; + let array = array + .as_any() + .downcast_ref::() + .expect("expected TimestampNanosecondArray"); + assert!(array.is_null(0)); + } } diff --git a/datafusion/sqllogictest/test_files/datetime/timestamps.slt b/datafusion/sqllogictest/test_files/datetime/timestamps.slt index 89c6f0a12139e..d3b68b2f370d1 100644 --- a/datafusion/sqllogictest/test_files/datetime/timestamps.slt +++ b/datafusion/sqllogictest/test_files/datetime/timestamps.slt @@ -5379,6 +5379,17 @@ SELECT to_timestamp(arrow_cast(-9223372036, 'Int64')); query error converted value exceeds the representable i64 range SELECT to_timestamp(arrow_cast(9223372037, 'Int64')); +# TRY_CAST returns NULL for timestamp/date casts that overflow +query P +SELECT TRY_CAST(arrow_cast(9223372037, 'Timestamp(s)') AS TIMESTAMP(9)); +---- +NULL + +query P +SELECT TRY_CAST(DATE '3000-01-01' AS TIMESTAMP(9)); +---- +NULL + # Float truncation behavior query P SELECT to_timestamp_seconds(arrow_cast(-1.9, 'Float64')); From 356eea9d5b55525b69b1f82cd8d35a34edc00660 Mon Sep 17 00:00:00 2001 From: fys Date: Thu, 11 Jun 2026 11:51:51 +0800 Subject: [PATCH 2/5] add VALUES-based TRY_CAST temporal overflow test --- .../sqllogictest/test_files/datetime/timestamps.slt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/datafusion/sqllogictest/test_files/datetime/timestamps.slt b/datafusion/sqllogictest/test_files/datetime/timestamps.slt index d3b68b2f370d1..06740fa0f5439 100644 --- a/datafusion/sqllogictest/test_files/datetime/timestamps.slt +++ b/datafusion/sqllogictest/test_files/datetime/timestamps.slt @@ -5390,6 +5390,14 @@ SELECT TRY_CAST(DATE '3000-01-01' AS TIMESTAMP(9)); ---- NULL +query P +SELECT TRY_CAST(ts AS TIMESTAMP(9)) AS ts +FROM ( + VALUES (arrow_cast(9223372037, 'Timestamp(s)')) +) t(ts); +---- +NULL + # Float truncation behavior query P SELECT to_timestamp_seconds(arrow_cast(-1.9, 'Float64')); From 3f4ad71567c2756af22afe5c561bc1b3d91e2497 Mon Sep 17 00:00:00 2001 From: fys Date: Fri, 12 Jun 2026 10:40:06 +0800 Subject: [PATCH 3/5] test: add date array cast overflow coverage --- .../src/engines/datafusion_engine/runner.rs | 27 ++++++++++++++++++- .../test_files/datetime/timestamps.slt | 4 +++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs index 08facc48005dc..3c83c3e1a44ed 100644 --- a/datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs +++ b/datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs @@ -15,7 +15,9 @@ // specific language governing permissions and limitations // under the License. +use std::any::Any; use std::collections::HashMap; +use std::panic::AssertUnwindSafe; use std::sync::{Arc, Mutex}; use std::{path::PathBuf, time::Duration}; @@ -28,6 +30,7 @@ use async_trait::async_trait; use datafusion::physical_plan::common::collect; use datafusion::physical_plan::execute_stream; use datafusion::prelude::SessionContext; +use futures::FutureExt; use indicatif::ProgressBar; use log::Level::{Debug, Info}; use log::{debug, log_enabled, warn}; @@ -154,7 +157,19 @@ impl sqllogictest::AsyncDB for DataFusion { let tracked_sql = self.currently_executing_sql_tracker.set_sql(sql); let start = Instant::now(); - let result = run_query(&self.ctx, is_spark_path(&self.relative_path), sql).await; + let result = AssertUnwindSafe(run_query( + &self.ctx, + is_spark_path(&self.relative_path), + sql, + )) + .catch_unwind() + .await + .unwrap_or_else(|panic| { + Err(DFSqlLogicTestError::Other(format!( + "panic: {}", + panic_message(panic.as_ref()) + ))) + }); let duration = start.elapsed(); self.currently_executing_sql_tracker.remove_sql(tracked_sql); @@ -199,6 +214,16 @@ impl sqllogictest::AsyncDB for DataFusion { } } +fn panic_message(panic: &(dyn Any + Send)) -> &str { + if let Some(message) = panic.downcast_ref::<&str>() { + message + } else if let Some(message) = panic.downcast_ref::() { + message + } else { + "unknown panic" + } +} + async fn run_query( ctx: &SessionContext, is_spark_path: bool, diff --git a/datafusion/sqllogictest/test_files/datetime/timestamps.slt b/datafusion/sqllogictest/test_files/datetime/timestamps.slt index 06740fa0f5439..b82bc0bb8a258 100644 --- a/datafusion/sqllogictest/test_files/datetime/timestamps.slt +++ b/datafusion/sqllogictest/test_files/datetime/timestamps.slt @@ -5390,6 +5390,10 @@ SELECT TRY_CAST(DATE '3000-01-01' AS TIMESTAMP(9)); ---- NULL +query error panic: attempt to multiply with overflow +SELECT TRY_CAST(d AS TIMESTAMP(9)) +FROM (VALUES (DATE '3000-01-01')) t(d); + query P SELECT TRY_CAST(ts AS TIMESTAMP(9)) AS ts FROM ( From 970d1aef264a12580a2679db4ee90f3f924d59e2 Mon Sep 17 00:00:00 2001 From: fys Date: Fri, 12 Jun 2026 10:40:22 +0800 Subject: [PATCH 4/5] refactor: reuse timestamp bounds check for scalar casts --- datafusion/common/src/scalar/mod.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index e4fd84fcbe123..8a8a47b3bb50b 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -4292,12 +4292,13 @@ impl ScalarValue { .or_else(|| timestamp_to_timestamp_multiplier(&source_type, target_type)) && let Some(value) = self.temporal_scalar_value_as_i64() { - if cast_options.safe { - if multiplier > 1 && value.checked_mul(multiplier).is_none() { + match ensure_timestamp_in_bounds(value, multiplier, &source_type, target_type) + { + Ok(()) => {} + Err(_) if cast_options.safe => { return ScalarValue::try_new_null(target_type); } - } else { - ensure_timestamp_in_bounds(value, multiplier, &source_type, target_type)?; + Err(e) => return Err(e), } } From 14d0a6f88762f745220610ebfc697e4ca0cf116a Mon Sep 17 00:00:00 2001 From: fys Date: Fri, 12 Jun 2026 11:43:08 +0800 Subject: [PATCH 5/5] Revert "test: add date array cast overflow coverage" This reverts commit 3f4ad71567c2756af22afe5c561bc1b3d91e2497. --- .../src/engines/datafusion_engine/runner.rs | 27 +------------------ .../test_files/datetime/timestamps.slt | 4 --- 2 files changed, 1 insertion(+), 30 deletions(-) diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs index 3c83c3e1a44ed..08facc48005dc 100644 --- a/datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs +++ b/datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs @@ -15,9 +15,7 @@ // specific language governing permissions and limitations // under the License. -use std::any::Any; use std::collections::HashMap; -use std::panic::AssertUnwindSafe; use std::sync::{Arc, Mutex}; use std::{path::PathBuf, time::Duration}; @@ -30,7 +28,6 @@ use async_trait::async_trait; use datafusion::physical_plan::common::collect; use datafusion::physical_plan::execute_stream; use datafusion::prelude::SessionContext; -use futures::FutureExt; use indicatif::ProgressBar; use log::Level::{Debug, Info}; use log::{debug, log_enabled, warn}; @@ -157,19 +154,7 @@ impl sqllogictest::AsyncDB for DataFusion { let tracked_sql = self.currently_executing_sql_tracker.set_sql(sql); let start = Instant::now(); - let result = AssertUnwindSafe(run_query( - &self.ctx, - is_spark_path(&self.relative_path), - sql, - )) - .catch_unwind() - .await - .unwrap_or_else(|panic| { - Err(DFSqlLogicTestError::Other(format!( - "panic: {}", - panic_message(panic.as_ref()) - ))) - }); + let result = run_query(&self.ctx, is_spark_path(&self.relative_path), sql).await; let duration = start.elapsed(); self.currently_executing_sql_tracker.remove_sql(tracked_sql); @@ -214,16 +199,6 @@ impl sqllogictest::AsyncDB for DataFusion { } } -fn panic_message(panic: &(dyn Any + Send)) -> &str { - if let Some(message) = panic.downcast_ref::<&str>() { - message - } else if let Some(message) = panic.downcast_ref::() { - message - } else { - "unknown panic" - } -} - async fn run_query( ctx: &SessionContext, is_spark_path: bool, diff --git a/datafusion/sqllogictest/test_files/datetime/timestamps.slt b/datafusion/sqllogictest/test_files/datetime/timestamps.slt index b82bc0bb8a258..06740fa0f5439 100644 --- a/datafusion/sqllogictest/test_files/datetime/timestamps.slt +++ b/datafusion/sqllogictest/test_files/datetime/timestamps.slt @@ -5390,10 +5390,6 @@ SELECT TRY_CAST(DATE '3000-01-01' AS TIMESTAMP(9)); ---- NULL -query error panic: attempt to multiply with overflow -SELECT TRY_CAST(d AS TIMESTAMP(9)) -FROM (VALUES (DATE '3000-01-01')) t(d); - query P SELECT TRY_CAST(ts AS TIMESTAMP(9)) AS ts FROM (