diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index e700d4a04da3b..e02d2970d7a34 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -317,6 +317,11 @@ impl<'a> BinaryTypeCoercer<'a> { } else if let Some(numeric) = mathematics_numerical_coercion(lhs, rhs) { // Numeric arithmetic, e.g. Int32 + Int32 Ok(Signature::uniform(numeric)) + } else if let Some(numeric) = string_numeric_coercion(lhs, rhs) { + // String to numeric arithmetic, e.g. Int64 + Utf8 or Utf8 + Float64. + // The string operand is coerced to the numeric type of the other + // operand, mirroring the behavior of comparison coercion. + Ok(Signature::uniform(numeric)) } else { plan_err!( "Cannot coerce arithmetic expression {} {} {} to valid types", self.lhs, self.op, self.rhs diff --git a/datafusion/expr-common/src/type_coercion/binary/tests/arithmetic.rs b/datafusion/expr-common/src/type_coercion/binary/tests/arithmetic.rs index 70a8fc0e35a15..dbaee3cc97722 100644 --- a/datafusion/expr-common/src/type_coercion/binary/tests/arithmetic.rs +++ b/datafusion/expr-common/src/type_coercion/binary/tests/arithmetic.rs @@ -20,18 +20,66 @@ use datafusion_common::assert_contains; #[test] fn test_coercion_error() -> Result<()> { + // Boolean is neither numeric nor string, so it cannot be coerced for arithmetic. let coercer = - BinaryTypeCoercer::new(&DataType::Float32, &Operator::Plus, &DataType::Utf8); + BinaryTypeCoercer::new(&DataType::Boolean, &Operator::Plus, &DataType::Boolean); let result_type = coercer.get_input_types(); let e = result_type.unwrap_err(); assert_eq!( e.strip_backtrace(), - "Error during planning: Cannot coerce arithmetic expression Float32 + Utf8 to valid types" + "Error during planning: Cannot coerce arithmetic expression Boolean + Boolean to valid types" ); Ok(()) } +/// Tests that arithmetic operators coerce a string operand to the numeric type +/// of the other operand (string to numeric coercion). +#[test] +fn test_type_coercion_arithmetic_string_numeric() -> Result<()> { + use DataType::*; + + let arithmetic_ops = [ + Operator::Plus, + Operator::Minus, + Operator::Multiply, + Operator::Divide, + Operator::Modulo, + ]; + + for op in arithmetic_ops { + // numeric `op` string -> both coerced to the numeric type + for string_type in [Utf8, LargeUtf8, Utf8View] { + let (lhs, rhs) = + BinaryTypeCoercer::new(&Int64, &op, &string_type).get_input_types()?; + assert_eq!(lhs, Int64, "Op {op}: Int64 vs {string_type} -> lhs"); + assert_eq!(rhs, Int64, "Op {op}: Int64 vs {string_type} -> rhs"); + + // string `op` numeric -> both coerced to the numeric type + let (lhs, rhs) = + BinaryTypeCoercer::new(&string_type, &op, &Float64).get_input_types()?; + assert_eq!(lhs, Float64, "Op {op}: {string_type} vs Float64 -> lhs"); + assert_eq!(rhs, Float64, "Op {op}: {string_type} vs Float64 -> rhs"); + } + } + + // The result type matches same-type numeric arithmetic + test_coercion_binary_rule!(Int64, Utf8, Operator::Plus, Int64); + test_coercion_binary_rule!(Utf8, Float64, Operator::Multiply, Float64); + + // string `op` string remains unsupported as the target type is ambiguous + let err = BinaryTypeCoercer::new(&Utf8, &Operator::Plus, &Utf8) + .get_input_types() + .unwrap_err() + .to_string(); + assert_contains!( + &err, + "Cannot coerce arithmetic expression Utf8 + Utf8 to valid types" + ); + + Ok(()) +} + #[test] fn test_date_timestamp_arithmetic_error() -> Result<()> { let (lhs, rhs) = BinaryTypeCoercer::new( diff --git a/datafusion/sqllogictest/test_files/string_numeric_coercion.slt b/datafusion/sqllogictest/test_files/string_numeric_coercion.slt index 1567a149bcdf4..8b9ad1371e769 100644 --- a/datafusion/sqllogictest/test_files/string_numeric_coercion.slt +++ b/datafusion/sqllogictest/test_files/string_numeric_coercion.slt @@ -553,6 +553,109 @@ SELECT arrow_cast('hello', 'RunEndEncoded("run_ends": non-null Int32, "values": ---- true +# ------------------------------------------------- +# Arithmetic operators use string-to-numeric coercion. +# A string operand is coerced to the numeric type of the other operand. +# See: https://github.com/apache/datafusion/issues/23041 +# ------------------------------------------------- + +# Literal numeric + string literal +query I +SELECT 1 + '1'; +---- +2 + +query T +SELECT arrow_typeof(1 + '1'); +---- +Int64 + +# String literal on the left-hand side +query I +SELECT '1' + 1; +---- +2 + +# All arithmetic operators against an integer column +query I rowsort +SELECT column1 + '10' FROM t_int; +---- +1010 +11 +15 +335 +509 + +query I rowsort +SELECT column1 - '1' FROM t_int; +---- +0 +324 +4 +498 +999 + +query I rowsort +SELECT column1 * '2' FROM t_int; +---- +10 +2 +2000 +650 +998 + +query I rowsort +SELECT column1 / '5' FROM t_int; +---- +0 +1 +200 +65 +99 + +query I rowsort +SELECT column1 % '2' FROM t_int; +---- +0 +1 +1 +1 +1 + +# String operand against a float column coerces to Float64 +query R rowsort +SELECT column1 + '0.5' FROM t_float; +---- +1000.6 +2 +326.2 +5.5 +500.4 + +query T +SELECT arrow_typeof(column1 + '0.5') FROM t_float LIMIT 1; +---- +Float64 + +# Plan shows the string literal is cast to the numeric type, not the reverse +query TT +EXPLAIN SELECT column1 + '10' FROM t_int; +---- +logical_plan +01)Projection: t_int.column1 + Int64(10) AS t_int.column1 + Utf8("10") +02)--TableScan: t_int projection=[column1] +physical_plan +01)ProjectionExec: expr=[column1@0 + 10 as t_int.column1 + Utf8("10")] +02)--DataSourceExec: partitions=1, partition_sizes=[1] + +# Non-numeric string operand errors at runtime when cast to the numeric type +statement error Arrow error: Cast error: Cannot cast string 'hello' to value of Int64 type +SELECT column1 + 'hello' FROM t_int; + +# Two string operands remain ambiguous and are not coerced +query error Cannot coerce arithmetic expression Utf8 \+ Utf8 to valid types +SELECT '1' + '2'; + # ------------------------------------------------- # Cleanup # -------------------------------------------------