diff --git a/Cargo.lock b/Cargo.lock index f0d8568502e9f..bc97507d64820 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2750,7 +2750,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2894,7 +2894,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4155,7 +4155,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4773,7 +4773,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", - "itertools 0.13.0", + "itertools 0.14.0", "log", "multimap", "petgraph", @@ -4792,7 +4792,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", - "itertools 0.13.0", + "itertools 0.14.0", "proc-macro2", "quote", "syn 2.0.117", @@ -4894,7 +4894,7 @@ dependencies = [ "once_cell", "socket2", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -5261,7 +5261,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -5516,6 +5516,7 @@ version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ + "indexmap 2.13.0", "itoa", "memchr", "serde", @@ -5873,11 +5874,12 @@ dependencies = [ [[package]] name = "substrait" -version = "0.62.2" +version = "0.63.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62fc4b483a129b9772ccb9c3f7945a472112fdd9140da87f8a4e7f1d44e045d0" +checksum = "e620ff4d5c02fd6f7752931aa74b16a26af66a63022cc1ad412c77edbe0bab47" dependencies = [ "heck", + "indexmap 2.13.0", "pbjson 0.8.0", "pbjson-build 0.8.0", "pbjson-types", @@ -5969,7 +5971,7 @@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -6927,7 +6929,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 1c901f6d4a0e6..dc4daf1ab7532 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -4456,49 +4456,49 @@ mod tests { [ { "Plan": { + "Node Type": "Projection", "Expressions": [ "employee_csv.id" ], - "Node Type": "Projection", - "Output": [ - "id" - ], "Plans": [ { - "Condition": "employee_csv.state IN ()", "Node Type": "Filter", - "Output": [ - "id", - "state" - ], + "Condition": "employee_csv.state IN ()", "Plans": [ { "Node Type": "Subquery", - "Output": [ - "state" - ], "Plans": [ { "Node Type": "TableScan", + "Relation Name": "employee_csv", + "Plans": [], "Output": [ "state" - ], - "Plans": [], - "Relation Name": "employee_csv" + ] } + ], + "Output": [ + "state" ] }, { "Node Type": "TableScan", + "Relation Name": "employee_csv", + "Plans": [], "Output": [ "id", "state" - ], - "Plans": [], - "Relation Name": "employee_csv" + ] } + ], + "Output": [ + "id", + "state" ] } + ], + "Output": [ + "id" ] } } diff --git a/datafusion/sqllogictest/test_files/explain.slt b/datafusion/sqllogictest/test_files/explain.slt index 717b455193354..9916892058569 100644 --- a/datafusion/sqllogictest/test_files/explain.slt +++ b/datafusion/sqllogictest/test_files/explain.slt @@ -646,11 +646,11 @@ logical_plan 02)--{ 03)----"Plan": { 04)------"Node Type": "Values", -05)------"Output": [ -06)--------"column1" -07)------], -08)------"Plans": [], -09)------"Values": "(Int64(1))" +05)------"Values": "(Int64(1))", +06)------"Plans": [], +07)------"Output": [ +08)--------"column1" +09)------] 10)----} 11)--} 12)] diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml index 927c326b88fbc..a0f203cec8db6 100644 --- a/datafusion/substrait/Cargo.toml +++ b/datafusion/substrait/Cargo.toml @@ -44,7 +44,7 @@ object_store = { workspace = true } # We need to match the version in substrait, so we don't use the workspace version here pbjson-types = { version = "0.8.0" } prost = { workspace = true } -substrait = { version = "0.62", features = ["serde"] } +substrait = { version = "0.63.0", features = ["serde"] } url = { workspace = true } tokio = { workspace = true, features = ["fs"] } diff --git a/datafusion/substrait/src/extensions.rs b/datafusion/substrait/src/extensions.rs index 0f848270babb9..78c357f3b8886 100644 --- a/datafusion/substrait/src/extensions.rs +++ b/datafusion/substrait/src/extensions.rs @@ -113,14 +113,10 @@ impl TryFrom<&Vec> for Extensions { } impl From for Vec { - // Silence deprecation warnings for `extension_uri_reference` during the uri -> urn migration - // See: https://github.com/substrait-io/substrait/issues/856 - #[expect(deprecated)] fn from(val: Extensions) -> Vec { let mut extensions = vec![]; for (f_anchor, f_name) in val.functions { let function_extension = ExtensionFunction { - extension_uri_reference: u32::MAX, extension_urn_reference: u32::MAX, function_anchor: f_anchor, name: f_name, @@ -133,7 +129,6 @@ impl From for Vec { for (t_anchor, t_name) in val.types { let type_extension = ExtensionType { - extension_uri_reference: u32::MAX, // https://github.com/apache/datafusion/issues/11545 extension_urn_reference: u32::MAX, // https://github.com/apache/datafusion/issues/11545 type_anchor: t_anchor, name: t_name, @@ -146,8 +141,7 @@ impl From for Vec { for (tv_anchor, tv_name) in val.type_variations { let type_variation_extension = ExtensionTypeVariation { - extension_uri_reference: u32::MAX, // We don't register proper extension URIs yet - extension_urn_reference: u32::MAX, // We don't register proper extension URIs yet + extension_urn_reference: u32::MAX, // We don't register proper extension URNs yet type_variation_anchor: tv_anchor, name: tv_name, }; diff --git a/datafusion/substrait/src/logical_plan/consumer/expr/field_reference.rs b/datafusion/substrait/src/logical_plan/consumer/expr/field_reference.rs index 50d93a4600a00..dae6c625ef55b 100644 --- a/datafusion/substrait/src/logical_plan/consumer/expr/field_reference.rs +++ b/datafusion/substrait/src/logical_plan/consumer/expr/field_reference.rs @@ -56,6 +56,9 @@ pub(crate) fn from_substrait_field_reference( Some(RootType::Expression(_)) => not_impl_err!( "Expression root type in field reference is not supported" ), + Some(RootType::LambdaParameterReference(_)) => not_impl_err!( + "Lambda parameter reference in field reference is not yet supported" + ), } } _ => not_impl_err!( diff --git a/datafusion/substrait/src/logical_plan/consumer/expr/literal.rs b/datafusion/substrait/src/logical_plan/consumer/expr/literal.rs index ad38b6addee0b..d7d7a69581f05 100644 --- a/datafusion/substrait/src/logical_plan/consumer/expr/literal.rs +++ b/datafusion/substrait/src/logical_plan/consumer/expr/literal.rs @@ -43,7 +43,7 @@ use prost::Message; use std::sync::Arc; use substrait::proto; use substrait::proto::expression::Literal; -use substrait::proto::expression::literal::user_defined::Val; +use substrait::proto::expression::literal::user_defined::{TypeAnchorType, Val}; use substrait::proto::expression::literal::{ IntervalCompound, IntervalDayToSecond, IntervalYearToMonth, LiteralType, interval_day_to_second, @@ -474,11 +474,17 @@ pub(crate) fn from_substrait_literal( ))) }; - if let Some(name) = consumer - .get_extensions() - .types - .get(&user_defined.type_reference) - { + let type_ref = match user_defined.type_anchor_type { + Some(TypeAnchorType::TypeReference(ref_val)) => ref_val, + Some(TypeAnchorType::TypeAliasReference(_)) => { + return not_impl_err!( + "Type alias references in user-defined literals are not yet supported" + ); + } + None => 0, + }; + + if let Some(name) = consumer.get_extensions().types.get(&type_ref) { match name.as_ref() { FLOAT_16_TYPE_NAME => { // Rules for encoding fp16 Substrait literals are defined as part of Arrow here: @@ -518,14 +524,14 @@ pub(crate) fn from_substrait_literal( _ => { return not_impl_err!( "Unsupported Substrait user defined type with ref {} and name {}", - user_defined.type_reference, + type_ref, name ); } } } else { #[expect(deprecated)] - match user_defined.type_reference { + match type_ref { // Kept for backwards compatibility, producers should useIntervalYearToMonth instead INTERVAL_YEAR_MONTH_TYPE_REF => { let Some(Val::Value(raw_val)) = user_defined.val.as_ref() else { @@ -568,7 +574,7 @@ pub(crate) fn from_substrait_literal( _ => { return not_impl_err!( "Unsupported Substrait user defined type literal with ref {}", - user_defined.type_reference + type_ref ); } } diff --git a/datafusion/substrait/src/logical_plan/consumer/expr/mod.rs b/datafusion/substrait/src/logical_plan/consumer/expr/mod.rs index 5d98850c72cca..a0468dbd451b9 100644 --- a/datafusion/substrait/src/logical_plan/consumer/expr/mod.rs +++ b/datafusion/substrait/src/logical_plan/consumer/expr/mod.rs @@ -93,6 +93,9 @@ pub async fn from_substrait_rex( RexType::DynamicParameter(expr) => { consumer.consume_dynamic_parameter(expr, input_schema).await } + RexType::Lambda(_) | RexType::LambdaInvocation(_) => { + not_impl_err!("Lambda expressions are not yet supported") + } }, None => substrait_err!("Expression must set rex_type: {expression:?}"), } diff --git a/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs b/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs index a23f1faed1eb0..730ceab8ccef3 100644 --- a/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs @@ -141,7 +141,15 @@ use substrait::proto::{ /// /// // and user-defined literals /// fn consume_user_defined_literal(&self, literal: &proto::expression::literal::UserDefined) -> Result { -/// let type_string = self.extensions.types.get(&literal.type_reference).unwrap(); +/// // extract type_reference from the new TypeAnchorType oneof +/// let type_ref = match literal.type_anchor_type { +/// Some(proto::expression::literal::user_defined::TypeAnchorType::TypeReference(r)) => r, +/// Some(proto::expression::literal::user_defined::TypeAnchorType::TypeAliasReference(_)) => { +/// return not_impl_err!("Type alias references are not yet supported") +/// } +/// None => 0, +/// }; +/// let type_string = self.extensions.types.get(&type_ref).unwrap(); /// match type_string.as_str() { /// "u!foo" => not_impl_err!("handle foo conversion"), /// "u!bar" => not_impl_err!("handle bar conversion"), @@ -444,10 +452,22 @@ pub trait SubstraitConsumer: Send + Sync + Sized { &self, user_defined_literal: &proto::expression::literal::UserDefined, ) -> datafusion::common::Result { - substrait_err!( - "Missing handler for user-defined literals {}", - user_defined_literal.type_reference - ) + let type_ref = match user_defined_literal.type_anchor_type { + Some( + proto::expression::literal::user_defined::TypeAnchorType::TypeReference( + ref_val, + ), + ) => ref_val, + Some( + proto::expression::literal::user_defined::TypeAnchorType::TypeAliasReference(_), + ) => { + return not_impl_err!( + "Type alias references in user-defined literals are not yet supported" + ) + } + None => 0, + }; + substrait_err!("Missing handler for user-defined literals {}", type_ref) } } diff --git a/datafusion/substrait/src/logical_plan/producer/expr/literal.rs b/datafusion/substrait/src/logical_plan/producer/expr/literal.rs index 8882c992dca1c..bbed7ee9be417 100644 --- a/datafusion/substrait/src/logical_plan/producer/expr/literal.rs +++ b/datafusion/substrait/src/logical_plan/producer/expr/literal.rs @@ -117,7 +117,7 @@ pub(crate) fn to_substrait_literal( ( LiteralType::UserDefined( substrait::proto::expression::literal::UserDefined { - type_reference: type_anchor, + type_anchor_type: Some(substrait::proto::expression::literal::user_defined::TypeAnchorType::TypeReference(type_anchor)), type_parameters: vec![], val: Some(substrait::proto::expression::literal::user_defined::Val::Value( pbjson_types::Any { diff --git a/datafusion/substrait/src/logical_plan/producer/expr/mod.rs b/datafusion/substrait/src/logical_plan/producer/expr/mod.rs index 3aa8aa2b68bcf..d130961596dc9 100644 --- a/datafusion/substrait/src/logical_plan/producer/expr/mod.rs +++ b/datafusion/substrait/src/logical_plan/producer/expr/mod.rs @@ -60,9 +60,6 @@ use substrait::version; /// /// Substrait also requires the input schema of the expressions to be included in the /// message. The field names of the input schema will be serialized. -// Silence deprecation warnings for `extension_uris` during the uri -> urn migration -// See: https://github.com/substrait-io/substrait/issues/856 -#[expect(deprecated)] pub fn to_substrait_extended_expr( exprs: &[(&Expr, &Field)], schema: &DFSchemaRef, @@ -87,7 +84,6 @@ pub fn to_substrait_extended_expr( Ok(Box::new(ExtendedExpression { advanced_extensions: None, expected_type_urls: vec![], - extension_uris: vec![], extension_urns: vec![], extensions: extensions.into(), version: Some(version::version_with_producer("datafusion")), diff --git a/datafusion/substrait/src/logical_plan/producer/plan.rs b/datafusion/substrait/src/logical_plan/producer/plan.rs index 9396329f8d3e7..3b58720dba832 100644 --- a/datafusion/substrait/src/logical_plan/producer/plan.rs +++ b/datafusion/substrait/src/logical_plan/producer/plan.rs @@ -24,9 +24,6 @@ use substrait::proto::{Plan, PlanRel, Rel, RelRoot, plan_rel}; use substrait::version; /// Convert DataFusion LogicalPlan to Substrait Plan -// Silence deprecation warnings for `extension_uris` during the uri -> urn migration -// See: https://github.com/substrait-io/substrait/issues/856 -#[expect(deprecated)] pub fn to_substrait_plan( plan: &LogicalPlan, state: &SessionState, @@ -47,7 +44,6 @@ pub fn to_substrait_plan( let extensions = producer.get_extensions(); Ok(Box::new(Plan { version: Some(version::version_with_producer("datafusion")), - extension_uris: vec![], extension_urns: vec![], extensions: extensions.into(), relations: plan_rels, diff --git a/datafusion/substrait/tests/utils.rs b/datafusion/substrait/tests/utils.rs index 6a6824579b4e8..4d9b5ca83e5e0 100644 --- a/datafusion/substrait/tests/utils.rs +++ b/datafusion/substrait/tests/utils.rs @@ -486,6 +486,7 @@ pub mod test { // Enum is deprecated #[expect(deprecated)] RexType::Enum(_) => {} + RexType::Lambda(_) | RexType::LambdaInvocation(_) => {} } Ok(()) }