From 3f1fb4709f4c5c5a7e1d29ae45124f0e5a33e303 Mon Sep 17 00:00:00 2001 From: Frederic Branczyk Date: Fri, 6 Mar 2026 19:53:25 +0100 Subject: [PATCH] Add support for ListView in unnest --- datafusion/common/src/scalar/mod.rs | 16 ++++ datafusion/expr/src/expr_schema.rs | 4 +- datafusion/expr/src/logical_plan/plan.rs | 14 ++- datafusion/physical-plan/src/unnest.rs | 33 ++++++- datafusion/sql/src/expr/function.rs | 2 + datafusion/sql/src/utils.rs | 4 +- datafusion/sqllogictest/test_files/unnest.slt | 96 +++++++++++++++++++ 7 files changed, 163 insertions(+), 6 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index c21d3e21f007e..4381476342ae3 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -3675,6 +3675,22 @@ impl ScalarValue { .with_field(field) .build_fixed_size_list_scalar(list_size) } + DataType::ListView(field) => { + let list_array = array.as_list_view::<i32>(); + let nested_array = list_array.value(index); + // Store as List scalar since ScalarValue has no ListView variant. + SingleRowListArrayBuilder::new(nested_array) + .with_field(field) + .build_list_scalar() + } + DataType::LargeListView(field) => { + let list_array = array.as_list_view::<i64>(); + let nested_array = list_array.value(index); + // Store as LargeList scalar since ScalarValue has no LargeListView variant. 
+ SingleRowListArrayBuilder::new(nested_array) + .with_field(field) + .build_large_list_scalar() + } DataType::Date32 => typed_cast!(array, index, as_date32_array, Date32)?, DataType::Date64 => typed_cast!(array, index, as_date64_array, Date64)?, DataType::Time32(TimeUnit::Second) => { diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 4168310002c9e..92b78b157904f 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -141,7 +141,9 @@ impl ExprSchemable for Expr { match arg_data_type { DataType::List(field) | DataType::LargeList(field) - | DataType::FixedSizeList(field, _) => Ok(field.data_type().clone()), + | DataType::FixedSizeList(field, _) + | DataType::ListView(field) + | DataType::LargeListView(field) => Ok(field.data_type().clone()), DataType::Struct(_) => Ok(arg_data_type), DataType::Null => { not_impl_err!("unnest() does not support null yet") diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 1c901f6d4a0e6..ac271f46b0c90 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -4194,7 +4194,9 @@ impl Unnest { } DataType::List(_) | DataType::FixedSizeList(_, _) - | DataType::LargeList(_) => { + | DataType::LargeList(_) + | DataType::ListView(_) + | DataType::LargeListView(_) => { list_columns.push(( index, ColumnUnnestList { @@ -4269,7 +4271,11 @@ fn get_unnested_columns( let mut qualified_columns = Vec::with_capacity(1); match data_type { - DataType::List(_) | DataType::FixedSizeList(_, _) | DataType::LargeList(_) => { + DataType::List(_) + | DataType::FixedSizeList(_, _) + | DataType::LargeList(_) + | DataType::ListView(_) + | DataType::LargeListView(_) => { let data_type = get_unnested_list_datatype_recursive(data_type, depth)?; let new_field = Arc::new(Field::new( col_name, data_type, @@ -4306,7 +4312,9 @@ fn get_unnested_list_datatype_recursive( match data_type { 
DataType::List(field) | DataType::FixedSizeList(field, _) - | DataType::LargeList(field) => { + | DataType::LargeList(field) + | DataType::ListView(field) + | DataType::LargeListView(field) => { if depth == 1 { return Ok(field.data_type().clone()); } diff --git a/datafusion/physical-plan/src/unnest.rs b/datafusion/physical-plan/src/unnest.rs index 48de79b741e06..85799250181b6 100644 --- a/datafusion/physical-plan/src/unnest.rs +++ b/datafusion/physical-plan/src/unnest.rs @@ -33,7 +33,8 @@ use crate::{ use arrow::array::{ Array, ArrayRef, AsArray, BooleanBufferBuilder, FixedSizeListArray, Int64Array, - LargeListArray, ListArray, PrimitiveArray, Scalar, StructArray, new_null_array, + LargeListArray, LargeListViewArray, ListArray, ListViewArray, PrimitiveArray, Scalar, + StructArray, new_null_array, }; use arrow::compute::kernels::length::length; use arrow::compute::kernels::zip::zip; @@ -845,6 +846,30 @@ impl ListArrayType for FixedSizeListArray { } } +impl ListArrayType for ListViewArray { + fn values(&self) -> &ArrayRef { + self.values() + } + + fn value_offsets(&self, row: usize) -> (i64, i64) { + let offset = self.value_offsets()[row] as i64; + let size = self.value_sizes()[row] as i64; + (offset, offset + size) + } +} + +impl ListArrayType for LargeListViewArray { + fn values(&self) -> &ArrayRef { + self.values() + } + + fn value_offsets(&self, row: usize) -> (i64, i64) { + let offset = self.value_offsets()[row]; + let size = self.value_sizes()[row]; + (offset, offset + size) + } +} + /// Unnest multiple list arrays according to the length array. 
fn unnest_list_arrays( list_arrays: &[ArrayRef], @@ -861,6 +886,12 @@ fn unnest_list_arrays( DataType::FixedSizeList(_, _) => { Ok(list_array.as_fixed_size_list() as &dyn ListArrayType) } + DataType::ListView(_) => { + Ok(list_array.as_list_view::<i32>() as &dyn ListArrayType) + } + DataType::LargeListView(_) => { + Ok(list_array.as_list_view::<i64>() as &dyn ListArrayType) + } other => exec_err!("Invalid unnest datatype {other }"), }) .collect::<Result<Vec<_>>>()?; diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index c81575366fb33..3ec699ae57624 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -905,6 +905,8 @@ impl<S: ContextProvider> SqlToRel<'_, S> { DataType::List(_) | DataType::LargeList(_) | DataType::FixedSizeList(_, _) + | DataType::ListView(_) + | DataType::LargeListView(_) | DataType::Struct(_) => Ok(()), DataType::Null => { not_impl_err!("unnest() does not support null yet") diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index 16ac353d4ba9b..1a76dd69f46c5 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -466,7 +466,9 @@ impl RecursiveUnnestRewriter<'_> { } DataType::List(_) | DataType::FixedSizeList(_, _) - | DataType::LargeList(_) => { + | DataType::LargeList(_) + | DataType::ListView(_) + | DataType::LargeListView(_) => { push_projection_dedupl( self.inner_projection_exprs, expr_in_unnest.clone().alias(placeholder_name.clone()), diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index 73aeb6c99d0db..ba499679a9a80 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -1233,3 +1233,99 @@ physical_plan # cleanup statement ok drop table t; + +######################################## +# Unnest ListView / LargeListView Tests # +######################################## + +## Basic unnest ListView in select list +query I +select 
unnest(arrow_cast([1,2,3], 'ListView(Int64)')); +---- +1 +2 +3 + +## Basic unnest ListView in from clause +query I +select * from unnest(arrow_cast([1,2,3], 'ListView(Int64)')); +---- +1 +2 +3 + +## Basic unnest LargeListView in select list +query I +select unnest(arrow_cast([1,2,3], 'LargeListView(Int64)')); +---- +1 +2 +3 + +## Basic unnest LargeListView in from clause +query I +select * from unnest(arrow_cast([1,2,3], 'LargeListView(Int64)')); +---- +1 +2 +3 + +## Unnest ListView with range +query I +select unnest(arrow_cast(range(1, 3), 'ListView(Int64)')); +---- +1 +2 + +## Unnest LargeListView with range +query I +select * from unnest(arrow_cast(range(1, 3), 'LargeListView(Int64)')); +---- +1 +2 + +## Multiple unnest with ListView columns from a table +query III +select + unnest(column1), + unnest(arrow_cast(column2, 'ListView(Int64)')), + unnest(arrow_cast(column4, 'LargeListView(Int64)')) +from unnest_table where column4 is not null; +---- +1 7 13 +2 NULL 14 +3 NULL NULL +4 8 15 +5 9 16 +NULL 10 NULL +NULL NULL 17 +NULL NULL 18 + +## Unnest ListView with null elements +query I +select unnest(arrow_cast([1, null, 3], 'ListView(Int64)')); +---- +1 +NULL +3 + +## Unnest empty ListView +query I +select unnest(arrow_cast([], 'ListView(Int64)')); +---- + +## Unnest ListView of strings +query T +select unnest(arrow_cast(['a','b','c'], 'ListView(Utf8)')); +---- +a +b +c + +## Unnest LargeListView of strings +query T +select unnest(arrow_cast(['a','b','c'], 'LargeListView(Utf8)')); +---- +a +b +c