diff --git a/vortex-array/src/arrays/struct_/array.rs b/vortex-array/src/arrays/struct_/array.rs
index 4bea2e3641a..8b939deb911 100644
--- a/vortex-array/src/arrays/struct_/array.rs
+++ b/vortex-array/src/arrays/struct_/array.rs
@@ -22,6 +22,7 @@ use crate::array::child_to_validity;
 use crate::array::validity_to_child;
 use crate::arrays::ChunkedArray;
 use crate::arrays::Struct;
+use crate::builtins::ArrayBuiltins;
 use crate::dtype::DType;
 use crate::dtype::FieldName;
 use crate::dtype::FieldNames;
@@ -525,4 +526,44 @@ impl Array<Struct> {
         // the correct length and dtype harmony.
         Ok(unsafe { Array::<Struct>::new_unchecked(field_arrays, struct_fields, len, validity) })
     }
+
+    /// Push the struct's top-level validity into each field, so a row null at the struct level
+    /// becomes null in every field.
+    ///
+    /// If `remove_struct_validity` is set the result is non-nullable; otherwise it keeps its
+    /// top-level validity.
+    ///
+    /// When the struct validity definitely has no nulls, the fields are reused unchanged.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if masking a field fails or if [`Self::try_new`] rejects the result.
+    pub fn push_validity_into_children(&self, remove_struct_validity: bool) -> VortexResult<Self> {
+        let struct_validity = self.struct_validity();
+
+        let new_validity = if remove_struct_validity {
+            Validity::NonNullable
+        } else {
+            struct_validity.clone()
+        };
+
+        // Nothing to push down.
+        if struct_validity.definitely_no_nulls() {
+            return Self::try_new(
+                self.names().clone(),
+                self.unmasked_fields(),
+                self.len(),
+                new_validity,
+            );
+        }
+
+        // Null each field where the struct row is null.
+        let mask = struct_validity.to_array(self.len());
+        let fields = self
+            .iter_unmasked_fields()
+            .map(|field| field.clone().mask(mask.clone()))
+            .collect::<VortexResult<Vec<_>>>()?;
+
+        Self::try_new(self.names().clone(), fields, self.len(), new_validity)
+    }
 }
diff --git a/vortex-array/src/arrays/struct_/tests.rs b/vortex-array/src/arrays/struct_/tests.rs
index 3eb65e0c018..efa52a53a9f 100644
--- a/vortex-array/src/arrays/struct_/tests.rs
+++ b/vortex-array/src/arrays/struct_/tests.rs
@@ -172,3 +172,178 @@ fn test_uncompressed_size_in_bytes() -> VortexResult<()> {
     assert_eq!(uncompressed_size, Some(4000));
     Ok(())
 }
+
+#[test]
+fn test_push_validity_into_children_drops_struct_validity() -> VortexResult<()> {
+    let mut ctx = array_session().create_execution_ctx();
+    let struct_array = StructArray::try_new(
+        FieldNames::from(["a", "b"]),
+        vec![
+            buffer![1i32, 2, 3].into_array(),
+            buffer![10i32, 20, 30].into_array(),
+        ],
+        3,
+        Validity::from_iter([true, false, true]),
+    )?;
+
+    let pushed = struct_array.push_validity_into_children(true)?;
+
+    // The struct is now non-nullable; the row-1 null lives in every field instead.
+    let expected = StructArray::try_new(
+        FieldNames::from(["a", "b"]),
+        vec![
+            PrimitiveArray::new(
+                buffer![1i32, 2, 3],
+                Validity::from_iter([true, false, true]),
+            )
+            .into_array(),
+            PrimitiveArray::new(
+                buffer![10i32, 20, 30],
+                Validity::from_iter([true, false, true]),
+            )
+            .into_array(),
+        ],
+        3,
+        Validity::NonNullable,
+    )?;
+
+    assert!(!pushed.dtype().is_nullable());
+    assert_arrays_eq!(pushed, expected, &mut ctx);
+    Ok(())
+}
+
+#[test]
+fn test_push_validity_into_children_preserves_struct_validity() -> VortexResult<()> {
+    let mut ctx = array_session().create_execution_ctx();
+    let struct_array = StructArray::try_new(
+        FieldNames::from(["a", "b"]),
+        vec![
+            buffer![1i32, 2, 3].into_array(),
+            buffer![10i32, 20, 30].into_array(),
+        ],
+        3,
+        Validity::from_iter([true, false, true]),
+    )?;
+
+    let pushed = struct_array.push_validity_into_children(false)?;
+
+    // The null now exists both at the struct level and in every field.
+    let expected = StructArray::try_new(
+        FieldNames::from(["a", "b"]),
+        vec![
+            PrimitiveArray::new(
+                buffer![1i32, 2, 3],
+                Validity::from_iter([true, false, true]),
+            )
+            .into_array(),
+            PrimitiveArray::new(
+                buffer![10i32, 20, 30],
+                Validity::from_iter([true, false, true]),
+            )
+            .into_array(),
+        ],
+        3,
+        Validity::from_iter([true, false, true]),
+    )?;
+
+    assert!(pushed.dtype().is_nullable());
+    assert_arrays_eq!(pushed, expected, &mut ctx);
+    Ok(())
+}
+
+#[test]
+fn test_push_validity_into_children_intersects_field_validity() -> VortexResult<()> {
+    let mut ctx = array_session().create_execution_ctx();
+
+    // Fields carry their own nulls (a at row 1, b at row 2) and the struct is null at row 1,
+    // so pushing intersects both levels rather than overwriting the fields.
+    let struct_array = StructArray::try_new(
+        FieldNames::from(["a", "b"]),
+        vec![
+            PrimitiveArray::from_option_iter([Some(1i32), None, Some(3)]).into_array(),
+            PrimitiveArray::from_option_iter([Some(10i64), Some(20), None]).into_array(),
+        ],
+        3,
+        Validity::from_iter([true, false, true]),
+    )?;
+
+    let pushed = struct_array.push_validity_into_children(true)?;
+
+    // a: null at row 1; b: null at rows 1 and 2.
+    let expected = StructArray::try_new(
+        FieldNames::from(["a", "b"]),
+        vec![
+            PrimitiveArray::from_option_iter([Some(1i32), None, Some(3)]).into_array(),
+            PrimitiveArray::from_option_iter([Some(10i64), None, None]).into_array(),
+        ],
+        3,
+        Validity::NonNullable,
+    )?;
+
+    assert_arrays_eq!(pushed, expected, &mut ctx);
+    Ok(())
+}
+
+#[test]
+fn test_push_validity_into_children_all_invalid() -> VortexResult<()> {
+    let mut ctx = array_session().create_execution_ctx();
+    let struct_array = StructArray::try_new(
+        FieldNames::from(["a", "b"]),
+        vec![
+            buffer![1i32, 2, 3].into_array(),
+            buffer![10i32, 20, 30].into_array(),
+        ],
+        3,
+        Validity::AllInvalid,
+    )?;
+
+    let pushed = struct_array.push_validity_into_children(true)?;
+
+    // Every row is null at the struct level, so every field becomes all-null.
+    let expected = StructArray::try_new(
+        FieldNames::from(["a", "b"]),
+        vec![
+            PrimitiveArray::new(buffer![1i32, 2, 3], Validity::AllInvalid).into_array(),
+            PrimitiveArray::new(buffer![10i32, 20, 30], Validity::AllInvalid).into_array(),
+        ],
+        3,
+        Validity::NonNullable,
+    )?;
+
+    assert_arrays_eq!(pushed, expected, &mut ctx);
+    Ok(())
+}
+
+#[test]
+fn test_push_validity_into_children_no_nulls() -> VortexResult<()> {
+    let mut ctx = array_session().create_execution_ctx();
+
+    // No nulls: the fields are untouched, only the top-level nullability changes.
+    let struct_array = StructArray::try_new(
+        FieldNames::from(["a", "b"]),
+        vec![
+            buffer![1i32, 2, 3].into_array(),
+            buffer![10i32, 20, 30].into_array(),
+        ],
+        3,
+        Validity::AllValid,
+    )?;
+
+    let dropped = struct_array.push_validity_into_children(true)?;
+    let expected = StructArray::try_new(
+        FieldNames::from(["a", "b"]),
+        vec![
+            buffer![1i32, 2, 3].into_array(),
+            buffer![10i32, 20, 30].into_array(),
+        ],
+        3,
+        Validity::NonNullable,
+    )?;
+    assert!(!dropped.dtype().is_nullable());
+    assert_arrays_eq!(dropped, expected, &mut ctx);
+
+    let preserved = struct_array.push_validity_into_children(false)?;
+    assert!(preserved.dtype().is_nullable());
+    assert_arrays_eq!(preserved, struct_array, &mut ctx);
+    Ok(())
+}