Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions vortex-array/src/arrays/struct_/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use crate::array::child_to_validity;
use crate::array::validity_to_child;
use crate::arrays::ChunkedArray;
use crate::arrays::Struct;
use crate::builtins::ArrayBuiltins;
use crate::dtype::DType;
use crate::dtype::FieldName;
use crate::dtype::FieldNames;
Expand Down Expand Up @@ -525,4 +526,38 @@ impl Array<Struct> {
// the correct length and dtype harmony.
Ok(unsafe { Array::<Struct>::new_unchecked(field_arrays, struct_fields, len, validity) })
}

/// Copy the struct-level nulls down into every field array.
///
/// After this call, any row that is null at the struct level is also null in each field.
/// Nulls a field already carries are kept.
///
/// `remove_struct_validity` selects the top-level validity of the result:
/// * `true` - the result is built with [`Validity::NonNullable`];
/// * `false` - the result keeps a clone of the original struct validity.
///
/// # Errors
///
/// Returns an error if masking any field fails or if the struct cannot be rebuilt.
pub fn push_validity_into_children(&self, remove_struct_validity: bool) -> VortexResult<Self> {
    let top_level = self.struct_validity();

    // Decide up front what the result carries at the struct level.
    let result_validity = if remove_struct_validity {
        Validity::NonNullable
    } else {
        top_level.clone()
    };

    if !top_level.definitely_no_nulls() {
        // The struct may hold nulls: mask every field with the struct validity.
        let row_mask = top_level.to_array(self.len());
        let masked_fields: Vec<_> = self
            .iter_unmasked_fields()
            .map(|child| child.clone().mask(row_mask.clone()))
            .collect::<VortexResult<_>>()?;

        return Self::try_new(
            self.names().clone(),
            masked_fields,
            self.len(),
            result_validity,
        );
    }

    // No struct-level nulls, so there is nothing to push down: reuse the fields unchanged.
    Self::try_new(
        self.names().clone(),
        self.unmasked_fields(),
        self.len(),
        result_validity,
    )
}
}
175 changes: 175 additions & 0 deletions vortex-array/src/arrays/struct_/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,178 @@ fn test_uncompressed_size_in_bytes() -> VortexResult<()> {
assert_eq!(uncompressed_size, Some(4000));
Ok(())
}

/// With `remove_struct_validity = true`, the struct-level null at row 1 is expected in both
/// fields and the resulting struct is non-nullable.
#[test]
fn test_push_validity_into_children_drops_struct_validity() -> VortexResult<()> {
    let mut ctx = array_session().create_execution_ctx();

    // Row 1 is null; the same pattern is expected in every field afterwards.
    let row_validity = Validity::from_iter([true, false, true]);

    let input_fields = vec![
        buffer![1i32, 2, 3].into_array(),
        buffer![10i32, 20, 30].into_array(),
    ];
    let input = StructArray::try_new(
        FieldNames::from(["a", "b"]),
        input_fields,
        3,
        row_validity.clone(),
    )?;

    let actual = input.push_validity_into_children(true)?;

    // Same values, but each field now carries the null and the struct carries none.
    let field_a = PrimitiveArray::new(buffer![1i32, 2, 3], row_validity.clone());
    let field_b = PrimitiveArray::new(buffer![10i32, 20, 30], row_validity);
    let expected = StructArray::try_new(
        FieldNames::from(["a", "b"]),
        vec![field_a.into_array(), field_b.into_array()],
        3,
        Validity::NonNullable,
    )?;

    assert!(!actual.dtype().is_nullable());
    assert_arrays_eq!(actual, expected, &mut ctx);
    Ok(())
}

/// With `remove_struct_validity = false`, the null at row 1 is expected in both fields while
/// the struct keeps its own validity.
#[test]
fn test_push_validity_into_children_preserves_struct_validity() -> VortexResult<()> {
    let mut ctx = array_session().create_execution_ctx();

    // One validity pattern is shared by the input struct, the expected fields and the
    // expected struct.
    let row_validity = Validity::from_iter([true, false, true]);

    let input_fields = vec![
        buffer![1i32, 2, 3].into_array(),
        buffer![10i32, 20, 30].into_array(),
    ];
    let input = StructArray::try_new(
        FieldNames::from(["a", "b"]),
        input_fields,
        3,
        row_validity.clone(),
    )?;

    let actual = input.push_validity_into_children(false)?;

    // The null is duplicated: present at the struct level and inside each field.
    let field_a = PrimitiveArray::new(buffer![1i32, 2, 3], row_validity.clone());
    let field_b = PrimitiveArray::new(buffer![10i32, 20, 30], row_validity.clone());
    let expected = StructArray::try_new(
        FieldNames::from(["a", "b"]),
        vec![field_a.into_array(), field_b.into_array()],
        3,
        row_validity,
    )?;

    assert!(actual.dtype().is_nullable());
    assert_arrays_eq!(actual, expected, &mut ctx);
    Ok(())
}

/// Fields that already hold nulls keep them: the expected result combines the struct-level
/// null with each field's own nulls instead of replacing them.
#[test]
fn test_push_validity_into_children_intersects_field_validity() -> VortexResult<()> {
    let mut ctx = array_session().create_execution_ctx();

    // Input: `a` is null at row 1, `b` is null at row 2, and the struct itself is null at row 1.
    let a_in = PrimitiveArray::from_option_iter([Some(1i32), None, Some(3)]);
    let b_in = PrimitiveArray::from_option_iter([Some(10i64), Some(20), None]);
    let input = StructArray::try_new(
        FieldNames::from(["a", "b"]),
        vec![a_in.into_array(), b_in.into_array()],
        3,
        Validity::from_iter([true, false, true]),
    )?;

    let actual = input.push_validity_into_children(true)?;

    // Expected: `a` stays null at row 1 only; `b` is null at rows 1 and 2.
    let a_out = PrimitiveArray::from_option_iter([Some(1i32), None, Some(3)]);
    let b_out = PrimitiveArray::from_option_iter([Some(10i64), None, None]);
    let expected = StructArray::try_new(
        FieldNames::from(["a", "b"]),
        vec![a_out.into_array(), b_out.into_array()],
        3,
        Validity::NonNullable,
    )?;

    assert_arrays_eq!(actual, expected, &mut ctx);
    Ok(())
}

/// A struct that is null in every row is expected to produce fields that are entirely null.
#[test]
fn test_push_validity_into_children_all_invalid() -> VortexResult<()> {
    let mut ctx = array_session().create_execution_ctx();

    let input_fields = vec![
        buffer![1i32, 2, 3].into_array(),
        buffer![10i32, 20, 30].into_array(),
    ];
    let input = StructArray::try_new(
        FieldNames::from(["a", "b"]),
        input_fields,
        3,
        Validity::AllInvalid,
    )?;

    let actual = input.push_validity_into_children(true)?;

    // Values are unchanged, but every field is now all-invalid under a non-nullable struct.
    let field_a = PrimitiveArray::new(buffer![1i32, 2, 3], Validity::AllInvalid);
    let field_b = PrimitiveArray::new(buffer![10i32, 20, 30], Validity::AllInvalid);
    let expected = StructArray::try_new(
        FieldNames::from(["a", "b"]),
        vec![field_a.into_array(), field_b.into_array()],
        3,
        Validity::NonNullable,
    )?;

    assert_arrays_eq!(actual, expected, &mut ctx);
    Ok(())
}

/// When the struct has no nulls, the fields are expected to pass through untouched and only
/// the top-level nullability depends on `remove_struct_validity`.
#[test]
fn test_push_validity_into_children_no_nulls() -> VortexResult<()> {
    let mut ctx = array_session().create_execution_ctx();

    // Both the input and the expected output use the same plain, non-nullable fields.
    let plain_fields = || {
        vec![
            buffer![1i32, 2, 3].into_array(),
            buffer![10i32, 20, 30].into_array(),
        ]
    };

    let input = StructArray::try_new(
        FieldNames::from(["a", "b"]),
        plain_fields(),
        3,
        Validity::AllValid,
    )?;

    // Removing the struct validity yields a non-nullable struct over the same fields.
    let dropped = input.push_validity_into_children(true)?;
    let expected = StructArray::try_new(
        FieldNames::from(["a", "b"]),
        plain_fields(),
        3,
        Validity::NonNullable,
    )?;
    assert!(!dropped.dtype().is_nullable());
    assert_arrays_eq!(dropped, expected, &mut ctx);

    // Keeping the struct validity gives back an array equal to the input.
    let preserved = input.push_validity_into_children(false)?;
    assert!(preserved.dtype().is_nullable());
    assert_arrays_eq!(preserved, input, &mut ctx);
    Ok(())
}
Loading