From 047ede05f24e7f68545a611afd7ef62cdfd21da8 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Tue, 17 Mar 2026 16:29:17 -0400 Subject: [PATCH 1/7] PatchedArray: basics and wiring Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/mod.rs | 4 + vortex-array/src/arrays/patched/array.rs | 256 ++++++++++++ .../src/arrays/patched/compute/compare.rs | 273 ++++++++++++ .../src/arrays/patched/compute/filter.rs | 145 +++++++ .../src/arrays/patched/compute/mod.rs | 6 + .../src/arrays/patched/compute/rules.rs | 12 + vortex-array/src/arrays/patched/mod.rs | 75 ++++ .../src/arrays/patched/vtable/kernels.rs | 9 + vortex-array/src/arrays/patched/vtable/mod.rs | 389 ++++++++++++++++++ .../src/arrays/patched/vtable/operations.rs | 39 ++ .../src/arrays/patched/vtable/slice.rs | 183 ++++++++ vortex-buffer/src/buffer.rs | 28 ++ 12 files changed, 1419 insertions(+) create mode 100644 vortex-array/src/arrays/patched/array.rs create mode 100644 vortex-array/src/arrays/patched/compute/compare.rs create mode 100644 vortex-array/src/arrays/patched/compute/filter.rs create mode 100644 vortex-array/src/arrays/patched/compute/mod.rs create mode 100644 vortex-array/src/arrays/patched/compute/rules.rs create mode 100644 vortex-array/src/arrays/patched/mod.rs create mode 100644 vortex-array/src/arrays/patched/vtable/kernels.rs create mode 100644 vortex-array/src/arrays/patched/vtable/mod.rs create mode 100644 vortex-array/src/arrays/patched/vtable/operations.rs create mode 100644 vortex-array/src/arrays/patched/vtable/slice.rs diff --git a/vortex-array/src/arrays/mod.rs b/vortex-array/src/arrays/mod.rs index 62325b69eb5..2fd8aeeb9b3 100644 --- a/vortex-array/src/arrays/mod.rs +++ b/vortex-array/src/arrays/mod.rs @@ -66,6 +66,10 @@ pub mod null; pub use null::Null; pub use null::NullArray; +pub mod patched; +pub use patched::Patched; +pub use patched::PatchedArray; + pub mod primitive; pub use primitive::Primitive; pub use primitive::PrimitiveArray; diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs new file mode 100644 index 00000000000..9f96f3c5c9f --- /dev/null +++ b/vortex-array/src/arrays/patched/array.rs @@ -0,0 +1,256 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::ops::Range; + +use vortex_buffer::Buffer; +use vortex_buffer::BufferMut; +use vortex_error::VortexResult; +use vortex_error::vortex_ensure; + +use crate::ArrayRef; +use crate::Canonical; +use crate::DynArray; +use crate::ExecutionCtx; +use crate::arrays::patched::PatchAccessor; +use crate::arrays::patched::TransposedPatches; +use crate::arrays::patched::patch_lanes; +use crate::buffer::BufferHandle; +use crate::dtype::IntegerPType; +use crate::dtype::NativePType; +use crate::dtype::PType; +use crate::match_each_native_ptype; +use crate::match_each_unsigned_integer_ptype; +use crate::patches::Patches; +use crate::stats::ArrayStats; + +/// An array that partially "patches" another array with new values. +/// +/// Patched arrays implement the set of nodes that do this instead here...I think? +#[derive(Debug, Clone)] +pub struct PatchedArray { + /// The inner array that is being patched. This is the zeroth child. + pub(super) inner: ArrayRef, + + /// Number of 1024-element chunks. Pre-computed for convenience. + pub(super) n_chunks: usize, + + /// Number of lanes the patch indices and values have been split into. Each of the `n_chunks` + /// of 1024 values is split into `n_lanes` lanes horizontally, each lane having 1024 / n_lanes + /// values that might be patched. + pub(super) n_lanes: usize, + + /// Offset into the first chunk + pub(super) offset: usize, + /// Total length. + pub(super) len: usize, + + /// lane offsets. The PType of these MUST be u32 + pub(super) lane_offsets: BufferHandle, + /// indices within a 1024-element chunk. The PType of these MUST be u16 + pub(super) indices: BufferHandle, + /// patch values corresponding to the indices. The ptype is specified by `values_ptype`. + pub(super) values: BufferHandle, + /// PType of the scalars in `values`. Can be any native type. + pub(super) values_ptype: PType, + + pub(super) stats_set: ArrayStats, +} + +impl PatchedArray { + pub fn from_array_and_patches( + inner: ArrayRef, + patches: &Patches, + ctx: &mut ExecutionCtx, + ) -> VortexResult { + vortex_ensure!( + inner.dtype().eq_with_nullability_superset(patches.dtype()), + "array DType must match patches DType" + ); + + let values_ptype = patches.dtype().as_ptype(); + + let TransposedPatches { + n_chunks, + n_lanes, + lane_offsets, + indices, + values, + } = transpose_patches(patches, ctx)?; + + let len = inner.len(); + + Ok(Self { + inner, + n_chunks, + n_lanes, + values_ptype, + offset: 0, + len, + lane_offsets: BufferHandle::new_host(lane_offsets), + indices: BufferHandle::new_host(indices), + values: BufferHandle::new_host(values), + stats_set: ArrayStats::default(), + }) + } + + /// Get an accessor, which allows ranged access to patches by chunk/lane. + pub fn accessor(&self) -> PatchAccessor<'_, V> { + PatchAccessor { + n_lanes: self.n_lanes, + lane_offsets: self.lane_offsets.as_host().reinterpret::(), + indices: self.indices.as_host().reinterpret::(), + values: self.values.as_host().reinterpret::(), + } + } + + /// Slice the array to just the patches and inner values that are within the chunk range. + pub(crate) fn slice_chunks(&self, chunks: Range) -> VortexResult { + let lane_offsets_start = chunks.start * self.n_lanes; + let lane_offsets_stop = chunks.end * self.n_lanes + 1; + + let sliced_lane_offsets = self + .lane_offsets + .slice_typed::(lane_offsets_start..lane_offsets_stop); + let indices = self.indices.clone(); + let values = self.values.clone(); + + let begin = (chunks.start * 1024).max(self.offset); + let end = (chunks.end * 1024).min(self.len); + + let offset = begin % 1024; + + let inner = self.inner.slice(begin..end)?; + + let len = end - begin; + let n_chunks = (end - begin).div_ceil(1024); + + Ok(PatchedArray { + inner, + n_chunks, + n_lanes: self.n_lanes, + offset, + len, + indices, + values, + values_ptype: self.values_ptype, + lane_offsets: sliced_lane_offsets, + stats_set: ArrayStats::default(), + }) + } +} + +/// Transpose a set of patches from the default sorted layout into the data parallel layout. +#[allow(clippy::cognitive_complexity)] +fn transpose_patches(patches: &Patches, ctx: &mut ExecutionCtx) -> VortexResult { + let array_len = patches.array_len(); + let offset = patches.offset(); + + let indices = patches + .indices() + .clone() + .execute::(ctx)? + .into_primitive(); + + let values = patches + .values() + .clone() + .execute::(ctx)? + .into_primitive(); + + let indices_ptype = indices.ptype(); + let values_ptype = values.ptype(); + + let indices = indices.buffer_handle().clone().unwrap_host(); + let values = values.buffer_handle().clone().unwrap_host(); + + match_each_unsigned_integer_ptype!(indices_ptype, |I| { + match_each_native_ptype!(values_ptype, |V| { + let indices: Buffer = Buffer::from_byte_buffer(indices); + let values: Buffer = Buffer::from_byte_buffer(values); + + Ok(transpose( + indices.as_slice(), + values.as_slice(), + offset, + array_len, + )) + }) + }) +} + +#[allow(clippy::cast_possible_truncation)] +fn transpose( + indices_in: &[I], + values_in: &[V], + offset: usize, + array_len: usize, +) -> TransposedPatches { + // Total number of slots is number of chunks times number of lanes. + let n_chunks = array_len.div_ceil(1024); + assert!( + n_chunks <= u32::MAX as usize, + "Cannot transpose patches for array with >= 4 trillion elements" + ); + + let n_lanes = patch_lanes::(); + + // We know upfront how many indices and values we'll have. + let mut indices_buffer = BufferMut::with_capacity(indices_in.len()); + let mut values_buffer = BufferMut::with_capacity(values_in.len()); + + // number of patches in each chunk. + let mut lane_offsets: BufferMut = BufferMut::zeroed(n_chunks * n_lanes + 1); + + // Scan the index/values once to get chunk/lane counts + for index in indices_in { + let index = index.as_() - offset; + let chunk = index / 1024; + let lane = index % n_lanes; + + lane_offsets[chunk * n_lanes + lane + 1] += 1; + } + + // Prefix-sum sizes -> offsets + for index in 1..lane_offsets.len() { + lane_offsets[index] += lane_offsets[index - 1]; + } + + // Loop over patches, writing thme to final positions + let indices_out = indices_buffer.spare_capacity_mut(); + let values_out = values_buffer.spare_capacity_mut(); + for (index, &value) in std::iter::zip(indices_in, values_in) { + let index = index.as_() - offset; + let chunk = index / 1024; + let lane = index % n_lanes; + + let position = &mut lane_offsets[chunk * n_lanes + lane]; + indices_out[*position as usize].write((index % 1024) as u16); + values_out[*position as usize].write(value); + *position += 1; + } + + // SAFETY: we know there are exactly indices_in.len() indices/values, and we just + // set them to the appropriate values in the loop above. + unsafe { + indices_buffer.set_len(indices_in.len()); + values_buffer.set_len(values_in.len()); + } + + // Now, pass over all the indices and values again and subtract out the position increments. + for index in indices_in { + let index = index.as_() - offset; + let chunk = index / 1024; + let lane = index % n_lanes; + + lane_offsets[chunk * n_lanes + lane] -= 1; + } + + TransposedPatches { + n_chunks, + n_lanes, + lane_offsets: lane_offsets.freeze().into_byte_buffer(), + indices: indices_buffer.freeze().into_byte_buffer(), + values: values_buffer.freeze().into_byte_buffer(), + } +} diff --git a/vortex-array/src/arrays/patched/compute/compare.rs b/vortex-array/src/arrays/patched/compute/compare.rs new file mode 100644 index 00000000000..d1932ed1e44 --- /dev/null +++ b/vortex-array/src/arrays/patched/compute/compare.rs @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_buffer::BitBufferMut; +use vortex_error::VortexExpect; +use vortex_error::VortexResult; + +use crate::ArrayRef; +use crate::Canonical; +use crate::ExecutionCtx; +use crate::IntoArray; +use crate::arrays::BoolArray; +use crate::arrays::ConstantArray; +use crate::arrays::Patched; +use crate::arrays::bool::BoolArrayParts; +use crate::arrays::patched::patch_lanes; +use crate::arrays::primitive::NativeValue; +use crate::builtins::ArrayBuiltins; +use crate::dtype::NativePType; +use crate::match_each_native_ptype; +use crate::scalar_fn::fns::binary::CompareKernel; +use crate::scalar_fn::fns::operators::CompareOperator; + +impl CompareKernel for Patched { + fn compare( + lhs: &Self::Array, + rhs: &ArrayRef, + operator: CompareOperator, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + let Some(constant) = rhs.as_constant() else { + return Ok(None); + }; + + let result = lhs + .inner + .binary( + ConstantArray::new(constant.clone(), lhs.len()).into_array(), + operator.into(), + )? + .execute::(ctx)? + .into_bool(); + + let BoolArrayParts { + bits, + offset, + len, + validity, + } = result.into_parts(); + + let mut bits = BitBufferMut::from_buffer(bits.unwrap_host().into_mut(), offset, len); + + fn apply( + bits: &mut BitBufferMut, + lane_offsets: &[u32], + indices: &[u16], + values: &[V], + constant: V, + cmp: F, + ) -> VortexResult<()> + where + F: Fn(V, V) -> bool, + { + let n_lanes = patch_lanes::(); + + for index in 0..(lane_offsets.len() - 1) { + let chunk = index / n_lanes; + + let lane_start = lane_offsets[index] as usize; + let lane_end = lane_offsets[index + 1] as usize; + + for (&patch_index, &patch_value) in std::iter::zip( + &indices[lane_start..lane_end], + &values[lane_start..lane_end], + ) { + let bit_index = chunk * 1024 + patch_index as usize; + if cmp(patch_value, constant) { + bits.set(bit_index) + } else { + bits.unset(bit_index) + } + } + } + + Ok(()) + } + + let lane_offsets = lhs.lane_offsets.as_host().reinterpret::(); + let indices = lhs.indices.as_host().reinterpret::(); + + match_each_native_ptype!(lhs.values_ptype, |V| { + let values = lhs.values.as_host().reinterpret::(); + let constant = constant + .as_primitive() + .as_::() + .vortex_expect("compare constant not null"); + + match operator { + CompareOperator::Eq => { + apply::( + &mut bits, + lane_offsets, + indices, + values, + constant, + |l, r| NativeValue(l) == NativeValue(r), + )?; + } + CompareOperator::NotEq => { + apply::( + &mut bits, + lane_offsets, + indices, + values, + constant, + |l, r| NativeValue(l) != NativeValue(r), + )?; + } + CompareOperator::Gt => { + apply::( + &mut bits, + lane_offsets, + indices, + values, + constant, + |l, r| NativeValue(l) > NativeValue(r), + )?; + } + CompareOperator::Gte => { + apply::( + &mut bits, + lane_offsets, + indices, + values, + constant, + |l, r| NativeValue(l) >= NativeValue(r), + )?; + } + CompareOperator::Lt => { + apply::( + &mut bits, + lane_offsets, + indices, + values, + constant, + |l, r| NativeValue(l) < NativeValue(r), + )?; + } + CompareOperator::Lte => { + apply::( + &mut bits, + lane_offsets, + indices, + values, + constant, + |l, r| NativeValue(l) <= NativeValue(r), + )?; + } + } + }); + + // SAFETY: thing + let result = unsafe { BoolArray::new_unchecked(bits.freeze(), validity) }; + Ok(Some(result.into_array())) + } +} + +#[cfg(test)] +mod tests { + use vortex_buffer::buffer; + use vortex_error::VortexResult; + + use crate::ExecutionCtx; + use crate::IntoArray; + use crate::LEGACY_SESSION; + use crate::arrays::BoolArray; + use crate::arrays::ConstantArray; + use crate::arrays::Patched; + use crate::arrays::PatchedArray; + use crate::arrays::PrimitiveArray; + use crate::assert_arrays_eq; + use crate::patches::Patches; + use crate::scalar_fn::fns::binary::CompareKernel; + use crate::scalar_fn::fns::operators::CompareOperator; + use crate::validity::Validity; + + #[test] + fn test_basic() { + let lhs = PrimitiveArray::from_iter(0u32..512).into_array(); + let patches = Patches::new( + 512, + 0, + buffer![509u16, 510, 511].into_array(), + buffer![u32::MAX; 3].into_array(), + None, + ) + .unwrap(); + + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let lhs = PatchedArray::from_array_and_patches(lhs, &patches, &mut ctx).unwrap(); + + let rhs = ConstantArray::new(u32::MAX, 512).into_array(); + + let result = ::compare(&lhs, &rhs, CompareOperator::Eq, &mut ctx) + .unwrap() + .unwrap(); + + let expected = + BoolArray::from_indices(512, [509, 510, 511], Validity::NonNullable).into_array(); + + assert_arrays_eq!(expected, result); + } + + #[test] + fn test_subnormal_f32() -> VortexResult<()> { + // Subnormal f32 values are smaller than f32::MIN_POSITIVE but greater than 0 + let subnormal: f32 = f32::MIN_POSITIVE / 2.0; + assert!(subnormal > 0.0 && subnormal < f32::MIN_POSITIVE); + + let lhs = PrimitiveArray::from_iter((0..512).map(|i| i as f32)).into_array(); + + let patches = Patches::new( + 512, + 0, + buffer![509u16, 510, 511].into_array(), + buffer![f32::NAN, subnormal, f32::NEG_INFINITY].into_array(), + None, + )?; + + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + let lhs = PatchedArray::from_array_and_patches(lhs, &patches, &mut ctx)?; + + let rhs = ConstantArray::new(subnormal, 512).into_array(); + + let result = + ::compare(&lhs, &rhs, CompareOperator::Eq, &mut ctx)? + .unwrap(); + + let expected = BoolArray::from_indices(512, [510], Validity::NonNullable).into_array(); + + assert_arrays_eq!(expected, result); + Ok(()) + } + + #[test] + fn test_pos_neg_zero() -> VortexResult<()> { + let lhs = PrimitiveArray::from_iter([-0.0f32; 10]).into_array(); + + let patches = Patches::new( + 10, + 0, + buffer![5u16, 6, 7, 8, 9].into_array(), + buffer![f32::NAN, f32::NEG_INFINITY, 0f32, -0.0f32, f32::INFINITY].into_array(), + None, + )?; + + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + let lhs = PatchedArray::from_array_and_patches(lhs, &patches, &mut ctx)?; + + let rhs = ConstantArray::new(0.0f32, 10).into_array(); + + let result = + ::compare(&lhs, &rhs, CompareOperator::Eq, &mut ctx)? + .unwrap(); + + let expected = BoolArray::from_indices(10, [7], Validity::NonNullable).into_array(); + + assert_arrays_eq!(expected, result); + + Ok(()) + } +} diff --git a/vortex-array/src/arrays/patched/compute/filter.rs b/vortex-array/src/arrays/patched/compute/filter.rs new file mode 100644 index 00000000000..d045a79c38f --- /dev/null +++ b/vortex-array/src/arrays/patched/compute/filter.rs @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexResult; +use vortex_mask::AllOr; +use vortex_mask::Mask; + +use crate::ArrayRef; +use crate::IntoArray; +use crate::arrays::FilterArray; +use crate::arrays::Patched; +use crate::arrays::filter::FilterReduce; + +impl FilterReduce for Patched { + fn filter(array: &Self::Array, mask: &Mask) -> VortexResult> { + // Find the contiguous chunk range that the mask covers. We use this to slice the inner + // components, then wrap the rest up with another FilterArray. + // + // This is helpful when we have a very selective filter that is clustered to a small + // range. + let (chunk_start, chunk_stop) = match mask.indices() { + AllOr::All | AllOr::None => { + // This is handled as the precondition to this method, see the FilterReduce + // documentation. + unreachable!("mask must be a MaskValues here") + } + AllOr::Some(indices) => { + let first = indices[0]; + let last = indices[indices.len() - 1]; + + (first / 1024, last.div_ceil(1024)) + } + }; + + // If all chunks already covered, there is nothing to do. + if chunk_start == 0 && chunk_stop == array.n_chunks { + return Ok(None); + } + + let sliced = array.slice_chunks(chunk_start..chunk_stop)?; + + let slice_start = chunk_start * 1024; + let slice_end = (chunk_stop * 1024).min(array.len()); + let remainder = mask.slice(slice_start..slice_end); + + Ok(Some( + FilterArray::new(sliced.into_array(), remainder).into_array(), + )) + } +} + +#[cfg(test)] +mod tests { + use vortex_buffer::buffer; + use vortex_error::VortexResult; + use vortex_mask::Mask; + + use crate::DynArray; + use crate::ExecutionCtx; + use crate::IntoArray; + use crate::LEGACY_SESSION; + use crate::arrays::FilterArray; + use crate::arrays::PatchedArray; + use crate::arrays::PrimitiveArray; + use crate::assert_arrays_eq; + use crate::patches::Patches; + + #[test] + fn test_filter_noop() -> VortexResult<()> { + let array = buffer![u16::MIN; 5].into_array(); + let patched_indices = buffer![3u8, 4].into_array(); + let patched_values = buffer![u16::MAX; 2].into_array(); + + let patches = Patches::new(5, 0, patched_indices, patched_values, None)?; + + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let array = PatchedArray::from_array_and_patches(array, &patches, &mut ctx)?.into_array(); + + let filtered = FilterArray::new( + array.clone(), + Mask::from_iter([true, false, false, false, true]), + ) + .into_array(); + + let reduced = array.vtable().reduce_parent(&array, &filtered, 0)?; + + // Filter does not get pushed through to child because it does not prune any chunks. + assert!(reduced.is_none()); + + Ok(()) + } + + #[test] + fn test_filter_basic() -> VortexResult<()> { + // Basic test: filter with mask that crosses boundaries. + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let array = buffer![u16::MIN; 4096].into_array(); + let patched_indices = buffer![1024u16, 1025].into_array(); + let patched_values = buffer![u16::MAX, u16::MAX].into_array(); + + let patches = Patches::new(4096, 0, patched_indices, patched_values, None)?; + + let array = PatchedArray::from_array_and_patches(array, &patches, &mut ctx)?.into_array(); + + // Filter that only touches the middle 2 chunks + let mask = Mask::from_indices(4096, vec![1024, 1025, 3000]); + + let filtered = FilterArray::new(array.clone(), mask).into_array(); + let reduced = array.vtable().reduce_parent(&array, &filtered, 0)?; + + let expected = PrimitiveArray::from_iter([u16::MAX, u16::MAX, u16::MIN]).into_array(); + + assert_arrays_eq!(expected, reduced.unwrap()); + + Ok(()) + } + + #[test] + fn test_filter_complex() -> VortexResult<()> { + // Basic test: filter with mask that crosses boundaries. + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let array = buffer![u16::MIN; 4096].into_array(); + let patched_indices = buffer![1024u16, 1025].into_array(); + let patched_values = buffer![u16::MAX, u16::MAX].into_array(); + + let patches = Patches::new(4096, 1, patched_indices, patched_values, None)?; + + let array = PatchedArray::from_array_and_patches(array, &patches, &mut ctx)?.into_array(); + + // Filter that only touches the middle 2 chunks + let mask = Mask::from_indices(4096, vec![1024, 1025, 3000]); + + let filtered = FilterArray::new(array.clone(), mask).into_array(); + let reduced = array.vtable().reduce_parent(&array, &filtered, 0)?; + + let expected = PrimitiveArray::from_iter([u16::MAX, u16::MIN, u16::MIN]).into_array(); + + assert_arrays_eq!(expected, reduced.unwrap()); + + Ok(()) + } +} diff --git a/vortex-array/src/arrays/patched/compute/mod.rs b/vortex-array/src/arrays/patched/compute/mod.rs new file mode 100644 index 00000000000..aa8b18199b2 --- /dev/null +++ b/vortex-array/src/arrays/patched/compute/mod.rs @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod compare; +mod filter; +pub(crate) mod rules; diff --git a/vortex-array/src/arrays/patched/compute/rules.rs b/vortex-array/src/arrays/patched/compute/rules.rs new file mode 100644 index 00000000000..3ecb25c1efa --- /dev/null +++ b/vortex-array/src/arrays/patched/compute/rules.rs @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use crate::arrays::Patched; +use crate::arrays::filter::FilterReduceAdaptor; +use crate::arrays::slice::SliceReduceAdaptor; +use crate::optimizer::rules::ParentRuleSet; + +pub(crate) const PARENT_RULES: ParentRuleSet = ParentRuleSet::new(&[ + ParentRuleSet::lift(&FilterReduceAdaptor(Patched)), + ParentRuleSet::lift(&SliceReduceAdaptor(Patched)), +]); diff --git a/vortex-array/src/arrays/patched/mod.rs b/vortex-array/src/arrays/patched/mod.rs new file mode 100644 index 00000000000..f035204c188 --- /dev/null +++ b/vortex-array/src/arrays/patched/mod.rs @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod array; +mod compute; +mod vtable; + +pub use array::*; +use vortex_buffer::ByteBuffer; +pub use vtable::*; + +/// Patches that have been transposed into GPU format. +struct TransposedPatches { + n_chunks: usize, + n_lanes: usize, + lane_offsets: ByteBuffer, + indices: ByteBuffer, + values: ByteBuffer, +} + +/// Number of lanes used at patch time for a value of type `V`. +/// +/// This is *NOT* equal to the number of FastLanes lanes for the type `V`, rather this is going to +/// correspond to how many "lanes" we will end up copying data on. +/// +/// When applied on the CPU, this configuration doesn't really matter. On the GPU, it is based +/// on the number of patches involved here. +const fn patch_lanes() -> usize { + // For types 32-bits or smaller, we use a 32 lane configuration, and for 64-bit we use 16 lanes. + // This matches up with the number of lanes we use to execute copying results from bit-unpacking + // from shared to global memory. + if size_of::() < 8 { 32 } else { 16 } +} + +pub struct PatchAccessor<'a, V> { + n_lanes: usize, + lane_offsets: &'a [u32], + indices: &'a [u16], + values: &'a [V], +} + +impl<'a, V: Sized> PatchAccessor<'a, V> { + /// Access the patches for a particular lane + pub fn access(&'a self, chunk: usize, lane: usize) -> LanePatches<'a, V> { + let start = self.lane_offsets[chunk * self.n_lanes + lane] as usize; + let stop = self.lane_offsets[chunk * self.n_lanes + lane + 1] as usize; + + LanePatches { + indices: &self.indices[start..stop], + values: &self.values[start..stop], + } + } +} + +pub struct LanePatches<'a, V> { + pub indices: &'a [u16], + pub values: &'a [V], +} + +impl<'a, V: Copy> LanePatches<'a, V> { + pub fn len(&self) -> usize { + self.indices.len() + } + + pub fn is_empty(&self) -> bool { + self.indices.is_empty() + } + + pub fn iter(&self) -> impl Iterator { + self.indices + .iter() + .copied() + .zip(self.values.iter().copied()) + } +} diff --git a/vortex-array/src/arrays/patched/vtable/kernels.rs b/vortex-array/src/arrays/patched/vtable/kernels.rs new file mode 100644 index 00000000000..2c60ec4a20f --- /dev/null +++ b/vortex-array/src/arrays/patched/vtable/kernels.rs @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use crate::arrays::Patched; +use crate::kernel::ParentKernelSet; +use crate::scalar_fn::fns::binary::CompareExecuteAdaptor; + +pub(super) const PARENT_KERNELS: ParentKernelSet = + ParentKernelSet::new(&[ParentKernelSet::lift(&CompareExecuteAdaptor(Patched))]); diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs new file mode 100644 index 00000000000..bcc87d869ce --- /dev/null +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -0,0 +1,389 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod kernels; +mod operations; +mod slice; + +use std::hash::Hash; +use std::hash::Hasher; + +use vortex_buffer::Buffer; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_error::vortex_ensure; +use vortex_error::vortex_panic; +use vortex_session::VortexSession; + +use crate::ArrayEq; +use crate::ArrayHash; +use crate::ArrayRef; +use crate::Canonical; +use crate::DeserializeMetadata; +use crate::DynArray; +use crate::ExecutionCtx; +use crate::ExecutionStep; +use crate::IntoArray; +use crate::Precision; +use crate::ProstMetadata; +use crate::arrays::PrimitiveArray; +use crate::arrays::patched::PatchedArray; +use crate::arrays::patched::compute::rules::PARENT_RULES; +use crate::arrays::patched::patch_lanes; +use crate::arrays::patched::vtable::kernels::PARENT_KERNELS; +use crate::arrays::primitive::PrimitiveArrayParts; +use crate::buffer::BufferHandle; +use crate::dtype::DType; +use crate::dtype::NativePType; +use crate::match_each_native_ptype; +use crate::serde::ArrayChildren; +use crate::stats::ArrayStats; +use crate::stats::StatsSetRef; +use crate::vtable; +use crate::vtable::ArrayId; +use crate::vtable::VTable; +use crate::vtable::ValidityChild; +use crate::vtable::ValidityVTableFromChild; + +vtable!(Patched); + +#[derive(Debug)] +pub struct Patched; + +impl ValidityChild for Patched { + fn validity_child(array: &PatchedArray) -> &ArrayRef { + &array.inner + } +} + +#[derive(Clone, prost::Message)] +pub struct PatchedMetadata { + #[prost(uint32, tag = "1")] + pub(crate) offset: u32, +} + +impl VTable for Patched { + type Array = PatchedArray; + type Metadata = ProstMetadata; + type OperationsVTable = Self; + type ValidityVTable = ValidityVTableFromChild; + + fn id(_array: &Self::Array) -> ArrayId { + ArrayId::new_ref("vortex.patched") + } + + fn len(array: &Self::Array) -> usize { + array.len + } + + fn dtype(array: &Self::Array) -> &DType { + array.inner.dtype() + } + + fn stats(array: &Self::Array) -> StatsSetRef<'_> { + array.stats_set.to_ref(array.as_ref()) + } + + fn array_hash(array: &Self::Array, state: &mut H, precision: Precision) { + array.inner.array_hash(state, precision); + array.values_ptype.hash(state); + array.n_chunks.hash(state); + array.n_lanes.hash(state); + array.lane_offsets.array_hash(state, precision); + array.indices.array_hash(state, precision); + array.values.array_hash(state, precision); + } + + fn array_eq(array: &Self::Array, other: &Self::Array, precision: Precision) -> bool { + array.n_chunks == other.n_chunks + && array.n_lanes == other.n_lanes + && array.values_ptype == other.values_ptype + && array.inner.array_eq(&other.inner, precision) + && array.lane_offsets.array_eq(&other.lane_offsets, precision) + && array.indices.array_eq(&other.indices, precision) + && array.values.array_eq(&other.values, precision) + } + + fn nbuffers(_array: &Self::Array) -> usize { + 3 + } + + fn buffer(array: &Self::Array, idx: usize) -> BufferHandle { + match idx { + 0 => array.lane_offsets.clone(), + 1 => array.indices.clone(), + 2 => array.values.clone(), + _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), + } + } + + fn buffer_name(_array: &Self::Array, idx: usize) -> Option { + match idx { + 0 => Some("lane_offsets".to_string()), + 1 => Some("patch_indices".to_string()), + 2 => Some("patch_values".to_string()), + _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), + } + } + + fn nchildren(_array: &Self::Array) -> usize { + 1 + } + + fn child(array: &Self::Array, idx: usize) -> ArrayRef { + if idx == 0 { + array.inner.clone() + } else { + vortex_panic!("invalid child index for PatchedArray: {idx}"); + } + } + + fn child_name(_array: &Self::Array, idx: usize) -> String { + if idx == 0 { + "inner".to_string() + } else { + vortex_panic!("invalid child index for PatchedArray: {idx}"); + } + } + + #[allow(clippy::cast_possible_truncation)] + fn metadata(array: &Self::Array) -> VortexResult { + Ok(ProstMetadata(PatchedMetadata { + offset: array.offset as u32, + })) + } + + fn serialize(_metadata: Self::Metadata) -> VortexResult>> { + Ok(Some(vec![])) + } + + fn deserialize( + bytes: &[u8], + _dtype: &DType, + _len: usize, + _buffers: &[BufferHandle], + _session: &VortexSession, + ) -> VortexResult { + let inner = as DeserializeMetadata>::deserialize(bytes)?; + Ok(ProstMetadata(inner)) + } + + fn build( + dtype: &DType, + len: usize, + metadata: &Self::Metadata, + buffers: &[BufferHandle], + children: &dyn ArrayChildren, + ) -> VortexResult { + let inner = children.get(0, dtype, len)?; + + let n_chunks = len.div_ceil(1024); + + let n_lanes = match_each_native_ptype!(dtype.as_ptype(), |P| { patch_lanes::

() }); + + let &[lane_offsets, indices, values] = &buffers else { + vortex_bail!("invalid buffer count for PatchedArray"); + }; + + Ok(PatchedArray { + inner, + n_chunks, + n_lanes, + offset: metadata.offset as usize, + len, + lane_offsets: lane_offsets.clone(), + indices: indices.clone(), + values: values.clone(), + values_ptype: dtype.as_ptype(), + stats_set: ArrayStats::default(), + }) + } + + fn with_children(array: &mut Self::Array, mut children: Vec) -> VortexResult<()> { + vortex_ensure!( + children.len() == 1, + "PatchedArray must have exactly 1 child" + ); + + array.inner = children.remove(0); + + Ok(()) + } + + fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult { + let inner = array + .inner + .clone() + .execute::(ctx)? + .into_primitive(); + + let PrimitiveArrayParts { + buffer, + ptype, + validity, + } = inner.into_parts(); + + let lane_offsets: Buffer = + Buffer::from_byte_buffer(array.lane_offsets.clone().unwrap_host()); + let indices: Buffer = Buffer::from_byte_buffer(array.indices.clone().unwrap_host()); + + let patched_values = match_each_native_ptype!(array.values_ptype, |V| { + let mut output = Buffer::::from_byte_buffer(buffer.unwrap_host()).into_mut(); + let values: Buffer = Buffer::from_byte_buffer(array.values.clone().unwrap_host()); + + let offset = array.offset; + let len = array.len; + + apply::( + &mut output, + offset, + len, + array.n_chunks, + array.n_lanes, + &lane_offsets, + &indices, + &values, + ); + + // The output will always be aligned to a chunk boundary, we apply the offset/len + // at the end to slice to only the in-bounds values. + let _output = output.as_slice(); + let output = output.freeze().slice(offset..offset + len); + + PrimitiveArray::from_byte_buffer(output.into_byte_buffer(), ptype, validity) + }); + + Ok(ExecutionStep::done(patched_values.into_array())) + } + + fn execute_parent( + array: &Self::Array, + parent: &ArrayRef, + child_idx: usize, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + PARENT_KERNELS.execute(array, parent, child_idx, ctx) + } + + fn reduce_parent( + array: &Self::Array, + parent: &ArrayRef, + child_idx: usize, + ) -> VortexResult> { + PARENT_RULES.evaluate(array, parent, child_idx) + } +} + +/// Apply patches on top of the existing value types. +#[allow(clippy::too_many_arguments)] +fn apply( + output: &mut [V], + offset: usize, + len: usize, + n_chunks: usize, + n_lanes: usize, + lane_offsets: &[u32], + indices: &[u16], + values: &[V], +) { + for chunk in 0..n_chunks { + let start = lane_offsets[chunk * n_lanes] as usize; + let stop = lane_offsets[chunk * n_lanes + n_lanes] as usize; + + for idx in start..stop { + // the indices slice is measured as an offset into the 1024-value chunk. + let index = chunk * 1024 + indices[idx] as usize; + if index < offset || index >= offset + len { + continue; + } + + let value = values[idx]; + output[index] = value; + } + } +} + +#[cfg(test)] +mod tests { + use vortex_buffer::buffer; + use vortex_buffer::buffer_mut; + use vortex_session::VortexSession; + + use crate::Canonical; + use crate::ExecutionCtx; + use crate::IntoArray; + use crate::arrays::PatchedArray; + use crate::dtype::Nullability; + use crate::patches::Patches; + use crate::scalar::Scalar; + + #[test] + fn test_execute() { + let values = buffer![0u16; 1024].into_array(); + let patches = Patches::new( + 1024, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![1u16; 3].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .into_array(); + + let executed = array + .execute::(&mut ctx) + .unwrap() + .into_primitive() + .into_buffer::(); + + let mut expected = buffer_mut![0u16; 1024]; + expected[1] = 1; + expected[2] = 1; + expected[3] = 1; + + assert_eq!(executed, expected.freeze()); + } + + #[test] + fn test_scalar_at() { + let values = buffer![0u16; 1024].into_array(); + let patches = Patches::new( + 1024, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![1u16; 3].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .into_array(); + + assert_eq!( + array.scalar_at(0).unwrap(), + Scalar::primitive(0u16, Nullability::NonNullable) + ); + assert_eq!( + array.scalar_at(1).unwrap(), + Scalar::primitive(1u16, Nullability::NonNullable) + ); + assert_eq!( + array.scalar_at(2).unwrap(), + Scalar::primitive(1u16, Nullability::NonNullable) + ); + assert_eq!( + array.scalar_at(3).unwrap(), + Scalar::primitive(1u16, Nullability::NonNullable) + ); + } +} diff --git a/vortex-array/src/arrays/patched/vtable/operations.rs b/vortex-array/src/arrays/patched/vtable/operations.rs new file mode 100644 index 00000000000..ddf5dcec590 --- /dev/null +++ b/vortex-array/src/arrays/patched/vtable/operations.rs @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexResult; + +use crate::DynArray; +use crate::arrays::patched::Patched; +use crate::arrays::patched::PatchedArray; +use crate::arrays::patched::patch_lanes; +use crate::match_each_native_ptype; +use crate::scalar::Scalar; +use crate::vtable::OperationsVTable; + +impl OperationsVTable for Patched { + fn scalar_at(array: &PatchedArray, index: usize) -> VortexResult { + // First check the patches + let chunk = index / 1024; + #[allow(clippy::cast_possible_truncation)] + let chunk_index = (index % 1024) as u16; + match_each_native_ptype!(array.values_ptype, |V| { + let lane = index % patch_lanes::(); + let accessor = array.accessor::(); + let patches = accessor.access(chunk, lane); + // NOTE: we do linear scan as lane has <= 32 patches, binary search would likely + // be slower. + for (patch_index, patch_value) in patches.iter() { + if patch_index == chunk_index { + return Ok(Scalar::primitive( + patch_value, + array.inner.dtype().nullability(), + )); + } + } + }); + + // Otherwise, access the underlying value. + array.inner.scalar_at(index) + } +} diff --git a/vortex-array/src/arrays/patched/vtable/slice.rs b/vortex-array/src/arrays/patched/vtable/slice.rs new file mode 100644 index 00000000000..99d04666d5f --- /dev/null +++ b/vortex-array/src/arrays/patched/vtable/slice.rs @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::ops::Range; + +use vortex_error::VortexResult; + +use crate::ArrayRef; +use crate::DynArray; +use crate::IntoArray; +use crate::arrays::Patched; +use crate::arrays::PatchedArray; +use crate::arrays::slice::SliceReduce; +use crate::stats::ArrayStats; + +/// Is this something that uses a SliceKernel or a SliceReduce +impl SliceReduce for Patched { + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + // We **always** slice at 1024-element chunk boundaries. We keep the offset + len + // around so that when we execute we know how much to chop off. + let new_offset = (range.start + array.offset) % 1024; + let new_len = range.end - range.start; + + let chunk_start = (range.start + array.offset) / 1024; + let chunk_stop = (range.end + array.offset).div_ceil(1024); + + // Slice the inner to chunk boundaries + let inner_start = chunk_start * 1024; + let inner_stop = (chunk_stop * 1024).min(array.inner.len()); + let inner = array.inner.slice(inner_start..inner_stop)?; + + // Slice to only maintain offsets to the sliced chunks + let sliced_lane_offsets = array + .lane_offsets + .slice_typed::((chunk_start * array.n_lanes)..(chunk_stop * array.n_lanes) + 1); + + Ok(Some( + PatchedArray { + inner, + n_chunks: chunk_stop - chunk_start, + n_lanes: array.n_lanes, + + offset: new_offset, + len: new_len, + lane_offsets: sliced_lane_offsets, + indices: array.indices.clone(), + values: array.values.clone(), + values_ptype: array.values_ptype, + stats_set: ArrayStats::default(), + } + .into_array(), + )) + } +} + +#[cfg(test)] +mod tests { + use std::ops::Range; + + use rstest::rstest; + use vortex_buffer::Buffer; + use vortex_buffer::BufferMut; + use vortex_buffer::buffer; + use vortex_error::VortexResult; + + use crate::Canonical; + use crate::DynArray; + use crate::ExecutionCtx; + use crate::IntoArray; + use crate::LEGACY_SESSION; + use crate::arrays::PatchedArray; + use crate::arrays::PrimitiveArray; + use crate::assert_arrays_eq; + use crate::dtype::NativePType; + use crate::patches::Patches; + + #[test] + fn test_reduce() -> VortexResult<()> { + let values = buffer![0u16; 512].into_array(); + let patch_indices = buffer![1u32, 8, 30].into_array(); + let patch_values = buffer![u16::MAX; 3].into_array(); + let patches = Patches::new(512, 0, patch_indices, patch_values, None).unwrap(); + + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let patched_array = + PatchedArray::from_array_and_patches(values, &patches, &mut ctx).unwrap(); + + let sliced = patched_array.slice(1..10)?; + + insta::assert_snapshot!( + sliced.display_tree_encodings_only(), + @r#" + root: vortex.patched(u16, len=9) + inner: vortex.primitive(u16, len=512) + "#); + + let executed = sliced.execute::(&mut ctx)?.into_primitive(); + + assert_eq!( + &[u16::MAX, 0, 0, 0, 0, 0, 0, u16::MAX, 0], + executed.as_slice::() + ); + + Ok(()) + } + + #[rstest] + #[case::trivial(buffer![1u64; 2], buffer![1u32], buffer![u64::MAX], 1..2)] + #[case::one_chunk(buffer![0u64; 1024], buffer![1u32, 8, 30], buffer![u64::MAX; 3], 1..10)] + #[case::multichunk(buffer![1u64; 10_000], buffer![0u32, 1, 2, 3, 4, 16, 17, 18, 19, 1024, 2048, 2049], buffer![u64::MAX; 12], 1024..5000)] + fn test_cases( + #[case] inner: Buffer, + #[case] patch_indices: Buffer, + #[case] patch_values: Buffer, + #[case] range: Range, + ) { + // Create patched array. + let patches = Patches::new( + inner.len(), + 0, + patch_indices.into_array(), + patch_values.into_array(), + None, + ) + .unwrap(); + + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let patched_array = + PatchedArray::from_array_and_patches(inner.into_array(), &patches, &mut ctx).unwrap(); + + // Verify that applying slice first yields same result as applying slice at end. + let slice_first = patched_array + .slice(range.clone()) + .unwrap() + .execute::(&mut ctx) + .unwrap() + .into_array(); + + let slice_last = patched_array + .into_array() + .execute::(&mut ctx) + .unwrap() + .into_primitive() + .slice(range) + .unwrap(); + + assert_arrays_eq!(slice_first, slice_last); + } + + #[test] + fn test_stacked_slices() { + let values = PrimitiveArray::from_iter(0u64..10_000).into_array(); + + let patched_indices = buffer![1u32, 2, 1024, 2048, 3072, 3088].into_array(); + let patched_values = buffer![0u64, 1, 2, 3, 4, 5].into_array(); + + let patches = Patches::new(10_000, 0, patched_indices, patched_values, None).unwrap(); + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let patched_array = + PatchedArray::from_array_and_patches(values, &patches, &mut ctx).unwrap(); + + let sliced = patched_array + .slice(1024..5000) + .unwrap() + .slice(1..2065) + .unwrap() + .execute::(&mut ctx) + .unwrap() + .into_array(); + + let mut expected = BufferMut::from_iter(1025u64..=3088); + expected[1023] = 3; + expected[2047] = 4; + expected[2063] = 5; + + let expected = expected.into_array(); + + assert_arrays_eq!(expected, sliced); + } +} diff --git a/vortex-buffer/src/buffer.rs b/vortex-buffer/src/buffer.rs index 5a7de66f4fd..49cb7312c2e 100644 --- a/vortex-buffer/src/buffer.rs +++ b/vortex-buffer/src/buffer.rs @@ -498,6 +498,34 @@ impl Buffer { } } +impl ByteBuffer { + /// Reinterpret the byte buffer as a slice of values of type `V`. + /// + /// # Panics + /// + /// This method will only work if the buffer has the proper size and alignment to be viewed + /// as a buffer of `V` values. + pub fn reinterpret(&self) -> &[V] { + assert!( + self.is_aligned(Alignment::of::()), + "ByteBuffer not properly aligned to {}", + type_name::() + ); + + assert_eq!( + self.length % size_of::(), + 0, + "ByteBuffer length not a multiple of the value length" + ); + + let v_len = self.length / size_of::(); + let v_ptr = self.bytes.as_ptr().cast::(); + + // SAFETY: we checked that alignment and length are suitable to treat this as a &[V]. + unsafe { std::slice::from_raw_parts(v_ptr, v_len) } + } +} + /// An iterator over Buffer elements. /// /// This is an analog to the `std::slice::Iter` type. From 33cdc464b94c226504327d33fcc696f5613b8ba6 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 18 Mar 2026 16:18:19 -0400 Subject: [PATCH 2/7] take Signed-off-by: Andrew Duffy --- .../src/arrays/patched/compute/mod.rs | 1 + .../src/arrays/patched/compute/take.rs | 110 ++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 vortex-array/src/arrays/patched/compute/take.rs diff --git a/vortex-array/src/arrays/patched/compute/mod.rs b/vortex-array/src/arrays/patched/compute/mod.rs index aa8b18199b2..8634a22f90b 100644 --- a/vortex-array/src/arrays/patched/compute/mod.rs +++ b/vortex-array/src/arrays/patched/compute/mod.rs @@ -4,3 +4,4 @@ mod compare; mod filter; pub(crate) mod rules; +mod take; diff --git a/vortex-array/src/arrays/patched/compute/take.rs b/vortex-array/src/arrays/patched/compute/take.rs new file mode 100644 index 00000000000..59a483c83f0 --- /dev/null +++ b/vortex-array/src/arrays/patched/compute/take.rs @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use rustc_hash::FxHashMap; +use vortex_buffer::Buffer; +use vortex_error::VortexResult; + +use crate::arrays::dict::TakeExecute; +use crate::arrays::primitive::PrimitiveArrayParts; +use crate::arrays::{Patched, PrimitiveArray}; +use crate::dtype::{IntegerPType, NativePType}; +use crate::{ArrayRef, DynArray, IntoArray, match_each_native_ptype}; +use crate::{ExecutionCtx, match_each_unsigned_integer_ptype}; + +impl TakeExecute for Patched { + fn take( + array: &Self::Array, + indices: &ArrayRef, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + // Perform take on the inner array, including the placeholders. + let inner = array + .inner + .take(indices.clone())? + .execute::(ctx)?; + + let PrimitiveArrayParts { + buffer, + validity, + ptype, + } = inner.into_parts(); + + let indices_ptype = indices.dtype().as_ptype(); + + match_each_unsigned_integer_ptype!(indices_ptype, |I| { + match_each_native_ptype!(ptype, |V| { + let indices = indices.clone().execute::(ctx)?; + let mut output = Buffer::::from_byte_buffer(buffer.unwrap_host()).into_mut(); + take_map( + output.as_mut(), + indices.as_slice::(), + array.offset, + array.len, + array.n_chunks, + array.n_lanes, + array.lane_offsets.as_host().reinterpret::(), + array.indices.as_host().reinterpret::(), + array.values.as_host().reinterpret::(), + ); + + // SAFETY: output and validity still have same length after take_map returns. + unsafe { + return Ok(Some( + PrimitiveArray::new_unchecked(output.freeze(), validity).into_array(), + )); + } + }) + }); + } +} + +/// Take patches for the given `indices` and apply them onto an `output` using a hash map. +/// +/// First, builds a hashmap from index to patch value, then uses the hashmap in a loop to collect +/// the values. +fn take_map( + output: &mut [V], + indices: &[I], + offset: usize, + len: usize, + n_chunks: usize, + n_lanes: usize, + lane_offsets: &[u32], + patch_index: &[u16], + patch_value: &[V], +) { + // Build a hashmap of patch_index -> values. + let mut index_map = FxHashMap::with_capacity(indices.len()); + for chunk in 0..n_chunks { + for lane in 0..n_lanes { + let [lane_start, lane_end] = lane_offsets[chunk * n_lanes + lane..][..2]; + for i in lane_start..lane_end { + let patch_idx = patch_index[i as usize]; + let patch_value = patch_value[i as usize]; + + let index = chunk * 1024 + patch_idx as usize; + if index >= offset && index < offset + len { + index_map.insert(index, patch_value); + } + } + } + } + + // Now, iterate the take indices using the prebuilt hashmap. + // Undefined/null indices will miss the hash map, which we can ignore. + for index in indices { + let index = index.as_(); + if let Some(&patch_value) = index_map.get(&index) { + output[index] = patch_value; + } + } +} + +#[cfg(test)] +mod tests { + #[test] + fn test_take() { + // Patch some values here instead. + } +} From c05e4d0c064a7989193b120db4e976bc2358abe7 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 18 Mar 2026 16:53:11 -0400 Subject: [PATCH 3/7] add unit tests Signed-off-by: Andrew Duffy --- .../src/arrays/patched/compute/take.rs | 142 ++++++++++++++++-- 1 file changed, 131 insertions(+), 11 deletions(-) diff --git a/vortex-array/src/arrays/patched/compute/take.rs b/vortex-array/src/arrays/patched/compute/take.rs index 59a483c83f0..3db64313be4 100644 --- a/vortex-array/src/arrays/patched/compute/take.rs +++ b/vortex-array/src/arrays/patched/compute/take.rs @@ -5,12 +5,18 @@ use rustc_hash::FxHashMap; use vortex_buffer::Buffer; use vortex_error::VortexResult; +use crate::ArrayRef; +use crate::DynArray; +use crate::ExecutionCtx; +use crate::IntoArray; +use crate::arrays::Patched; +use crate::arrays::PrimitiveArray; use crate::arrays::dict::TakeExecute; use crate::arrays::primitive::PrimitiveArrayParts; -use crate::arrays::{Patched, PrimitiveArray}; -use crate::dtype::{IntegerPType, NativePType}; -use crate::{ArrayRef, DynArray, IntoArray, match_each_native_ptype}; -use crate::{ExecutionCtx, match_each_unsigned_integer_ptype}; +use crate::dtype::IntegerPType; +use crate::dtype::NativePType; +use crate::match_each_native_ptype; +use crate::match_each_unsigned_integer_ptype; impl TakeExecute for Patched { fn take( @@ -50,12 +56,12 @@ impl TakeExecute for Patched { // SAFETY: output and validity still have same length after take_map returns. unsafe { - return Ok(Some( + Ok(Some( PrimitiveArray::new_unchecked(output.freeze(), validity).into_array(), - )); + )) } }) - }); + }) } } @@ -63,6 +69,7 @@ impl TakeExecute for Patched { /// /// First, builds a hashmap from index to patch value, then uses the hashmap in a loop to collect /// the values. +#[allow(clippy::too_many_arguments)] fn take_map( output: &mut [V], indices: &[I], @@ -75,10 +82,11 @@ fn take_map( patch_value: &[V], ) { // Build a hashmap of patch_index -> values. - let mut index_map = FxHashMap::with_capacity(indices.len()); + let mut index_map = FxHashMap::with_capacity_and_hasher(indices.len(), Default::default()); for chunk in 0..n_chunks { for lane in 0..n_lanes { - let [lane_start, lane_end] = lane_offsets[chunk * n_lanes + lane..][..2]; + let lane_start = lane_offsets[chunk * n_lanes + lane]; + let lane_end = lane_offsets[chunk * n_lanes + lane + 1]; for i in lane_start..lane_end { let patch_idx = patch_index[i as usize]; let patch_value = patch_value[i as usize]; @@ -103,8 +111,120 @@ fn take_map( #[cfg(test)] mod tests { + use vortex_buffer::buffer; + use vortex_error::VortexResult; + use vortex_session::VortexSession; + + use crate::DynArray; + use crate::ExecutionCtx; + use crate::IntoArray; + use crate::arrays::PatchedArray; + use crate::arrays::PrimitiveArray; + use crate::assert_arrays_eq; + use crate::patches::Patches; + + fn make_patched_array( + base: &[u16], + patch_indices: &[u32], + patch_values: &[u16], + ) -> VortexResult { + let values = PrimitiveArray::from_iter(base.iter().copied()).into_array(); + let patches = Patches::new( + base.len(), + 0, + PrimitiveArray::from_iter(patch_indices.iter().copied()).into_array(), + PrimitiveArray::from_iter(patch_values.iter().copied()).into_array(), + None, + )?; + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + } + + #[test] + fn test_take_basic() -> VortexResult<()> { + // Array with base values [0, 0, 0, 0, 0] patched at indices [1, 3] with values [10, 30] + let array = make_patched_array(&[0; 5], &[1, 3], &[10, 30])?.into_array(); + + // Take indices [0, 1, 2, 3, 4] - should get [0, 10, 0, 30, 0] + let indices = buffer![0u32, 1, 2, 3, 4].into_array(); + let result = array.take(indices)?; + + let expected = PrimitiveArray::from_iter([0u16, 10, 0, 30, 0]).into_array(); + assert_arrays_eq!(expected, result); + + Ok(()) + } + + #[test] + fn test_take_out_of_order() -> VortexResult<()> { + // Array with base values [0, 0, 0, 0, 0] patched at indices [1, 3] with values [10, 30] + let array = make_patched_array(&[0; 5], &[1, 3], &[10, 30])?.into_array(); + + // Take indices in reverse order + let indices = buffer![4u32, 3, 2, 1, 0].into_array(); + let result = array.take(indices)?; + + let expected = PrimitiveArray::from_iter([0u16, 30, 0, 10, 0]).into_array(); + assert_arrays_eq!(expected, result); + + Ok(()) + } + + #[test] + fn test_take_duplicates() -> VortexResult<()> { + // Array with base values [0, 0, 0, 0, 0] patched at index [2] with value [99] + let array = make_patched_array(&[0; 5], &[2], &[99])?.into_array(); + + // Take the same patched index multiple times + let indices = buffer![2u32, 2, 0, 2].into_array(); + let result = array.take(indices)?; + + let expected = PrimitiveArray::from_iter([99u16, 99, 0, 99]).into_array(); + assert_arrays_eq!(expected, result); + + Ok(()) + } + #[test] - fn test_take() { - // Patch some values here instead. + fn test_take_with_null_indices() -> VortexResult<()> { + use crate::arrays::BoolArray; + use crate::validity::Validity; + + // Array: 10 elements, base value 0, patches at indices 2, 5, 8 with values 20, 50, 80 + let array = make_patched_array(&[0; 10], &[2, 5, 8], &[20, 50, 80])?.into_array(); + + // Take 10 indices, with nulls at positions 1, 4, 7 + // Indices: [0, 2, 2, 5, 8, 0, 5, 8, 3, 1] + // Nulls: [ , , N, , , N, , , N, ] + // Position 2 (index=2, patched) is null + // Position 5 (index=0, unpatched) is null + // Position 8 (index=3, unpatched) is null + let indices = PrimitiveArray::new( + buffer![0u32, 2, 2, 5, 8, 0, 5, 8, 3, 1], + Validity::Array( + BoolArray::from_iter([ + true, true, false, true, true, false, true, true, false, true, + ]) + .into_array(), + ), + ); + let result = array.take(indices.into_array())?; + + // Expected: [0, 20, null, 50, 80, null, 50, 80, null, 0] + let expected = PrimitiveArray::new( + buffer![0u16, 20, 0, 50, 80, 0, 50, 80, 0, 0], + Validity::Array( + BoolArray::from_iter([ + true, true, false, true, true, false, true, true, false, true, + ]) + .into_array(), + ), + ); + assert_arrays_eq!(expected.into_array(), result); + + Ok(()) } } From 5286eea972769e769bc16b329e054835b4a5faf2 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 18 Mar 2026 17:21:22 -0400 Subject: [PATCH 4/7] final Signed-off-by: Andrew Duffy --- vortex-array/public-api.lock | 368 +++++++++++++++++++++++ vortex-array/src/arrays/patched/array.rs | 2 +- vortex-buffer/public-api.lock | 4 + 3 files changed, 373 insertions(+), 1 deletion(-) diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index c8ed0c94938..43f7f977d2c 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -2000,6 +2000,10 @@ impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::dict::Dic pub fn vortex_array::arrays::dict::Dict::take(array: &vortex_array::arrays::dict::DictArray, indices: &vortex_array::ArrayRef, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> +impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::take(array: &Self::Array, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + pub trait vortex_array::arrays::dict::TakeReduce: vortex_array::vtable::VTable pub fn vortex_array::arrays::dict::TakeReduce::take(array: &Self::Array, indices: &vortex_array::ArrayRef) -> vortex_error::VortexResult> @@ -2386,6 +2390,10 @@ impl vortex_array::arrays::filter::FilterReduce for vortex_array::arrays::null:: pub fn vortex_array::arrays::null::Null::filter(_array: &vortex_array::arrays::null::NullArray, mask: &vortex_mask::Mask) -> vortex_error::VortexResult> +impl vortex_array::arrays::filter::FilterReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::filter(array: &Self::Array, mask: &vortex_mask::Mask) -> vortex_error::VortexResult> + pub mod vortex_array::arrays::fixed_size_list pub struct vortex_array::arrays::fixed_size_list::FixedSizeList @@ -3148,6 +3156,170 @@ impl vortex_array::IntoArray for vortex_array::arrays::null::NullArray pub fn vortex_array::arrays::null::NullArray::into_array(self) -> vortex_array::ArrayRef +pub mod vortex_array::arrays::patched + +pub struct vortex_array::arrays::patched::LanePatches<'a, V> + +pub vortex_array::arrays::patched::LanePatches::indices: &'a [u16] + +pub vortex_array::arrays::patched::LanePatches::values: &'a [V] + +impl<'a, V: core::marker::Copy> vortex_array::arrays::patched::LanePatches<'a, V> + +pub fn vortex_array::arrays::patched::LanePatches<'a, V>::is_empty(&self) -> bool + +pub fn vortex_array::arrays::patched::LanePatches<'a, V>::iter(&self) -> impl core::iter::traits::iterator::Iterator + +pub fn vortex_array::arrays::patched::LanePatches<'a, V>::len(&self) -> usize + +pub struct vortex_array::arrays::patched::PatchAccessor<'a, V> + +impl<'a, V: core::marker::Sized> vortex_array::arrays::patched::PatchAccessor<'a, V> + +pub fn vortex_array::arrays::patched::PatchAccessor<'a, V>::access(&'a self, chunk: usize, lane: usize) -> vortex_array::arrays::patched::LanePatches<'a, V> + +pub struct vortex_array::arrays::patched::Patched + +impl core::fmt::Debug for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::take(array: &Self::Array, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::arrays::filter::FilterReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::filter(array: &Self::Array, mask: &vortex_mask::Mask) -> vortex_error::VortexResult> + +impl vortex_array::arrays::slice::SliceReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::slice(array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> + +impl vortex_array::scalar_fn::fns::binary::CompareKernel for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::compare(lhs: &Self::Array, rhs: &vortex_array::ArrayRef, operator: vortex_array::scalar_fn::fns::operators::CompareOperator, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize) -> vortex_error::VortexResult + +impl vortex_array::vtable::VTable for vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::Array = vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::Patched::Metadata = vortex_array::ProstMetadata + +pub type vortex_array::arrays::patched::Patched::OperationsVTable = vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::ValidityVTable = vortex_array::vtable::ValidityVTableFromChild + +pub fn vortex_array::arrays::patched::Patched::append_to_builder(array: &Self::Array, builder: &mut dyn vortex_array::builders::ArrayBuilder, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<()> + +pub fn vortex_array::arrays::patched::Patched::array_eq(array: &Self::Array, other: &Self::Array, precision: vortex_array::Precision) -> bool + +pub fn vortex_array::arrays::patched::Patched::array_hash(array: &Self::Array, state: &mut H, precision: vortex_array::Precision) + +pub fn vortex_array::arrays::patched::Patched::buffer(array: &Self::Array, idx: usize) -> vortex_array::buffer::BufferHandle + +pub fn vortex_array::arrays::patched::Patched::buffer_name(_array: &Self::Array, idx: usize) -> core::option::Option + +pub fn vortex_array::arrays::patched::Patched::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::child(array: &Self::Array, idx: usize) -> vortex_array::ArrayRef + +pub fn vortex_array::arrays::patched::Patched::child_name(_array: &Self::Array, idx: usize) -> alloc::string::String + +pub fn vortex_array::arrays::patched::Patched::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::dtype(array: &Self::Array) -> &vortex_array::dtype::DType + +pub fn vortex_array::arrays::patched::Patched::execute(array: &Self::Array, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::id(_array: &Self::Array) -> vortex_array::vtable::ArrayId + +pub fn vortex_array::arrays::patched::Patched::len(array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::metadata(array: &Self::Array) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::nbuffers(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::nchildren(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::reduce(array: &Self::Array) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::serialize(_metadata: Self::Metadata) -> vortex_error::VortexResult>> + +pub fn vortex_array::arrays::patched::Patched::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> + +pub fn vortex_array::arrays::patched::Patched::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> + +impl vortex_array::vtable::ValidityChild for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::validity_child(array: &vortex_array::arrays::patched::PatchedArray) -> &vortex_array::ArrayRef + +pub struct vortex_array::arrays::patched::PatchedArray + +impl vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::accessor(&self) -> vortex_array::arrays::patched::PatchAccessor<'_, V> + +pub fn vortex_array::arrays::patched::PatchedArray::from_array_and_patches(inner: vortex_array::ArrayRef, patches: &vortex_array::patches::Patches, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +impl vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::to_array(&self) -> vortex_array::ArrayRef + +impl core::clone::Clone for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::clone(&self) -> vortex_array::arrays::patched::PatchedArray + +impl core::convert::AsRef for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::as_ref(&self) -> &dyn vortex_array::DynArray + +impl core::convert::From for vortex_array::ArrayRef + +pub fn vortex_array::ArrayRef::from(value: vortex_array::arrays::patched::PatchedArray) -> vortex_array::ArrayRef + +impl core::fmt::Debug for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::ops::deref::Deref for vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::PatchedArray::Target = dyn vortex_array::DynArray + +pub fn vortex_array::arrays::patched::PatchedArray::deref(&self) -> &Self::Target + +impl vortex_array::IntoArray for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::into_array(self) -> vortex_array::ArrayRef + +pub struct vortex_array::arrays::patched::PatchedMetadata + +impl core::clone::Clone for vortex_array::arrays::patched::PatchedMetadata + +pub fn vortex_array::arrays::patched::PatchedMetadata::clone(&self) -> vortex_array::arrays::patched::PatchedMetadata + +impl core::default::Default for vortex_array::arrays::patched::PatchedMetadata + +pub fn vortex_array::arrays::patched::PatchedMetadata::default() -> Self + +impl core::fmt::Debug for vortex_array::arrays::patched::PatchedMetadata + +pub fn vortex_array::arrays::patched::PatchedMetadata::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl prost::message::Message for vortex_array::arrays::patched::PatchedMetadata + +pub fn vortex_array::arrays::patched::PatchedMetadata::clear(&mut self) + +pub fn vortex_array::arrays::patched::PatchedMetadata::encoded_len(&self) -> usize + pub mod vortex_array::arrays::primitive #[repr(transparent)] pub struct vortex_array::arrays::primitive::NativeValue(pub T) @@ -3982,6 +4154,10 @@ impl vortex_array::arrays::slice::SliceReduce for vortex_array::arrays::null::Nu pub fn vortex_array::arrays::null::Null::slice(_array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> +impl vortex_array::arrays::slice::SliceReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::slice(array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> + impl vortex_array::arrays::slice::SliceReduce for vortex_array::arrays::scalar_fn::ScalarFnVTable pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::slice(array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> @@ -6626,6 +6802,128 @@ impl vortex_array::IntoArray for vortex_array::arrays::null::NullArray pub fn vortex_array::arrays::null::NullArray::into_array(self) -> vortex_array::ArrayRef +pub struct vortex_array::arrays::Patched + +impl core::fmt::Debug for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::take(array: &Self::Array, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::arrays::filter::FilterReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::filter(array: &Self::Array, mask: &vortex_mask::Mask) -> vortex_error::VortexResult> + +impl vortex_array::arrays::slice::SliceReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::slice(array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> + +impl vortex_array::scalar_fn::fns::binary::CompareKernel for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::compare(lhs: &Self::Array, rhs: &vortex_array::ArrayRef, operator: vortex_array::scalar_fn::fns::operators::CompareOperator, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize) -> vortex_error::VortexResult + +impl vortex_array::vtable::VTable for vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::Array = vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::Patched::Metadata = vortex_array::ProstMetadata + +pub type vortex_array::arrays::patched::Patched::OperationsVTable = vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::ValidityVTable = vortex_array::vtable::ValidityVTableFromChild + +pub fn vortex_array::arrays::patched::Patched::append_to_builder(array: &Self::Array, builder: &mut dyn vortex_array::builders::ArrayBuilder, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<()> + +pub fn vortex_array::arrays::patched::Patched::array_eq(array: &Self::Array, other: &Self::Array, precision: vortex_array::Precision) -> bool + +pub fn vortex_array::arrays::patched::Patched::array_hash(array: &Self::Array, state: &mut H, precision: vortex_array::Precision) + +pub fn vortex_array::arrays::patched::Patched::buffer(array: &Self::Array, idx: usize) -> vortex_array::buffer::BufferHandle + +pub fn vortex_array::arrays::patched::Patched::buffer_name(_array: &Self::Array, idx: usize) -> core::option::Option + +pub fn vortex_array::arrays::patched::Patched::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::child(array: &Self::Array, idx: usize) -> vortex_array::ArrayRef + +pub fn vortex_array::arrays::patched::Patched::child_name(_array: &Self::Array, idx: usize) -> alloc::string::String + +pub fn vortex_array::arrays::patched::Patched::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::dtype(array: &Self::Array) -> &vortex_array::dtype::DType + +pub fn vortex_array::arrays::patched::Patched::execute(array: &Self::Array, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::id(_array: &Self::Array) -> vortex_array::vtable::ArrayId + +pub fn vortex_array::arrays::patched::Patched::len(array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::metadata(array: &Self::Array) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::nbuffers(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::nchildren(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::reduce(array: &Self::Array) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::serialize(_metadata: Self::Metadata) -> vortex_error::VortexResult>> + +pub fn vortex_array::arrays::patched::Patched::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> + +pub fn vortex_array::arrays::patched::Patched::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> + +impl vortex_array::vtable::ValidityChild for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::validity_child(array: &vortex_array::arrays::patched::PatchedArray) -> &vortex_array::ArrayRef + +pub struct vortex_array::arrays::PatchedArray + +impl vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::accessor(&self) -> vortex_array::arrays::patched::PatchAccessor<'_, V> + +pub fn vortex_array::arrays::patched::PatchedArray::from_array_and_patches(inner: vortex_array::ArrayRef, patches: &vortex_array::patches::Patches, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +impl vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::to_array(&self) -> vortex_array::ArrayRef + +impl core::clone::Clone for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::clone(&self) -> vortex_array::arrays::patched::PatchedArray + +impl core::convert::AsRef for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::as_ref(&self) -> &dyn vortex_array::DynArray + +impl core::convert::From for vortex_array::ArrayRef + +pub fn vortex_array::ArrayRef::from(value: vortex_array::arrays::patched::PatchedArray) -> vortex_array::ArrayRef + +impl core::fmt::Debug for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::ops::deref::Deref for vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::PatchedArray::Target = dyn vortex_array::DynArray + +pub fn vortex_array::arrays::patched::PatchedArray::deref(&self) -> &Self::Target + +impl vortex_array::IntoArray for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::into_array(self) -> vortex_array::ArrayRef + pub struct vortex_array::arrays::Primitive impl vortex_array::arrays::Primitive @@ -16794,6 +17092,10 @@ impl vortex_array::scalar_fn::fns::binary::CompareKernel for vortex_array::array pub fn vortex_array::arrays::dict::Dict::compare(lhs: &vortex_array::arrays::dict::DictArray, rhs: &vortex_array::ArrayRef, operator: vortex_array::scalar_fn::fns::operators::CompareOperator, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> +impl vortex_array::scalar_fn::fns::binary::CompareKernel for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::compare(lhs: &Self::Array, rhs: &vortex_array::ArrayRef, operator: vortex_array::scalar_fn::fns::operators::CompareOperator, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + pub fn vortex_array::scalar_fn::fns::binary::and_kleene(lhs: &vortex_array::ArrayRef, rhs: &vortex_array::ArrayRef) -> vortex_error::VortexResult pub fn vortex_array::scalar_fn::fns::binary::compare_nested_arrow_arrays(lhs: &dyn arrow_array::array::Array, rhs: &dyn arrow_array::array::Array, operator: vortex_array::scalar_fn::fns::operators::CompareOperator) -> vortex_error::VortexResult @@ -20244,6 +20546,10 @@ impl vortex_array::vtable::OperationsVTable fo pub fn vortex_array::arrays::null::Null::scalar_at(_array: &vortex_array::arrays::null::NullArray, _index: usize) -> vortex_error::VortexResult +impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize) -> vortex_error::VortexResult + impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::scalar_fn::ScalarFnVTable pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::scalar_at(array: &vortex_array::arrays::scalar_fn::ScalarFnArray, index: usize) -> vortex_error::VortexResult @@ -21228,6 +21534,60 @@ pub fn vortex_array::arrays::null::Null::stats(array: &vortex_array::arrays::nul pub fn vortex_array::arrays::null::Null::with_children(_array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> +impl vortex_array::vtable::VTable for vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::Array = vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::Patched::Metadata = vortex_array::ProstMetadata + +pub type vortex_array::arrays::patched::Patched::OperationsVTable = vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::ValidityVTable = vortex_array::vtable::ValidityVTableFromChild + +pub fn vortex_array::arrays::patched::Patched::append_to_builder(array: &Self::Array, builder: &mut dyn vortex_array::builders::ArrayBuilder, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<()> + +pub fn vortex_array::arrays::patched::Patched::array_eq(array: &Self::Array, other: &Self::Array, precision: vortex_array::Precision) -> bool + +pub fn vortex_array::arrays::patched::Patched::array_hash(array: &Self::Array, state: &mut H, precision: vortex_array::Precision) + +pub fn vortex_array::arrays::patched::Patched::buffer(array: &Self::Array, idx: usize) -> vortex_array::buffer::BufferHandle + +pub fn vortex_array::arrays::patched::Patched::buffer_name(_array: &Self::Array, idx: usize) -> core::option::Option + +pub fn vortex_array::arrays::patched::Patched::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::child(array: &Self::Array, idx: usize) -> vortex_array::ArrayRef + +pub fn vortex_array::arrays::patched::Patched::child_name(_array: &Self::Array, idx: usize) -> alloc::string::String + +pub fn vortex_array::arrays::patched::Patched::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::dtype(array: &Self::Array) -> &vortex_array::dtype::DType + +pub fn vortex_array::arrays::patched::Patched::execute(array: &Self::Array, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::id(_array: &Self::Array) -> vortex_array::vtable::ArrayId + +pub fn vortex_array::arrays::patched::Patched::len(array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::metadata(array: &Self::Array) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::nbuffers(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::nchildren(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::reduce(array: &Self::Array) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::serialize(_metadata: Self::Metadata) -> vortex_error::VortexResult>> + +pub fn vortex_array::arrays::patched::Patched::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> + +pub fn vortex_array::arrays::patched::Patched::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> + impl vortex_array::vtable::VTable for vortex_array::arrays::scalar_fn::ScalarFnVTable pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::Array = vortex_array::arrays::scalar_fn::ScalarFnArray @@ -21344,6 +21704,10 @@ impl vortex_array::vtable::ValidityChild for vo pub fn vortex_array::arrays::Extension::validity_child(array: &vortex_array::arrays::ExtensionArray) -> &vortex_array::ArrayRef +impl vortex_array::vtable::ValidityChild for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::validity_child(array: &vortex_array::arrays::patched::PatchedArray) -> &vortex_array::ArrayRef + pub trait vortex_array::vtable::ValidityChildSliceHelper pub fn vortex_array::vtable::ValidityChildSliceHelper::sliced_child_array(&self) -> vortex_error::VortexResult @@ -22480,6 +22844,10 @@ impl vortex_array::IntoArray for vortex_array::arrays::null::NullArray pub fn vortex_array::arrays::null::NullArray::into_array(self) -> vortex_array::ArrayRef +impl vortex_array::IntoArray for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::into_array(self) -> vortex_array::ArrayRef + impl vortex_array::IntoArray for vortex_array::arrays::scalar_fn::ScalarFnArray pub fn vortex_array::arrays::scalar_fn::ScalarFnArray::into_array(self) -> vortex_array::ArrayRef diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs index 9f96f3c5c9f..8a6c3e9de17 100644 --- a/vortex-array/src/arrays/patched/array.rs +++ b/vortex-array/src/arrays/patched/array.rs @@ -216,7 +216,7 @@ fn transpose( lane_offsets[index] += lane_offsets[index - 1]; } - // Loop over patches, writing thme to final positions + // Loop over patches, writing them to final positions let indices_out = indices_buffer.spare_capacity_mut(); let values_out = values_buffer.spare_capacity_mut(); for (index, &value) in std::iter::zip(indices_in, values_in) { diff --git a/vortex-buffer/public-api.lock b/vortex-buffer/public-api.lock index 17c1bff5ea9..c2e3de20b67 100644 --- a/vortex-buffer/public-api.lock +++ b/vortex-buffer/public-api.lock @@ -552,6 +552,10 @@ pub fn vortex_buffer::Buffer::from_arrow_buffer(arrow: arrow_buffer::buffer: pub fn vortex_buffer::Buffer::into_arrow_buffer(self) -> arrow_buffer::buffer::immutable::Buffer +impl vortex_buffer::Buffer + +pub fn vortex_buffer::Buffer::reinterpret(&self) -> &[V] + impl vortex_buffer::Buffer pub fn vortex_buffer::Buffer::from_arrow_scalar_buffer(arrow: arrow_buffer::buffer::scalar::ScalarBuffer) -> Self From 1ad47c722cc02dd8066367f8ac8aea0d75382233 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 18 Mar 2026 17:36:19 -0400 Subject: [PATCH 5/7] actually make the kernel get used Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/patched/vtable/kernels.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/vortex-array/src/arrays/patched/vtable/kernels.rs b/vortex-array/src/arrays/patched/vtable/kernels.rs index 2c60ec4a20f..7994b19e02e 100644 --- a/vortex-array/src/arrays/patched/vtable/kernels.rs +++ b/vortex-array/src/arrays/patched/vtable/kernels.rs @@ -2,8 +2,11 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use crate::arrays::Patched; +use crate::arrays::dict::TakeExecuteAdaptor; use crate::kernel::ParentKernelSet; use crate::scalar_fn::fns::binary::CompareExecuteAdaptor; -pub(super) const PARENT_KERNELS: ParentKernelSet = - ParentKernelSet::new(&[ParentKernelSet::lift(&CompareExecuteAdaptor(Patched))]); +pub(super) const PARENT_KERNELS: ParentKernelSet = ParentKernelSet::new(&[ + ParentKernelSet::lift(&CompareExecuteAdaptor(Patched)), + ParentKernelSet::lift(&TakeExecuteAdaptor(Patched)), +]); From 6c1d7aa3cae2878eb5bacf14ed30fe5bf830a2d9 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 18 Mar 2026 21:14:21 -0400 Subject: [PATCH 6/7] fix tests Signed-off-by: Andrew Duffy --- .../src/arrays/patched/compute/take.rs | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/vortex-array/src/arrays/patched/compute/take.rs b/vortex-array/src/arrays/patched/compute/take.rs index 3db64313be4..92b81f81c02 100644 --- a/vortex-array/src/arrays/patched/compute/take.rs +++ b/vortex-array/src/arrays/patched/compute/take.rs @@ -101,10 +101,10 @@ fn take_map( // Now, iterate the take indices using the prebuilt hashmap. // Undefined/null indices will miss the hash map, which we can ignore. - for index in indices { + for (output_index, index) in indices.iter().enumerate() { let index = index.as_(); if let Some(&patch_value) = index_map.get(&index) { - output[index] = patch_value; + output[output_index] = patch_value; } } } @@ -150,7 +150,7 @@ mod tests { // Take indices [0, 1, 2, 3, 4] - should get [0, 10, 0, 30, 0] let indices = buffer![0u32, 1, 2, 3, 4].into_array(); - let result = array.take(indices)?; + let result = array.take(indices)?.to_canonical()?.into_array(); let expected = PrimitiveArray::from_iter([0u16, 10, 0, 30, 0]).into_array(); assert_arrays_eq!(expected, result); @@ -165,7 +165,7 @@ mod tests { // Take indices in reverse order let indices = buffer![4u32, 3, 2, 1, 0].into_array(); - let result = array.take(indices)?; + let result = array.take(indices)?.to_canonical()?.into_array(); let expected = PrimitiveArray::from_iter([0u16, 30, 0, 10, 0]).into_array(); assert_arrays_eq!(expected, result); @@ -180,7 +180,10 @@ mod tests { // Take the same patched index multiple times let indices = buffer![2u32, 2, 0, 2].into_array(); - let result = array.take(indices)?; + let result = array.take(indices)?.to_canonical()?.into_array(); + + // execute the array. + let _canonical = result.to_canonical()?.into_primitive(); let expected = PrimitiveArray::from_iter([99u16, 99, 0, 99]).into_array(); assert_arrays_eq!(expected, result); @@ -211,7 +214,10 @@ mod tests { .into_array(), ), ); - let result = array.take(indices.into_array())?; + let result = array + .take(indices.into_array())? + .to_canonical()? + .into_array(); // Expected: [0, 20, null, 50, 80, null, 50, 80, null, 0] let expected = PrimitiveArray::new( From 35bfb5f0046774307ad76ff7e0ec405367dbc712 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Thu, 19 Mar 2026 11:28:29 -0400 Subject: [PATCH 7/7] use child for values instead of buffer Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/patched/array.rs | 32 +++++++++---- .../src/arrays/patched/compute/compare.rs | 12 ++++- .../src/arrays/patched/compute/take.rs | 8 +++- vortex-array/src/arrays/patched/mod.rs | 29 +++++++---- vortex-array/src/arrays/patched/vtable/mod.rs | 48 ++++++++++--------- .../src/arrays/patched/vtable/operations.rs | 27 +++++------ .../src/arrays/patched/vtable/slice.rs | 2 +- 7 files changed, 99 insertions(+), 59 deletions(-) diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs index 8a6c3e9de17..4bf26dafc17 100644 --- a/vortex-array/src/arrays/patched/array.rs +++ b/vortex-array/src/arrays/patched/array.rs @@ -12,17 +12,19 @@ use crate::ArrayRef; use crate::Canonical; use crate::DynArray; use crate::ExecutionCtx; +use crate::IntoArray; +use crate::arrays::PrimitiveArray; use crate::arrays::patched::PatchAccessor; use crate::arrays::patched::TransposedPatches; use crate::arrays::patched::patch_lanes; use crate::buffer::BufferHandle; use crate::dtype::IntegerPType; use crate::dtype::NativePType; -use crate::dtype::PType; use crate::match_each_native_ptype; use crate::match_each_unsigned_integer_ptype; use crate::patches::Patches; use crate::stats::ArrayStats; +use crate::validity::Validity; /// An array that partially "patches" another array with new values. /// @@ -50,14 +52,17 @@ pub struct PatchedArray { /// indices within a 1024-element chunk. The PType of these MUST be u16 pub(super) indices: BufferHandle, /// patch values corresponding to the indices. The ptype is specified by `values_ptype`. - pub(super) values: BufferHandle, - /// PType of the scalars in `values`. Can be any native type. - pub(super) values_ptype: PType, + pub(super) values: ArrayRef, pub(super) stats_set: ArrayStats, } impl PatchedArray { + /// Create a new `PatchedArray` from a child array and a set of [`Patches`]. + /// + /// # Errors + /// + /// The `inner` array must be primitive type, and it must have the same DType as the patches. pub fn from_array_and_patches( inner: ArrayRef, patches: &Patches, @@ -68,6 +73,11 @@ impl PatchedArray { "array DType must match patches DType" ); + vortex_ensure!( + inner.dtype().is_primitive(), + "Creating PatchedArray from Patches only supported for primitive arrays" + ); + let values_ptype = patches.dtype().as_ptype(); let TransposedPatches { @@ -80,27 +90,32 @@ impl PatchedArray { let len = inner.len(); + let values = PrimitiveArray::from_buffer_handle( + BufferHandle::new_host(values), + values_ptype, + Validity::NonNullable, + ) + .into_array(); + Ok(Self { inner, n_chunks, n_lanes, - values_ptype, offset: 0, len, lane_offsets: BufferHandle::new_host(lane_offsets), indices: BufferHandle::new_host(indices), - values: BufferHandle::new_host(values), + values, stats_set: ArrayStats::default(), }) } /// Get an accessor, which allows ranged access to patches by chunk/lane. - pub fn accessor(&self) -> PatchAccessor<'_, V> { + pub fn accessor(&self) -> PatchAccessor<'_> { PatchAccessor { n_lanes: self.n_lanes, lane_offsets: self.lane_offsets.as_host().reinterpret::(), indices: self.indices.as_host().reinterpret::(), - values: self.values.as_host().reinterpret::(), } } @@ -133,7 +148,6 @@ impl PatchedArray { len, indices, values, - values_ptype: self.values_ptype, lane_offsets: sliced_lane_offsets, stats_set: ArrayStats::default(), }) diff --git a/vortex-array/src/arrays/patched/compute/compare.rs b/vortex-array/src/arrays/patched/compute/compare.rs index d1932ed1e44..51eeb63a432 100644 --- a/vortex-array/src/arrays/patched/compute/compare.rs +++ b/vortex-array/src/arrays/patched/compute/compare.rs @@ -12,6 +12,7 @@ use crate::IntoArray; use crate::arrays::BoolArray; use crate::arrays::ConstantArray; use crate::arrays::Patched; +use crate::arrays::PrimitiveArray; use crate::arrays::bool::BoolArrayParts; use crate::arrays::patched::patch_lanes; use crate::arrays::primitive::NativeValue; @@ -28,6 +29,12 @@ impl CompareKernel for Patched { operator: CompareOperator, ctx: &mut ExecutionCtx, ) -> VortexResult> { + // We only accelerate comparisons for primitives + if !lhs.dtype().is_primitive() { + return Ok(None); + } + + // We only accelerate comparisons against constants let Some(constant) = rhs.as_constant() else { return Ok(None); }; @@ -87,9 +94,10 @@ impl CompareKernel for Patched { let lane_offsets = lhs.lane_offsets.as_host().reinterpret::(); let indices = lhs.indices.as_host().reinterpret::(); + let values = lhs.values.clone().execute::(ctx)?; - match_each_native_ptype!(lhs.values_ptype, |V| { - let values = lhs.values.as_host().reinterpret::(); + match_each_native_ptype!(values.ptype(), |V| { + let values = values.as_slice::(); let constant = constant .as_primitive() .as_::() diff --git a/vortex-array/src/arrays/patched/compute/take.rs b/vortex-array/src/arrays/patched/compute/take.rs index 92b81f81c02..eee1a4c898a 100644 --- a/vortex-array/src/arrays/patched/compute/take.rs +++ b/vortex-array/src/arrays/patched/compute/take.rs @@ -24,6 +24,11 @@ impl TakeExecute for Patched { indices: &ArrayRef, ctx: &mut ExecutionCtx, ) -> VortexResult> { + // Only pushdown take when we have primitive types. + if !array.dtype().is_primitive() { + return Ok(None); + } + // Perform take on the inner array, including the placeholders. let inner = array .inner @@ -41,6 +46,7 @@ impl TakeExecute for Patched { match_each_unsigned_integer_ptype!(indices_ptype, |I| { match_each_native_ptype!(ptype, |V| { let indices = indices.clone().execute::(ctx)?; + let values = array.values.clone().execute::(ctx)?; let mut output = Buffer::::from_byte_buffer(buffer.unwrap_host()).into_mut(); take_map( output.as_mut(), @@ -51,7 +57,7 @@ impl TakeExecute for Patched { array.n_lanes, array.lane_offsets.as_host().reinterpret::(), array.indices.as_host().reinterpret::(), - array.values.as_host().reinterpret::(), + values.as_slice::(), ); // SAFETY: output and validity still have same length after take_map returns. diff --git a/vortex-array/src/arrays/patched/mod.rs b/vortex-array/src/arrays/patched/mod.rs index f035204c188..d37f15419b9 100644 --- a/vortex-array/src/arrays/patched/mod.rs +++ b/vortex-array/src/arrays/patched/mod.rs @@ -32,23 +32,34 @@ const fn patch_lanes() -> usize { if size_of::() < 8 { 32 } else { 16 } } -pub struct PatchAccessor<'a, V> { +pub struct PatchAccessor<'a> { n_lanes: usize, lane_offsets: &'a [u32], indices: &'a [u16], - values: &'a [V], } -impl<'a, V: Sized> PatchAccessor<'a, V> { - /// Access the patches for a particular lane - pub fn access(&'a self, chunk: usize, lane: usize) -> LanePatches<'a, V> { +pub struct PatchOffset { + /// Global offset into the list of patches. These are some of the + pub index: usize, + /// This is the value stored in the `indices` buffer, which encodes the offset of the `index`-th + /// patch + pub chunk_offset: u16, +} + +impl<'a> PatchAccessor<'a> { + /// Get an iterator over indices and values offsets. + /// + /// The first component is the index into the `indices` and `values`, and the second component + /// is the set of values instead here...I think? + pub fn offsets_iter( + &self, + chunk: usize, + lane: usize, + ) -> impl Iterator + '_ { let start = self.lane_offsets[chunk * self.n_lanes + lane] as usize; let stop = self.lane_offsets[chunk * self.n_lanes + lane + 1] as usize; - LanePatches { - indices: &self.indices[start..stop], - values: &self.values[start..stop], - } + std::iter::zip(start..stop, self.indices[start..stop].iter().copied()) } } diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index bcc87d869ce..c144cde57f3 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -86,7 +86,6 @@ impl VTable for Patched { fn array_hash(array: &Self::Array, state: &mut H, precision: Precision) { array.inner.array_hash(state, precision); - array.values_ptype.hash(state); array.n_chunks.hash(state); array.n_lanes.hash(state); array.lane_offsets.array_hash(state, precision); @@ -97,7 +96,6 @@ impl VTable for Patched { fn array_eq(array: &Self::Array, other: &Self::Array, precision: Precision) -> bool { array.n_chunks == other.n_chunks && array.n_lanes == other.n_lanes - && array.values_ptype == other.values_ptype && array.inner.array_eq(&other.inner, precision) && array.lane_offsets.array_eq(&other.lane_offsets, precision) && array.indices.array_eq(&other.indices, precision) @@ -112,7 +110,6 @@ impl VTable for Patched { match idx { 0 => array.lane_offsets.clone(), 1 => array.indices.clone(), - 2 => array.values.clone(), _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), } } @@ -121,28 +118,27 @@ impl VTable for Patched { match idx { 0 => Some("lane_offsets".to_string()), 1 => Some("patch_indices".to_string()), - 2 => Some("patch_values".to_string()), _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), } } fn nchildren(_array: &Self::Array) -> usize { - 1 + 2 } fn child(array: &Self::Array, idx: usize) -> ArrayRef { - if idx == 0 { - array.inner.clone() - } else { - vortex_panic!("invalid child index for PatchedArray: {idx}"); + match idx { + 0 => array.inner.clone(), + 1 => array.values.clone(), + _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), } } fn child_name(_array: &Self::Array, idx: usize) -> String { - if idx == 0 { - "inner".to_string() - } else { - vortex_panic!("invalid child index for PatchedArray: {idx}"); + match idx { + 0 => "inner".to_string(), + 1 => "patch_values".to_string(), + _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), } } @@ -181,10 +177,14 @@ impl VTable for Patched { let n_lanes = match_each_native_ptype!(dtype.as_ptype(), |P| { patch_lanes::

() }); - let &[lane_offsets, indices, values] = &buffers else { + let &[lane_offsets, indices] = &buffers else { vortex_bail!("invalid buffer count for PatchedArray"); }; + // values and indices should have same len. + let expected_len = indices.as_host().reinterpret::().len(); + let values = children.get(1, dtype, expected_len)?; + Ok(PatchedArray { inner, n_chunks, @@ -193,19 +193,19 @@ impl VTable for Patched { len, lane_offsets: lane_offsets.clone(), indices: indices.clone(), - values: values.clone(), - values_ptype: dtype.as_ptype(), + values, stats_set: ArrayStats::default(), }) } fn with_children(array: &mut Self::Array, mut children: Vec) -> VortexResult<()> { vortex_ensure!( - children.len() == 1, - "PatchedArray must have exactly 1 child" + children.len() == 2, + "PatchedArray must have exactly 2 children" ); array.inner = children.remove(0); + array.values = children.remove(0); Ok(()) } @@ -226,15 +226,17 @@ impl VTable for Patched { let lane_offsets: Buffer = Buffer::from_byte_buffer(array.lane_offsets.clone().unwrap_host()); let indices: Buffer = Buffer::from_byte_buffer(array.indices.clone().unwrap_host()); + let values = array.values.clone().execute::(ctx)?; + + // TODO(aduffy): add support for non-primitive PatchedArray patches application. - let patched_values = match_each_native_ptype!(array.values_ptype, |V| { + let patched_values = match_each_native_ptype!(values.ptype(), |V| { let mut output = Buffer::::from_byte_buffer(buffer.unwrap_host()).into_mut(); - let values: Buffer = Buffer::from_byte_buffer(array.values.clone().unwrap_host()); let offset = array.offset; let len = array.len; - apply::( + apply_patches_primitive::( &mut output, offset, len, @@ -242,7 +244,7 @@ impl VTable for Patched { array.n_lanes, &lane_offsets, &indices, - &values, + values.as_slice::(), ); // The output will always be aligned to a chunk boundary, we apply the offset/len @@ -276,7 +278,7 @@ impl VTable for Patched { /// Apply patches on top of the existing value types. #[allow(clippy::too_many_arguments)] -fn apply( +fn apply_patches_primitive( output: &mut [V], offset: usize, len: usize, diff --git a/vortex-array/src/arrays/patched/vtable/operations.rs b/vortex-array/src/arrays/patched/vtable/operations.rs index ddf5dcec590..7f24e5fed9f 100644 --- a/vortex-array/src/arrays/patched/vtable/operations.rs +++ b/vortex-array/src/arrays/patched/vtable/operations.rs @@ -7,6 +7,7 @@ use crate::DynArray; use crate::arrays::patched::Patched; use crate::arrays::patched::PatchedArray; use crate::arrays::patched::patch_lanes; +use crate::dtype::PType; use crate::match_each_native_ptype; use crate::scalar::Scalar; use crate::vtable::OperationsVTable; @@ -17,21 +18,19 @@ impl OperationsVTable for Patched { let chunk = index / 1024; #[allow(clippy::cast_possible_truncation)] let chunk_index = (index % 1024) as u16; - match_each_native_ptype!(array.values_ptype, |V| { - let lane = index % patch_lanes::(); - let accessor = array.accessor::(); - let patches = accessor.access(chunk, lane); - // NOTE: we do linear scan as lane has <= 32 patches, binary search would likely - // be slower. - for (patch_index, patch_value) in patches.iter() { - if patch_index == chunk_index { - return Ok(Scalar::primitive( - patch_value, - array.inner.dtype().nullability(), - )); - } + + let values_ptype = PType::try_from(array.dtype())?; + + let lane = match_each_native_ptype!(values_ptype, |V| { index % patch_lanes::() }); + let accessor = array.accessor(); + + // NOTE: we do linear scan as lane has <= 32 patches, binary search would likely + // be slower. + for (index, patch_index) in accessor.offsets_iter(chunk, lane) { + if patch_index == chunk_index { + return array.values.scalar_at(index); } - }); + } // Otherwise, access the underlying value. array.inner.scalar_at(index) diff --git a/vortex-array/src/arrays/patched/vtable/slice.rs b/vortex-array/src/arrays/patched/vtable/slice.rs index 99d04666d5f..b67b68ffe0f 100644 --- a/vortex-array/src/arrays/patched/vtable/slice.rs +++ b/vortex-array/src/arrays/patched/vtable/slice.rs @@ -45,7 +45,6 @@ impl SliceReduce for Patched { lane_offsets: sliced_lane_offsets, indices: array.indices.clone(), values: array.values.clone(), - values_ptype: array.values_ptype, stats_set: ArrayStats::default(), } .into_array(), @@ -93,6 +92,7 @@ mod tests { @r#" root: vortex.patched(u16, len=9) inner: vortex.primitive(u16, len=512) + patch_values: vortex.primitive(u16, len=3) "#); let executed = sliced.execute::(&mut ctx)?.into_primitive();