From 2c4b32ab6cad99c1d48548877a317c1241a1c7bc Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Thu, 26 Feb 2026 00:42:49 +0000 Subject: [PATCH 01/18] Update DeltaArray to always pad values to 1024 elements and use bit transpose functions Signed-off-by: Robert Kruszewski --- encodings/fastlanes/src/bit_transpose/mod.rs | 8 + .../fastlanes/src/bit_transpose/validity.rs | 97 ++++++++++ .../src/delta/array/delta_compress.rs | 172 ++++++++++-------- .../src/delta/array/delta_decompress.rs | 75 ++++---- encodings/fastlanes/src/delta/array/mod.rs | 60 +++--- encodings/fastlanes/src/delta/compute/cast.rs | 24 ++- encodings/fastlanes/src/delta/vtable/mod.rs | 3 +- .../fastlanes/src/delta/vtable/operations.rs | 166 +++++++++++++---- .../fastlanes/src/delta/vtable/validity.rs | 23 ++- vortex-array/src/arrays/assertions.rs | 34 ++-- vortex-buffer/src/bit/buf.rs | 1 - .../arrays/synthetic/encodings/delta.rs | 26 ++- vortex/benches/single_encoding_throughput.rs | 13 +- 13 files changed, 477 insertions(+), 225 deletions(-) create mode 100644 encodings/fastlanes/src/bit_transpose/validity.rs diff --git a/encodings/fastlanes/src/bit_transpose/mod.rs b/encodings/fastlanes/src/bit_transpose/mod.rs index 864591af9d0..01e5d4a0b1c 100644 --- a/encodings/fastlanes/src/bit_transpose/mod.rs +++ b/encodings/fastlanes/src/bit_transpose/mod.rs @@ -25,6 +25,10 @@ mod scalar; #[cfg(not(feature = "_test-harness"))] mod x86; +mod validity; + +pub use validity::*; + /// Base indices for the first 64 output bytes (lanes 0-7). /// Each entry indicates the starting input byte index for that output byte group. /// Pattern: [0*2, 4*2, 2*2, 6*2, 1*2, 5*2, 3*2, 7*2] = [0, 8, 4, 12, 2, 10, 6, 14] @@ -39,6 +43,8 @@ const TRANSPOSE_2X2: u64 = 0x00AA_00AA_00AA_00AA; const TRANSPOSE_4X4: u64 = 0x0000_CCCC_0000_CCCC; const TRANSPOSE_8X8: u64 = 0x0000_0000_F0F0_F0F0; +/// Transpose 1024-bits into FastLanes layout. +/// /// Dispatch to the best available implementation at runtime. #[inline] pub fn transpose_bits(input: &[u8; 128], output: &mut [u8; 128]) { @@ -64,6 +70,8 @@ pub fn transpose_bits(input: &[u8; 128], output: &mut [u8; 128]) { scalar::transpose_bits_scalar(input, output); } +/// Untranspose 1024-bits from FastLanes layout. +/// /// Dispatch untranspose to the best available implementation at runtime. #[inline] pub fn untranspose_bits(input: &[u8; 128], output: &mut [u8; 128]) { diff --git a/encodings/fastlanes/src/bit_transpose/validity.rs b/encodings/fastlanes/src/bit_transpose/validity.rs new file mode 100644 index 00000000000..5a10cd66532 --- /dev/null +++ b/encodings/fastlanes/src/bit_transpose/validity.rs @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::mem; +use std::mem::MaybeUninit; + +use vortex_array::Canonical; +use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::arrays::BoolArray; +use vortex_array::validity::Validity; +use vortex_buffer::BitBuffer; +use vortex_buffer::ByteBufferMut; +use vortex_error::VortexExpect; +use vortex_error::VortexResult; + +use crate::bit_transpose::transpose_bits; +use crate::bit_transpose::untranspose_bits; + +pub fn transpose_validity(validity: &Validity, ctx: &mut ExecutionCtx) -> VortexResult { + match validity { + Validity::Array(mask) => { + let bools = mask + .clone() + .execute::(ctx)? + .into_bool() + .into_bit_buffer(); + + Ok(Validity::Array( + BoolArray::new(transpose_bitbuffer(bools), Validity::NonNullable).into_array(), + )) + } + v @ Validity::AllValid | v @ Validity::AllInvalid | v @ Validity::NonNullable => { + Ok(v.clone()) + } + } +} + +#[inline] +pub fn transpose_bitbuffer(bits: BitBuffer) -> BitBuffer { + fastlanes_layout_apply(bits, transpose_bits) +} + +pub fn untranspose_validity(validity: &Validity, ctx: &mut ExecutionCtx) -> VortexResult { + match validity { + Validity::Array(mask) => { + let bools = mask + .clone() + .execute::(ctx)? + .into_bool() + .into_bit_buffer(); + + Ok(Validity::Array( + BoolArray::new(untranspose_bitbuffer(bools), Validity::NonNullable).into_array(), + )) + } + v @ Validity::AllValid | v @ Validity::AllInvalid | v @ Validity::NonNullable => { + Ok(v.clone()) + } + } +} + +#[inline] +pub fn untranspose_bitbuffer(bits: BitBuffer) -> BitBuffer { + fastlanes_layout_apply(bits, untranspose_bits) +} + +fn fastlanes_layout_apply(bits: BitBuffer, op: F) -> BitBuffer { + let (offset, len, bytes) = bits.into_inner(); + + let output_len = bytes.len().next_multiple_of(128); + let mut output = ByteBufferMut::with_capacity(output_len); + let (input_chunks, input_trailer) = bytes.as_chunks::<128>(); + // We can ignore the spare trailer capacity that can be an artifact of allocator as we requested 128 multiple chunks + let (output_chunks, _) = output.spare_capacity_mut().as_chunks_mut::<128>(); + + for (input, output) in input_chunks.iter().zip(output_chunks.iter_mut()) { + op(input, unsafe { + mem::transmute::<&mut [MaybeUninit; 128], &mut [u8; 128]>(output) + }); + } + + if !input_trailer.is_empty() { + let mut padded_input = [0u8; 128]; + padded_input[0..input_trailer.len()].clone_from_slice(input_trailer); + op(&padded_input, unsafe { + mem::transmute::<&mut [MaybeUninit; 128], &mut [u8; 128]>( + output_chunks + .last_mut() + .vortex_expect("Output wasn't a multiple of 128 bytes"), + ) + }); + } + + unsafe { output.set_len(output_len) }; + BitBuffer::new_with_offset(output.freeze().into_byte_buffer(), len, offset) +} diff --git a/encodings/fastlanes/src/delta/array/delta_compress.rs b/encodings/fastlanes/src/delta/array/delta_compress.rs index f15c3ad8d58..93d730c1a65 100644 --- a/encodings/fastlanes/src/delta/array/delta_compress.rs +++ b/encodings/fastlanes/src/delta/array/delta_compress.rs @@ -1,91 +1,128 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use arrayref::array_mut_ref; -use arrayref::array_ref; +use std::mem; +use std::mem::MaybeUninit; + use fastlanes::Delta; use fastlanes::FastLanes; use fastlanes::Transpose; -use num_traits::WrappingSub; +use vortex_array::Canonical; +use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::arrays::BoolArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::dtype::NativePType; use vortex_array::match_each_unsigned_integer_ptype; +use vortex_array::validity::Validity; use vortex_array::vtable::ValidityHelper; +use vortex_buffer::BitBuffer; use vortex_buffer::Buffer; use vortex_buffer::BufferMut; use vortex_error::VortexResult; -pub fn delta_compress(array: &PrimitiveArray) -> VortexResult<(PrimitiveArray, PrimitiveArray)> { - // TODO(ngates): fill forward nulls? - // let filled = fill_forward(array)?.to_primitive()?; +use crate::bit_transpose::transpose_bitbuffer; - // Compress the filled array +pub fn delta_compress( + array: &PrimitiveArray, + ctx: &mut ExecutionCtx, +) -> VortexResult<(PrimitiveArray, PrimitiveArray)> { let (bases, deltas) = match_each_unsigned_integer_ptype!(array.ptype(), |T| { const LANES: usize = T::LANES; let (bases, deltas) = compress_primitive::(array.as_slice::()); + let validity = transpose_and_pad_validity(array.validity(), deltas.len(), ctx)?; ( - // To preserve nullability, we include Validity PrimitiveArray::new(bases, array.dtype().nullability().into()), - PrimitiveArray::new(deltas, array.validity().clone()), + PrimitiveArray::new(deltas, validity), ) }); Ok((bases, deltas)) } -fn compress_primitive( +/// Transpose and pad validity to match the padded deltas length. +/// +/// For [`Validity::Array`], the validity bits are transposed into FastLanes order and then +/// extended to `padded_len`. The underlying byte buffer from transposition is already +/// padded to 128-byte alignment (1024 bits), which exactly matches our 1024-element chunks. +fn transpose_and_pad_validity( + validity: &Validity, + padded_len: usize, + ctx: &mut ExecutionCtx, +) -> VortexResult { + match validity { + Validity::Array(mask) => { + let bools = mask + .clone() + .execute::(ctx)? + .into_bool() + .into_bit_buffer(); + let transposed = transpose_bitbuffer(bools); + let (offset, _len, bytes) = transposed.into_inner(); + let padded = BitBuffer::new_with_offset(bytes, padded_len, offset); + Ok(Validity::Array( + BoolArray::new(padded, Validity::NonNullable).into_array(), + )) + } + v @ Validity::AllValid | v @ Validity::AllInvalid | v @ Validity::NonNullable => { + Ok(v.clone()) + } + } +} + +fn compress_primitive( array: &[T], ) -> (Buffer, Buffer) { - // How many fastlanes vectors we will process. - let num_chunks = array.len() / 1024; + let padded_len = array.len().next_multiple_of(1024); + let num_chunks = padded_len / 1024; + let bases_len = num_chunks * LANES; + + // Split into full 1024-element chunks and a remainder. + let (full_chunks, remainder) = array.as_chunks:: <1024>(); // Allocate result arrays. - let mut bases = BufferMut::with_capacity(num_chunks * T::LANES + 1); - let mut deltas = BufferMut::with_capacity(array.len()); - - // Loop over all the 1024-element chunks. - if num_chunks > 0 { - let mut transposed: [T; 1024] = [T::default(); 1024]; - - for i in 0..num_chunks { - let start_elem = i * 1024; - let chunk: &[T; 1024] = array_ref![array, start_elem, 1024]; - Transpose::transpose(chunk, &mut transposed); - - // Initialize and store the base vector for each chunk - bases.extend_from_slice(&transposed[0..T::LANES]); - - deltas.reserve(1024); - let delta_len = deltas.len(); - unsafe { - deltas.set_len(delta_len + 1024); - Delta::delta::( - &transposed, - &*(transposed[0..T::LANES].as_ptr().cast()), - array_mut_ref![deltas[delta_len..], 0, 1024], - ); - } + let mut bases = BufferMut::with_capacity(bases_len); + let mut deltas = BufferMut::with_capacity(padded_len); + let (output_deltas, _) = deltas.spare_capacity_mut().as_chunks_mut::<1024>(); + + // Loop over all full 1024-element chunks. + let mut transposed: [T; 1024] = [T::default(); 1024]; + for (chunk, output) in full_chunks.iter().zip(output_deltas.iter_mut()) { + Transpose::transpose(chunk, &mut transposed); + bases.extend_from_slice(&transposed[0..T::LANES]); + + unsafe { + Delta::delta::( + &transposed, + &*(transposed[0..T::LANES].as_ptr().cast()), + mem::transmute::<&mut [MaybeUninit; 1024], &mut [T; 1024]>(output), + ); } } - // To avoid padding, the remainder is encoded with scalar logic. - let remainder_size = array.len() % 1024; - if remainder_size > 0 { - let chunk = &array[array.len() - remainder_size..]; - let mut base_scalar = chunk[0]; - bases.push(base_scalar); - for next in chunk { - let diff = next.wrapping_sub(&base_scalar); - deltas.push(diff); - base_scalar = *next; + // Pad the remainder to 1024 elements and process as a full chunk. + if !remainder.is_empty() { + let mut padded_chunk = [T::default(); 1024]; + padded_chunk[..remainder.len()].copy_from_slice(remainder); + + Transpose::transpose(&padded_chunk, &mut transposed); + bases.extend_from_slice(&transposed[0..T::LANES]); + + unsafe { + Delta::delta::( + &transposed, + &*(transposed[0..T::LANES].as_ptr().cast()), + mem::transmute::<&mut [MaybeUninit; 1024], &mut [T; 1024]>( + &mut output_deltas[full_chunks.len()], + ), + ); } } - assert_eq!( - bases.len(), - num_chunks * T::LANES + (if remainder_size > 0 { 1 } else { 0 }) - ); - assert_eq!(deltas.len(), array.len()); + unsafe { deltas.set_len(padded_len) }; + + assert_eq!(bases.len(), bases_len); + assert_eq!(deltas.len(), padded_len); (bases.freeze(), deltas.freeze()) } @@ -94,6 +131,7 @@ fn compress_primitive = LazyLock::new(|| VortexSession::empty().with::()); - #[test] - fn test_compress() -> VortexResult<()> { - do_roundtrip_test((0u32..10_000).collect()) - } - - #[test] - fn test_compress_nullable() -> VortexResult<()> { - do_roundtrip_test(PrimitiveArray::from_option_iter( + #[rstest] + #[case((0u32..10_000).collect())] + #[case((0..10_000).map(|i| (i % (u8::MAX as i32)) as u8).collect())] + #[case(PrimitiveArray::from_option_iter( (0u32..10_000).map(|i| (i % 2 == 0).then_some(i)), - )) - } - - #[test] - fn test_compress_overflow() -> VortexResult<()> { - do_roundtrip_test((0..10_000).map(|i| (i % (u8::MAX as i32)) as u8).collect()) - } - - fn do_roundtrip_test(input: PrimitiveArray) -> VortexResult<()> { - let delta = DeltaArray::try_from_primitive_array(&input)?; - assert_eq!(delta.len(), input.len()); + ))] + fn test_compress(#[case] array: PrimitiveArray) -> VortexResult<()> { + let delta = + DeltaArray::try_from_primitive_array(&array, &mut SESSION.create_execution_ctx())?; + assert_eq!(delta.len(), array.len()); let decompressed = delta_decompress(&delta, &mut SESSION.create_execution_ctx())?; - assert_arrays_eq!(decompressed, input); + assert_arrays_eq!(decompressed, array); Ok(()) } } diff --git a/encodings/fastlanes/src/delta/array/delta_decompress.rs b/encodings/fastlanes/src/delta/array/delta_decompress.rs index f3b3868fde7..c678b7d9b87 100644 --- a/encodings/fastlanes/src/delta/array/delta_decompress.rs +++ b/encodings/fastlanes/src/delta/array/delta_decompress.rs @@ -1,22 +1,24 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use arrayref::array_mut_ref; -use arrayref::array_ref; +use std::mem; +use std::mem::MaybeUninit; + use fastlanes::Delta; use fastlanes::FastLanes; use fastlanes::Transpose; -use num_traits::WrappingAdd; +use itertools::Itertools; use vortex_array::ExecutionCtx; use vortex_array::arrays::PrimitiveArray; use vortex_array::dtype::NativePType; use vortex_array::match_each_unsigned_integer_ptype; -use vortex_array::validity::Validity; +use vortex_array::vtable::ValidityHelper; use vortex_buffer::Buffer; use vortex_buffer::BufferMut; use vortex_error::VortexResult; use crate::DeltaArray; +use crate::bit_transpose::untranspose_validity; pub fn delta_decompress( array: &DeltaArray, @@ -28,10 +30,7 @@ pub fn delta_decompress( let start = array.offset(); let end = start + array.len(); - // TODO(connor): This is incorrect, we need to untranspose the validity!!! - - let validity = - Validity::from_mask(array.deltas().validity_mask()?, array.dtype().nullability()); + let validity = untranspose_validity(deltas.validity(), ctx)?; let validity = validity.slice(start..end)?; Ok(match_each_unsigned_integer_ptype!(deltas.ptype(), |T| { @@ -44,53 +43,41 @@ pub fn delta_decompress( })) } -// TODO(ngates): can we re-use the deltas buffer for the result? Might be tricky given the -// traversal ordering, but possibly doable. /// Performs the low-level delta decompression on primitive values. +/// +/// All chunks must be full 1024-element chunks (deltas length must be a multiple of 1024). pub(crate) fn decompress_primitive(bases: &[T], deltas: &[T]) -> Buffer where - T: NativePType + Delta + Transpose + WrappingAdd, + T: NativePType + Delta + Transpose, { - // How many fastlanes vectors we will process. - let num_chunks = deltas.len() / 1024; + let (chunks, remainder) = deltas.as_chunks::<1024>(); + debug_assert!( + remainder.is_empty(), + "deltas must be padded to a multiple of 1024" + ); + // Use >= because cross-type casts (e.g. u32→u64) may produce more bases than the + // target LANES requires. Only the first chunks.len() * LANES bases are used. + assert!(bases.len() >= chunks.len() * LANES); // Allocate a result array. let mut output = BufferMut::with_capacity(deltas.len()); + let (output_chunks, _) = output.spare_capacity_mut().as_chunks_mut::<1024>(); // Loop over all the chunks - if num_chunks > 0 { - let mut transposed: [T; 1024] = [T::default(); 1024]; - - for i in 0..num_chunks { - let start_elem = i * 1024; - let chunk: &[T; 1024] = array_ref![deltas, start_elem, 1024]; - - // Initialize the base vector for this chunk - Delta::undelta::( - chunk, - unsafe { &*(bases[i * LANES..(i + 1) * LANES].as_ptr().cast()) }, - &mut transposed, - ); - - let output_len = output.len(); - unsafe { output.set_len(output_len + 1024) } - Transpose::untranspose(&transposed, array_mut_ref![output[output_len..], 0, 1024]); - } + let mut transposed: [T; 1024] = [T::default(); 1024]; + for ((i, chunk), output_chunk) in chunks.iter().enumerate().zip_eq(output_chunks.iter_mut()) { + Delta::undelta::( + chunk, + unsafe { &*(bases[i * LANES..(i + 1) * LANES].as_ptr().cast()) }, + &mut transposed, + ); + + Transpose::untranspose(&transposed, unsafe { + mem::transmute::<&mut [MaybeUninit; 1024], &mut [T; 1024]>(output_chunk) + }); } - assert_eq!(output.len() % 1024, 0); - // The remainder was encoded with scalar logic, so we need to scalar decode it. - let remainder_size = deltas.len() % 1024; - if remainder_size > 0 { - let chunk = &deltas[num_chunks * 1024..]; - assert_eq!(bases.len(), num_chunks * LANES + 1); - let mut base_scalar = bases[num_chunks * LANES]; - for next_diff in chunk { - let next = next_diff.wrapping_add(&base_scalar); - output.push(next); - base_scalar = next; - } - } + unsafe { output.set_len(deltas.len()) }; output.freeze() } diff --git a/encodings/fastlanes/src/delta/array/mod.rs b/encodings/fastlanes/src/delta/array/mod.rs index 9b489ab95cf..673fa041dbd 100644 --- a/encodings/fastlanes/src/delta/array/mod.rs +++ b/encodings/fastlanes/src/delta/array/mod.rs @@ -3,15 +3,13 @@ use fastlanes::FastLanes; use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; use vortex_array::IntoArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::dtype::DType; -use vortex_array::dtype::NativePType; use vortex_array::dtype::PType; use vortex_array::match_each_unsigned_integer_ptype; use vortex_array::stats::ArrayStats; -use vortex_array::validity::Validity; -use vortex_buffer::Buffer; use vortex_error::VortexExpect as _; use vortex_error::VortexResult; use vortex_error::vortex_bail; @@ -21,14 +19,22 @@ pub mod delta_decompress; /// A FastLanes-style delta-encoded array of primitive values. /// -/// A [`DeltaArray`] comprises a sequence of _chunks_ each representing 1,024 delta-encoded values, -/// except the last chunk which may represent from one to 1,024 values. +/// A [`DeltaArray`] comprises a sequence of _chunks_ each representing exactly 1,024 +/// delta-encoded values. If the input array length is not a multiple of 1,024, the last chunk +/// is padded with zeros to fill a complete 1,024-element chunk. /// /// # Examples /// -/// ``` +/// ```no_run +/// use vortex_array::arrays::PrimitiveArray; +/// use vortex_array::VortexSessionExecute; +/// use vortex_array::session::ArraySession; +/// use vortex_session::VortexSession; /// use vortex_fastlanes::DeltaArray; -/// let array = DeltaArray::try_from_vec(vec![1_u32, 2, 3, 5, 10, 11]).unwrap(); +/// +/// let session = VortexSession::empty().with::(); +/// let primitive = PrimitiveArray::from_iter([1_u32, 2, 3, 5, 10, 11]); +/// let array = DeltaArray::try_from_primitive_array(&primitive, &mut session.create_execution_ctx()).unwrap(); /// ``` /// /// # Details @@ -41,17 +47,14 @@ pub mod delta_decompress; /// 1,024 values. The `logical_len` is the number of logical values following the `offset`, which /// may be less than the number of physically stored values. /// -/// Each chunk is stored as a vector of bases and a vector of deltas. If the chunk physically -/// contains 1,024 values, then there are as many bases as there are _lanes_ of this type in a -/// 1024-bit register. For example, for 64-bit values, there are 16 bases because there are 16 -/// _lanes_. Each lane is a [delta-encoding](https://en.wikipedia.org/wiki/Delta_encoding) `1024 / -/// bit_width` long vector of values. The deltas are stored in the +/// Each chunk is stored as a vector of bases and a vector of deltas. There are as many bases as +/// there are _lanes_ of this type in a 1024-bit register. For example, for 64-bit values, there +/// are 16 bases because there are 16 _lanes_. Each lane is a +/// [delta-encoding](https://en.wikipedia.org/wiki/Delta_encoding) `1024 / bit_width` long vector +/// of values. The deltas are stored in the /// [FastLanes](https://www.vldb.org/pvldb/vol16/p2132-afroozeh.pdf) order which splits the 1,024 /// values into one contiguous sub-sequence per-lane, thus permitting delta encoding. /// -/// If the chunk physically has fewer than 1,024 values, then it is stored as a traditional, -/// non-SIMD-amenable, delta-encoded vector. -/// /// Note the validity is stored in the deltas array. #[derive(Clone, Debug)] pub struct DeltaArray { @@ -64,18 +67,14 @@ pub struct DeltaArray { } impl DeltaArray { - // TODO(ngates): remove constructing from vec - pub fn try_from_vec(vec: Vec) -> VortexResult { - Self::try_from_primitive_array(&PrimitiveArray::new( - Buffer::copy_from(vec), - Validity::NonNullable, - )) - } - - pub fn try_from_primitive_array(array: &PrimitiveArray) -> VortexResult { - let (bases, deltas) = delta_compress::delta_compress(array)?; + pub fn try_from_primitive_array( + array: &PrimitiveArray, + ctx: &mut ExecutionCtx, + ) -> VortexResult { + let logical_len = array.len(); + let (bases, deltas) = delta_compress::delta_compress(array, ctx)?; - Self::try_from_delta_compress_parts(bases.into_array(), deltas.into_array()) + Self::try_new(bases.into_array(), deltas.into_array(), 0, logical_len) } /// Create a [`DeltaArray`] from the given `bases` and `deltas` arrays. @@ -122,10 +121,15 @@ impl DeltaArray { let lanes = lane_count(ptype); - if deltas.len().is_multiple_of(1024) != bases.len().is_multiple_of(lanes) { + if !deltas.len().is_multiple_of(1024) { vortex_bail!( - "deltas length ({}) is a multiple of 1024 iff bases length ({}) is a multiple of LANES ({})", + "deltas length ({}) must be a multiple of 1024", deltas.len(), + ); + } + if !bases.len().is_multiple_of(lanes) { + vortex_bail!( + "bases length ({}) must be a multiple of LANES ({})", bases.len(), lanes, ); diff --git a/encodings/fastlanes/src/delta/compute/cast.rs b/encodings/fastlanes/src/delta/compute/cast.rs index 16b39674a58..e93680e939a 100644 --- a/encodings/fastlanes/src/delta/compute/cast.rs +++ b/encodings/fastlanes/src/delta/compute/cast.rs @@ -36,17 +36,21 @@ impl CastReduce for Delta { let casted_bases = array.bases().cast(dtype.with_nullability(NonNullable))?; let casted_deltas = array.deltas().cast(dtype.clone())?; - // Create a new DeltaArray with the casted components + // Create a new DeltaArray with the casted components, preserving offset and logical length Ok(Some( - DeltaArray::try_from_delta_compress_parts(casted_bases, casted_deltas)?.into_array(), + DeltaArray::try_new(casted_bases, casted_deltas, array.offset(), array.len())? + .into_array(), )) } } #[cfg(test)] mod tests { + use std::sync::LazyLock; + use rstest::rstest; use vortex_array::IntoArray; + use vortex_array::VortexSessionExecute; use vortex_array::arrays::PrimitiveArray; use vortex_array::assert_arrays_eq; use vortex_array::builtins::ArrayBuiltins; @@ -54,14 +58,20 @@ mod tests { use vortex_array::dtype::DType; use vortex_array::dtype::Nullability; use vortex_array::dtype::PType; + use vortex_array::session::ArraySession; use vortex_buffer::buffer; + use vortex_session::VortexSession; use crate::delta::DeltaArray; + static SESSION: LazyLock = + LazyLock::new(|| VortexSession::empty().with::()); #[test] fn test_cast_delta_u8_to_u32() { let primitive = PrimitiveArray::from_iter([10u8, 20, 30, 40, 50]); - let array = DeltaArray::try_from_primitive_array(&primitive).unwrap(); + let array = + DeltaArray::try_from_primitive_array(&primitive, &mut SESSION.create_execution_ctx()) + .unwrap(); let casted = array .into_array() @@ -84,7 +94,9 @@ mod tests { buffer![100u16, 0, 200, 300, 0], vortex_array::validity::Validity::NonNullable, ); - let array = DeltaArray::try_from_primitive_array(&values).unwrap(); + let array = + DeltaArray::try_from_primitive_array(&values, &mut SESSION.create_execution_ctx()) + .unwrap(); let casted = array .into_array() @@ -122,7 +134,9 @@ mod tests { ) )] fn test_cast_delta_conformance(#[case] primitive: PrimitiveArray) { - let delta_array = DeltaArray::try_from_primitive_array(&primitive).unwrap(); + let delta_array = + DeltaArray::try_from_primitive_array(&primitive, &mut SESSION.create_execution_ctx()) + .unwrap(); test_cast_conformance(&delta_array.into_array()); } } diff --git a/encodings/fastlanes/src/delta/vtable/mod.rs b/encodings/fastlanes/src/delta/vtable/mod.rs index 9ebbaebe729..790dedc6873 100644 --- a/encodings/fastlanes/src/delta/vtable/mod.rs +++ b/encodings/fastlanes/src/delta/vtable/mod.rs @@ -22,7 +22,6 @@ use vortex_array::stats::StatsSetRef; use vortex_array::vtable; use vortex_array::vtable::ArrayId; use vortex_array::vtable::VTable; -use vortex_array::vtable::ValidityVTableFromChildSliceHelper; use vortex_error::VortexResult; use vortex_error::vortex_ensure; use vortex_error::vortex_err; @@ -54,7 +53,7 @@ impl VTable for Delta { type Metadata = ProstMetadata; type OperationsVTable = Self; - type ValidityVTable = ValidityVTableFromChildSliceHelper; + type ValidityVTable = Self; fn id(_array: &Self::Array) -> ArrayId { Self::ID diff --git a/encodings/fastlanes/src/delta/vtable/operations.rs b/encodings/fastlanes/src/delta/vtable/operations.rs index f91ebccc15c..f691cb5018e 100644 --- a/encodings/fastlanes/src/delta/vtable/operations.rs +++ b/encodings/fastlanes/src/delta/vtable/operations.rs @@ -18,18 +18,32 @@ impl OperationsVTable for Delta { #[cfg(test)] mod tests { + use std::sync::LazyLock; + use rstest::rstest; use vortex_array::IntoArray; + use vortex_array::VortexSessionExecute; use vortex_array::arrays::PrimitiveArray; use vortex_array::assert_arrays_eq; use vortex_array::compute::conformance::binary_numeric::test_binary_numeric_array; use vortex_array::compute::conformance::consistency::test_array_consistency; + use vortex_array::session::ArraySession; + use vortex_array::validity::Validity; + use vortex_buffer::buffer; + use vortex_session::VortexSession; use crate::DeltaArray; + static SESSION: LazyLock = + LazyLock::new(|| VortexSession::empty().with::()); + #[test] fn test_slice_non_jagged_array_first_chunk_of_two() { - let delta = DeltaArray::try_from_vec((0u32..2048).collect()).unwrap(); + let delta = DeltaArray::try_from_primitive_array( + &(0u32..2048).collect(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap(); let actual = delta.slice(10..250).unwrap(); let expected = PrimitiveArray::from_iter(10u32..250).into_array(); @@ -38,7 +52,11 @@ mod tests { #[test] fn test_slice_non_jagged_array_second_chunk_of_two() { - let delta = DeltaArray::try_from_vec((0u32..2048).collect()).unwrap(); + let delta = DeltaArray::try_from_primitive_array( + &(0u32..2048).collect(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap(); let actual = delta.slice(1024 + 10..1024 + 250).unwrap(); let expected = PrimitiveArray::from_iter((1024 + 10u32)..(1024 + 250)).into_array(); @@ -47,7 +65,11 @@ mod tests { #[test] fn test_slice_non_jagged_array_span_two_chunks_chunk_of_two() { - let delta = DeltaArray::try_from_vec((0u32..2048).collect()).unwrap(); + let delta = DeltaArray::try_from_primitive_array( + &(0u32..2048).collect(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap(); let actual = delta.slice(1000..1048).unwrap(); let expected = PrimitiveArray::from_iter(1000u32..1048).into_array(); @@ -56,7 +78,11 @@ mod tests { #[test] fn test_slice_non_jagged_array_span_two_chunks_chunk_of_four() { - let delta = DeltaArray::try_from_vec((0u32..4096).collect()).unwrap(); + let delta = DeltaArray::try_from_primitive_array( + &(0u32..4096).collect(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap(); let actual = delta.slice(2040..2050).unwrap(); let expected = PrimitiveArray::from_iter(2040u32..2050).into_array(); @@ -65,7 +91,11 @@ mod tests { #[test] fn test_slice_non_jagged_array_whole() { - let delta = DeltaArray::try_from_vec((0u32..4096).collect()).unwrap(); + let delta = DeltaArray::try_from_primitive_array( + &(0u32..4096).collect(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap(); let actual = delta.slice(0..4096).unwrap(); let expected = PrimitiveArray::from_iter(0u32..4096).into_array(); @@ -74,7 +104,11 @@ mod tests { #[test] fn test_slice_non_jagged_array_empty() { - let delta = DeltaArray::try_from_vec((0u32..4096).collect()).unwrap(); + let delta = DeltaArray::try_from_primitive_array( + &(0u32..4096).collect(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap(); let actual = delta.slice(0..0).unwrap(); let expected = PrimitiveArray::from_iter(Vec::::new()).into_array(); @@ -91,7 +125,11 @@ mod tests { #[test] fn test_slice_jagged_array_second_chunk_of_two() { - let delta = DeltaArray::try_from_vec((0u32..2000).collect()).unwrap(); + let delta = DeltaArray::try_from_primitive_array( + &(0u32..2000).collect(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap(); let actual = delta.slice(1024 + 10..1024 + 250).unwrap(); let expected = PrimitiveArray::from_iter((1024 + 10u32)..(1024 + 250)).into_array(); @@ -100,7 +138,11 @@ mod tests { #[test] fn test_slice_jagged_array_empty() { - let delta = DeltaArray::try_from_vec((0u32..4000).collect()).unwrap(); + let delta = DeltaArray::try_from_primitive_array( + &(0u32..4000).collect(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap(); let actual = delta.slice(0..0).unwrap(); let expected = PrimitiveArray::from_iter(Vec::::new()).into_array(); @@ -117,7 +159,11 @@ mod tests { #[test] fn test_slice_of_slice_of_non_jagged() { - let delta = DeltaArray::try_from_vec((0u32..2048).collect()).unwrap(); + let delta = DeltaArray::try_from_primitive_array( + &(0u32..2048).collect(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap(); let sliced = delta.slice(10..1013).unwrap(); let sliced_again = sliced.slice(0..2).unwrap(); @@ -128,7 +174,11 @@ mod tests { #[test] fn test_slice_of_slice_of_jagged() { - let delta = DeltaArray::try_from_vec((0u32..2000).collect()).unwrap(); + let delta = DeltaArray::try_from_primitive_array( + &(0u32..2000).collect(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap(); let sliced = delta.slice(10..1013).unwrap(); let sliced_again = sliced.slice(0..2).unwrap(); @@ -139,7 +189,11 @@ mod tests { #[test] fn test_slice_of_slice_second_chunk_of_non_jagged() { - let delta = DeltaArray::try_from_vec((0u32..2048).collect()).unwrap(); + let delta = DeltaArray::try_from_primitive_array( + &(0u32..2048).collect(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap(); let sliced = delta.slice(1034..1050).unwrap(); let sliced_again = sliced.slice(0..2).unwrap(); @@ -150,7 +204,11 @@ mod tests { #[test] fn test_slice_of_slice_second_chunk_of_jagged() { - let delta = DeltaArray::try_from_vec((0u32..2000).collect()).unwrap(); + let delta = DeltaArray::try_from_primitive_array( + &(0u32..2000).collect(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap(); let sliced = delta.slice(1034..1050).unwrap(); let sliced_again = sliced.slice(0..2).unwrap(); @@ -161,7 +219,11 @@ mod tests { #[test] fn test_slice_of_slice_spanning_two_chunks_of_non_jagged() { - let delta = DeltaArray::try_from_vec((0u32..2048).collect()).unwrap(); + let delta = DeltaArray::try_from_primitive_array( + &(0u32..2048).collect(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap(); let sliced = delta.slice(1010..1050).unwrap(); let sliced_again = sliced.slice(5..20).unwrap(); @@ -172,7 +234,11 @@ mod tests { #[test] fn test_slice_of_slice_spanning_two_chunks_of_jagged() { - let delta = DeltaArray::try_from_vec((0u32..2000).collect()).unwrap(); + let delta = DeltaArray::try_from_primitive_array( + &(0u32..2000).collect(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap(); let sliced = delta.slice(1010..1050).unwrap(); let sliced_again = sliced.slice(5..20).unwrap(); @@ -183,9 +249,12 @@ mod tests { #[test] fn test_scalar_at_non_jagged_array() { - let delta = DeltaArray::try_from_vec((0u32..2048).collect()) - .unwrap() - .into_array(); + let delta = DeltaArray::try_from_primitive_array( + &(0u32..2048).collect(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap() + .into_array(); let expected = PrimitiveArray::from_iter(0u32..2048).into_array(); assert_arrays_eq!(delta, expected); @@ -194,16 +263,22 @@ mod tests { #[test] #[should_panic] fn test_scalar_at_non_jagged_array_oob() { - let delta = DeltaArray::try_from_vec((0u32..2048).collect()) - .unwrap() - .into_array(); + let delta = DeltaArray::try_from_primitive_array( + &(0u32..2048).collect(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap() + .into_array(); delta.scalar_at(2048).unwrap(); } #[test] fn test_scalar_at_jagged_array() { - let delta = DeltaArray::try_from_vec((0u32..2000).collect()) - .unwrap() - .into_array(); + let delta = DeltaArray::try_from_primitive_array( + &(0u32..2000).collect(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap() + .into_array(); let expected = PrimitiveArray::from_iter(0u32..2000).into_array(); assert_arrays_eq!(delta, expected); @@ -212,32 +287,43 @@ mod tests { #[test] #[should_panic] fn test_scalar_at_jagged_array_oob() { - let delta = DeltaArray::try_from_vec((0u32..2000).collect()) - .unwrap() - .into_array(); + let delta = DeltaArray::try_from_primitive_array( + &(0u32..2000).collect(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap() + .into_array(); delta.scalar_at(2000).unwrap(); } #[rstest] // Basic delta arrays - #[case::delta_u32(DeltaArray::try_from_vec((0u32..100).collect()).unwrap())] - #[case::delta_u64(DeltaArray::try_from_vec((0..100).map(|i| i as u64 * 10).collect()).unwrap())] + #[case::delta_u32((0u32..100).collect())] + #[case::delta_u64((0..100).map(|i| i as u64 * 10).collect())] // Large arrays (multiple chunks) - #[case::delta_large_u32(DeltaArray::try_from_vec((0u32..2048).collect()).unwrap())] - #[case::delta_large_u64(DeltaArray::try_from_vec((0u64..2048).collect()).unwrap())] + #[case::delta_large_u32((0u32..2048).collect())] + #[case::delta_large_u64((0u64..2048).collect())] // Single element - #[case::delta_single(DeltaArray::try_from_vec(vec![42u32]).unwrap())] - fn test_delta_consistency(#[case] array: DeltaArray) { - test_array_consistency(&array.into_array()); + #[case::delta_single(PrimitiveArray::new(buffer![42u32], Validity::NonNullable))] + fn test_delta_consistency(#[case] array: PrimitiveArray) { + test_array_consistency( + &DeltaArray::try_from_primitive_array(&array, &mut SESSION.create_execution_ctx()) + .unwrap() + .into_array(), + ); } #[rstest] - #[case::delta_u8_basic(DeltaArray::try_from_vec(vec![1u8, 1, 1, 1, 1]).unwrap())] - #[case::delta_u16_basic(DeltaArray::try_from_vec(vec![1u16, 1, 1, 1, 1]).unwrap())] - #[case::delta_u32_basic(DeltaArray::try_from_vec(vec![1u32, 1, 1, 1, 1]).unwrap())] - #[case::delta_u64_basic(DeltaArray::try_from_vec(vec![1u64, 1, 1, 1, 1]).unwrap())] - #[case::delta_u32_large(DeltaArray::try_from_vec(vec![1u32; 100]).unwrap())] - fn test_delta_binary_numeric(#[case] array: DeltaArray) { - test_binary_numeric_array(array.into_array()); + #[case::delta_u8_basic(PrimitiveArray::new(buffer![1u8, 1, 1, 1, 1], Validity::NonNullable))] + #[case::delta_u16_basic(PrimitiveArray::new(buffer![1u16, 1, 1, 1, 1], Validity::NonNullable))] + #[case::delta_u32_basic(PrimitiveArray::new(buffer![1u32, 1, 1, 1, 1], Validity::NonNullable))] + #[case::delta_u64_basic(PrimitiveArray::new(buffer![1u64, 1, 1, 1, 1], Validity::NonNullable))] + #[case::delta_u32_large(PrimitiveArray::new(buffer![1u32; 100], Validity::NonNullable))] + fn test_delta_binary_numeric(#[case] array: PrimitiveArray) { + test_binary_numeric_array( + DeltaArray::try_from_primitive_array(&array, &mut SESSION.create_execution_ctx()) + .unwrap() + .into_array(), + ); } } diff --git a/encodings/fastlanes/src/delta/vtable/validity.rs b/encodings/fastlanes/src/delta/vtable/validity.rs index 71b930025c6..17dc2063206 100644 --- a/encodings/fastlanes/src/delta/vtable/validity.rs +++ b/encodings/fastlanes/src/delta/vtable/validity.rs @@ -1,14 +1,25 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_array::ArrayRef; -use vortex_array::vtable::ValidityChildSliceHelper; +use vortex_array::LEGACY_SESSION; +use vortex_array::VortexSessionExecute; +use vortex_array::validity::Validity; +use vortex_array::vtable::ValidityVTable; +use vortex_error::VortexResult; +use crate::Delta; use crate::DeltaArray; +use crate::bit_transpose::untranspose_validity; -impl ValidityChildSliceHelper for DeltaArray { - fn unsliced_child_and_slice(&self) -> (&ArrayRef, usize, usize) { - let (start, len) = (self.offset(), self.len()); - (self.deltas(), start, start + len) +impl ValidityVTable for Delta { + fn validity(array: &DeltaArray) -> VortexResult { + let start = array.offset(); + let end = start + array.len(); + + let validity = untranspose_validity( + &array.deltas().validity()?, + &mut LEGACY_SESSION.create_execution_ctx(), + )?; + validity.slice(start..end) } } diff --git a/vortex-array/src/arrays/assertions.rs b/vortex-array/src/arrays/assertions.rs index 07fe481d02c..89f70cdd56f 100644 --- a/vortex-array/src/arrays/assertions.rs +++ b/vortex-array/src/arrays/assertions.rs @@ -58,23 +58,27 @@ macro_rules! assert_arrays_eq { right.dtype(), left.display_values(), right.display_values() - ) - } + ) + } - if left.len() != right.len() { - panic!( - "assertion left == right failed: arrays differ in length: {} != {}.\n left: {}\n right: {}", - left.len(), - right.len(), - left.display_values(), - right.display_values() - ) - } - let n = left.len(); - let mismatched_indices = (0..n) - .filter(|i| left.scalar_at(*i).unwrap() != right.scalar_at(*i).unwrap()) + if left.len() != right.len() { + panic!( + "assertion left == right failed: arrays differ in length: {} != {}.\n left: {}\n right: {}", + left.len(), + right.len(), + left.display_values(), + right.display_values() + ) + } + + let n = left.len(); + let mismatched_indices = (0..n) + .filter(|i| left.scalar_at(*i).unwrap() != right.scalar_at(*i).unwrap()) .collect::>(); - if mismatched_indices.len() != 0 { + if mismatched_indices.len() != 0 { + eprintln!("mismatched values: {:?}", mismatched_indices.iter() + .map(|i| (left.scalar_at(*i).unwrap(), right.scalar_at(*i).unwrap())) + .collect::>()); panic!( "assertion left == right failed: arrays do not match at indices: {}.\n left: {}\n right: {}", $crate::arrays::format_indices(mismatched_indices), diff --git a/vortex-buffer/src/bit/buf.rs b/vortex-buffer/src/bit/buf.rs index 89ddc55f3c8..df891febb81 100644 --- a/vortex-buffer/src/bit/buf.rs +++ b/vortex-buffer/src/bit/buf.rs @@ -379,7 +379,6 @@ impl BitBuffer { /// The second value of the tuple is a bit_offset of the first value in the first byte pub fn into_mut(self) -> BitBufferMut { let (offset, len, inner) = self.into_inner(); - // TODO(robert): if we are copying here we could strip offset bits BitBufferMut::from_buffer(inner.into_mut(), offset, len) } } diff --git a/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/delta.rs b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/delta.rs index 8b268a08456..88bc8d38eb2 100644 --- a/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/delta.rs +++ b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/delta.rs @@ -1,8 +1,10 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use vortex::VortexSessionDefault; use vortex::array::ArrayRef; use vortex::array::IntoArray; +use vortex::array::VortexSessionExecute; use vortex::array::arrays::PrimitiveArray; use vortex::array::arrays::StructArray; use vortex::array::dtype::FieldNames; @@ -11,6 +13,7 @@ use vortex::array::vtable::ArrayId; use vortex::encodings::fastlanes::Delta; use vortex::encodings::fastlanes::DeltaArray; use vortex::error::VortexResult; +use vortex_session::VortexSession; use super::N; use crate::fixtures::FlatLayoutFixture; @@ -31,6 +34,9 @@ impl FlatLayoutFixture for DeltaFixture { } fn build(&self) -> VortexResult { + let session = VortexSession::default(); + let mut ctx = session.create_execution_ctx(); + let monotonic_u64: PrimitiveArray = (0..N as u64).map(|i| i * 3 + 1000).collect(); let constant_delta_u32: PrimitiveArray = (0..N as u32).collect(); let large_stride_u64: PrimitiveArray = (0..N as u64).map(|i| i * 1_000_000).collect(); @@ -70,16 +76,16 @@ impl FlatLayoutFixture for DeltaFixture { "nullable_monotone", ]), vec![ - DeltaArray::try_from_primitive_array(&monotonic_u64)?.into_array(), - DeltaArray::try_from_primitive_array(&constant_delta_u32)?.into_array(), - DeltaArray::try_from_primitive_array(&large_stride_u64)?.into_array(), - DeltaArray::try_from_primitive_array(&monotonic_u16)?.into_array(), - DeltaArray::try_from_primitive_array(&monotonic_u8)?.into_array(), - DeltaArray::try_from_primitive_array(&large_base_u64)?.into_array(), - DeltaArray::try_from_primitive_array(&all_zero_deltas)?.into_array(), - DeltaArray::try_from_primitive_array(&irregular_monotone)?.into_array(), - DeltaArray::try_from_primitive_array(&near_overflow_base)?.into_array(), - DeltaArray::try_from_primitive_array(&nullable_monotone)?.into_array(), + DeltaArray::try_from_primitive_array(&monotonic_u64, &mut ctx)?.into_array(), + DeltaArray::try_from_primitive_array(&constant_delta_u32, &mut ctx)?.into_array(), + DeltaArray::try_from_primitive_array(&large_stride_u64, &mut ctx)?.into_array(), + DeltaArray::try_from_primitive_array(&monotonic_u16, &mut ctx)?.into_array(), + DeltaArray::try_from_primitive_array(&monotonic_u8, &mut ctx)?.into_array(), + DeltaArray::try_from_primitive_array(&large_base_u64, &mut ctx)?.into_array(), + DeltaArray::try_from_primitive_array(&all_zero_deltas, &mut ctx)?.into_array(), + DeltaArray::try_from_primitive_array(&irregular_monotone, &mut ctx)?.into_array(), + DeltaArray::try_from_primitive_array(&near_overflow_base, &mut ctx)?.into_array(), + DeltaArray::try_from_primitive_array(&nullable_monotone, &mut ctx)?.into_array(), ], N, Validity::NonNullable, diff --git a/vortex/benches/single_encoding_throughput.rs b/vortex/benches/single_encoding_throughput.rs index 720293158e3..cd583997ee2 100644 --- a/vortex/benches/single_encoding_throughput.rs +++ b/vortex/benches/single_encoding_throughput.rs @@ -5,6 +5,8 @@ #![allow(clippy::cast_possible_truncation)] #![allow(unexpected_cfgs)] +use std::sync::LazyLock; + use divan::Bencher; #[cfg(not(codspeed))] use divan::counter::BytesCount; @@ -32,12 +34,19 @@ use vortex::encodings::runend::RunEndArray; use vortex::encodings::sequence::sequence_encode; use vortex::encodings::zigzag::zigzag_encode; use vortex::encodings::zstd::ZstdArray; +use vortex_array::LEGACY_SESSION; +use vortex_array::VortexSessionExecute; use vortex_array::dtype::Nullability; +use vortex_array::session::ArraySession; use vortex_sequence::SequenceArray; +use vortex_session::VortexSession; #[global_allocator] static GLOBAL: MiMalloc = MiMalloc; +static SESSION: LazyLock = + LazyLock::new(|| VortexSession::empty().with::()); + fn main() { divan::main(); } @@ -143,7 +152,7 @@ fn bench_delta_compress_u32(bencher: Bencher) { with_byte_counter(bencher, NUM_VALUES * 4) .with_inputs(|| &uint_array) .bench_refs(|a| { - let (bases, deltas) = delta_compress(a).unwrap(); + let (bases, deltas) = delta_compress(a, &mut SESSION.create_execution_ctx()).unwrap(); DeltaArray::try_from_delta_compress_parts(bases.into_array(), deltas.into_array()) .unwrap() }); @@ -152,7 +161,7 @@ fn bench_delta_compress_u32(bencher: Bencher) { #[divan::bench(name = "delta_decompress_u32")] fn bench_delta_decompress_u32(bencher: Bencher) { let (uint_array, ..) = setup_primitive_arrays(); - let (bases, deltas) = delta_compress(&uint_array).unwrap(); + let (bases, deltas) = delta_compress(&uint_array, &mut SESSION.create_execution_ctx()).unwrap(); let compressed = DeltaArray::try_from_delta_compress_parts(bases.into_array(), deltas.into_array()).unwrap(); From 919fb425f437254949fdc12b7b772cb534fd8c8e Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Tue, 17 Mar 2026 18:50:27 -0700 Subject: [PATCH 02/18] api Signed-off-by: Robert Kruszewski --- encodings/fastlanes/public-api.lock | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/encodings/fastlanes/public-api.lock b/encodings/fastlanes/public-api.lock index b8086473960..369aec2cbbe 100644 --- a/encodings/fastlanes/public-api.lock +++ b/encodings/fastlanes/public-api.lock @@ -2,10 +2,18 @@ pub mod vortex_fastlanes pub mod vortex_fastlanes::bit_transpose +pub fn vortex_fastlanes::bit_transpose::transpose_bitbuffer(bits: vortex_buffer::bit::buf::BitBuffer) -> vortex_buffer::bit::buf::BitBuffer + pub fn vortex_fastlanes::bit_transpose::transpose_bits(input: &[u8; 128], output: &mut [u8; 128]) +pub fn vortex_fastlanes::bit_transpose::transpose_validity(validity: &vortex_array::validity::Validity, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_fastlanes::bit_transpose::untranspose_bitbuffer(bits: vortex_buffer::bit::buf::BitBuffer) -> vortex_buffer::bit::buf::BitBuffer + pub fn vortex_fastlanes::bit_transpose::untranspose_bits(input: &[u8; 128], output: &mut [u8; 128]) +pub fn vortex_fastlanes::bit_transpose::untranspose_validity(validity: &vortex_array::validity::Validity, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult + pub mod vortex_fastlanes::bitpack_compress pub fn vortex_fastlanes::bitpack_compress::bit_width_histogram(array: &vortex_array::arrays::primitive::array::PrimitiveArray) -> vortex_error::VortexResult> @@ -296,7 +304,7 @@ pub type vortex_fastlanes::Delta::Metadata = vortex_array::metadata::ProstMetada pub type vortex_fastlanes::Delta::OperationsVTable = vortex_fastlanes::Delta -pub type vortex_fastlanes::Delta::ValidityVTable = vortex_array::vtable::validity::ValidityVTableFromChildSliceHelper +pub type vortex_fastlanes::Delta::ValidityVTable = vortex_fastlanes::Delta pub fn vortex_fastlanes::Delta::array_eq(array: &vortex_fastlanes::DeltaArray, other: &vortex_fastlanes::DeltaArray, precision: vortex_array::hash::Precision) -> bool @@ -340,6 +348,10 @@ impl vortex_array::vtable::operations::OperationsVTable pub fn vortex_fastlanes::Delta::scalar_at(array: &vortex_fastlanes::DeltaArray, index: usize) -> vortex_error::VortexResult +impl vortex_array::vtable::validity::ValidityVTable for vortex_fastlanes::Delta + +pub fn vortex_fastlanes::Delta::validity(array: &vortex_fastlanes::DeltaArray) -> vortex_error::VortexResult + pub struct vortex_fastlanes::DeltaArray impl vortex_fastlanes::DeltaArray @@ -358,9 +370,7 @@ pub fn vortex_fastlanes::DeltaArray::offset(&self) -> usize pub fn vortex_fastlanes::DeltaArray::try_from_delta_compress_parts(bases: vortex_array::array::ArrayRef, deltas: vortex_array::array::ArrayRef) -> vortex_error::VortexResult -pub fn vortex_fastlanes::DeltaArray::try_from_primitive_array(array: &vortex_array::arrays::primitive::array::PrimitiveArray) -> vortex_error::VortexResult - -pub fn vortex_fastlanes::DeltaArray::try_from_vec(vec: alloc::vec::Vec) -> vortex_error::VortexResult +pub fn vortex_fastlanes::DeltaArray::try_from_primitive_array(array: &vortex_array::arrays::primitive::array::PrimitiveArray, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult pub fn vortex_fastlanes::DeltaArray::try_new(bases: vortex_array::array::ArrayRef, deltas: vortex_array::array::ArrayRef, offset: usize, logical_len: usize) -> vortex_error::VortexResult @@ -394,10 +404,6 @@ impl vortex_array::array::IntoArray for vortex_fastlanes::DeltaArray pub fn vortex_fastlanes::DeltaArray::into_array(self) -> vortex_array::array::ArrayRef -impl vortex_array::vtable::validity::ValidityChildSliceHelper for vortex_fastlanes::DeltaArray - -pub fn vortex_fastlanes::DeltaArray::unsliced_child_and_slice(&self) -> (&vortex_array::array::ArrayRef, usize, usize) - pub struct vortex_fastlanes::FoR impl vortex_fastlanes::FoR @@ -668,6 +674,10 @@ impl vortex_array::vtable::validity::ValidityChildSliceHelper for vortex_fastlan pub fn vortex_fastlanes::RLEArray::unsliced_child_and_slice(&self) -> (&vortex_array::array::ArrayRef, usize, usize) +<<<<<<< HEAD pub fn vortex_fastlanes::delta_compress(array: &vortex_array::arrays::primitive::array::PrimitiveArray) -> vortex_error::VortexResult<(vortex_array::arrays::primitive::array::PrimitiveArray, vortex_array::arrays::primitive::array::PrimitiveArray)> pub fn vortex_fastlanes::initialize(session: &mut vortex_session::VortexSession) +======= +pub fn vortex_fastlanes::delta_compress(array: &vortex_array::arrays::primitive::array::PrimitiveArray, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<(vortex_array::arrays::primitive::array::PrimitiveArray, vortex_array::arrays::primitive::array::PrimitiveArray)> +>>>>>>> 8e20c980d (api) From 7751ca5e6912d933e15380900b14a3f57eab4b9b Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Tue, 17 Mar 2026 18:52:06 -0700 Subject: [PATCH 03/18] format Signed-off-by: Robert Kruszewski --- encodings/fastlanes/src/delta/array/delta_compress.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/encodings/fastlanes/src/delta/array/delta_compress.rs b/encodings/fastlanes/src/delta/array/delta_compress.rs index 93d730c1a65..e2d8c979254 100644 --- a/encodings/fastlanes/src/delta/array/delta_compress.rs +++ b/encodings/fastlanes/src/delta/array/delta_compress.rs @@ -78,7 +78,7 @@ fn compress_primitive( let bases_len = num_chunks * LANES; // Split into full 1024-element chunks and a remainder. - let (full_chunks, remainder) = array.as_chunks:: <1024>(); + let (full_chunks, remainder) = array.as_chunks::<1024>(); // Allocate result arrays. let mut bases = BufferMut::with_capacity(bases_len); From ab9670038a59b00124d41d6f78d5286dc552bce6 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Tue, 17 Mar 2026 22:00:34 -0700 Subject: [PATCH 04/18] lint Signed-off-by: Robert Kruszewski --- vortex/benches/single_encoding_throughput.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/vortex/benches/single_encoding_throughput.rs b/vortex/benches/single_encoding_throughput.rs index cd583997ee2..1c534eb1325 100644 --- a/vortex/benches/single_encoding_throughput.rs +++ b/vortex/benches/single_encoding_throughput.rs @@ -34,7 +34,6 @@ use vortex::encodings::runend::RunEndArray; use vortex::encodings::sequence::sequence_encode; use vortex::encodings::zigzag::zigzag_encode; use vortex::encodings::zstd::ZstdArray; -use vortex_array::LEGACY_SESSION; use vortex_array::VortexSessionExecute; use vortex_array::dtype::Nullability; use vortex_array::session::ArraySession; From fe46926ed8d672ba77e367fb00b59a9374f66283 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Tue, 17 Mar 2026 22:15:37 -0700 Subject: [PATCH 05/18] usedelta Signed-off-by: Robert Kruszewski --- vortex-btrblocks/src/compressor/rle.rs | 33 ++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/vortex-btrblocks/src/compressor/rle.rs b/vortex-btrblocks/src/compressor/rle.rs index c0cb20780bb..6c409f1f18b 100644 --- a/vortex-btrblocks/src/compressor/rle.rs +++ b/vortex-btrblocks/src/compressor/rle.rs @@ -8,10 +8,14 @@ use std::marker::PhantomData; use vortex_array::ArrayRef; use vortex_array::Canonical; use vortex_array::IntoArray; +use vortex_array::LEGACY_SESSION; use vortex_array::ToCanonical; +use vortex_array::VortexSessionExecute; use vortex_array::arrays::PrimitiveArray; use vortex_error::VortexResult; +use vortex_fastlanes::DeltaArray; use vortex_fastlanes::RLEArray; +use vortex_fastlanes::delta_compress; use crate::BtrBlocksCompressor; use crate::CanonicalCompressor; @@ -131,6 +135,16 @@ impl Scheme for RLEScheme { &new_excludes, )?; + // Delta in an unstable encoding, once we deem it stable we can switch over to this always. + #[cfg(feature = "unstable_encodings")] + let compressed_indices = try_compress_delta( + &rle_array.indices().to_primitive().narrow()?, + compressor, + ctx.descend(), + Excludes::from(&[IntCode::Dict]), + )?; + + #[cfg(not(feature = "unstable_encodings"))] let compressed_indices = compressor.compress_canonical( Canonical::Primitive(rle_array.indices().to_primitive().narrow()?), ctx.descend(), @@ -157,3 +171,22 @@ impl Scheme for RLEScheme { } } } + +#[cfg(feature = "unstable_encodings")] +fn try_compress_delta( + primitive_array: &PrimitiveArray, + compressor: &BtrBlocksCompressor, + ctx: CompressorContext, + excludes: Excludes, +) -> VortexResult { + let (bases, deltas) = + delta_compress(primitive_array, &mut LEGACY_SESSION.create_execution_ctx())?; + + let compressed_bases = + compressor.compress_canonical(Canonical::Primitive(bases), ctx, excludes)?; + let compressed_deltas = + compressor.compress_canonical(Canonical::Primitive(deltas), ctx, excludes)?; + + DeltaArray::try_from_delta_compress_parts(compressed_bases, compressed_deltas) + .map(DeltaArray::into_array) +} From ab5601f6f2708ca7b4950651db29dd2466914e85 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Tue, 17 Mar 2026 22:31:59 -0700 Subject: [PATCH 06/18] imports Signed-off-by: Robert Kruszewski --- vortex-btrblocks/src/compressor/rle.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/vortex-btrblocks/src/compressor/rle.rs b/vortex-btrblocks/src/compressor/rle.rs index 6c409f1f18b..ef4b3fcb048 100644 --- a/vortex-btrblocks/src/compressor/rle.rs +++ b/vortex-btrblocks/src/compressor/rle.rs @@ -8,14 +8,10 @@ use std::marker::PhantomData; use vortex_array::ArrayRef; use vortex_array::Canonical; use vortex_array::IntoArray; -use vortex_array::LEGACY_SESSION; use vortex_array::ToCanonical; -use vortex_array::VortexSessionExecute; use vortex_array::arrays::PrimitiveArray; use vortex_error::VortexResult; -use vortex_fastlanes::DeltaArray; use vortex_fastlanes::RLEArray; -use vortex_fastlanes::delta_compress; use crate::BtrBlocksCompressor; use crate::CanonicalCompressor; @@ -179,14 +175,18 @@ fn try_compress_delta( ctx: CompressorContext, excludes: Excludes, ) -> VortexResult { - let (bases, deltas) = - delta_compress(primitive_array, &mut LEGACY_SESSION.create_execution_ctx())?; + use vortex_array::VortexSessionExecute; + + let (bases, deltas) = vortex_fastlanes::delta_compress( + primitive_array, + &mut vortex_array::LEGACY_SESSION.create_execution_ctx(), + )?; let compressed_bases = compressor.compress_canonical(Canonical::Primitive(bases), ctx, excludes)?; let compressed_deltas = compressor.compress_canonical(Canonical::Primitive(deltas), ctx, excludes)?; - DeltaArray::try_from_delta_compress_parts(compressed_bases, compressed_deltas) - .map(DeltaArray::into_array) + vortex_fastlanes::DeltaArray::try_from_delta_compress_parts(compressed_bases, compressed_deltas) + .map(vortex_fastlanes::DeltaArray::into_array) } From 098b4b59b6d5a3e907b7835c92ab92b37e4d595d Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Tue, 17 Mar 2026 22:54:17 -0700 Subject: [PATCH 07/18] compress Signed-off-by: Robert Kruszewski --- vortex-btrblocks/src/compressor/integer/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vortex-btrblocks/src/compressor/integer/mod.rs b/vortex-btrblocks/src/compressor/integer/mod.rs index 2c46e4cdd80..38db92602d1 100644 --- a/vortex-btrblocks/src/compressor/integer/mod.rs +++ b/vortex-btrblocks/src/compressor/integer/mod.rs @@ -1090,7 +1090,7 @@ mod scheme_selection_tests { use vortex_sequence::Sequence; use vortex_sparse::Sparse; - use crate::BtrBlocksCompressor; + use crate::{BtrBlocksCompressor, IntCode}; use crate::CompressorContext; use crate::CompressorExt; @@ -1205,14 +1205,14 @@ mod scheme_selection_tests { #[test] fn test_rle_compressed() -> VortexResult<()> { let mut values: Vec = Vec::new(); - for i in 0..10 { - values.extend(iter::repeat_n(i, 100)); + for i in 0..1024 { + values.extend(iter::repeat_n(i, 10)); } let array = PrimitiveArray::new(Buffer::copy_from(&values), Validity::NonNullable); let btr = BtrBlocksCompressor::default(); let compressed = btr.integer_compressor() - .compress(&btr, &array, CompressorContext::default(), &[])?; + .compress(&btr, &array, CompressorContext::default(), &[IntCode::RunEnd])?; assert!(compressed.is::()); Ok(()) } From 6574840c1599ebc6f453263f312384cd600fd780 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Tue, 17 Mar 2026 22:55:07 -0700 Subject: [PATCH 08/18] fixes Signed-off-by: Robert Kruszewski --- vortex-btrblocks/src/compressor/integer/mod.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/vortex-btrblocks/src/compressor/integer/mod.rs b/vortex-btrblocks/src/compressor/integer/mod.rs index 38db92602d1..58ee4f62e76 100644 --- a/vortex-btrblocks/src/compressor/integer/mod.rs +++ b/vortex-btrblocks/src/compressor/integer/mod.rs @@ -1090,9 +1090,10 @@ mod scheme_selection_tests { use vortex_sequence::Sequence; use vortex_sparse::Sparse; - use crate::{BtrBlocksCompressor, IntCode}; + use crate::BtrBlocksCompressor; use crate::CompressorContext; use crate::CompressorExt; + use crate::IntCode; #[test] fn test_constant_compressed() -> VortexResult<()> { @@ -1210,9 +1211,12 @@ mod scheme_selection_tests { } let array = PrimitiveArray::new(Buffer::copy_from(&values), Validity::NonNullable); let btr = BtrBlocksCompressor::default(); - let compressed = - btr.integer_compressor() - .compress(&btr, &array, CompressorContext::default(), &[IntCode::RunEnd])?; + let compressed = btr.integer_compressor().compress( + &btr, + &array, + CompressorContext::default(), + &[IntCode::RunEnd], + )?; assert!(compressed.is::()); Ok(()) } From db898d3005a8676e970a2a7cfe29b8b5f5966755 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Wed, 18 Mar 2026 21:37:08 -0700 Subject: [PATCH 09/18] format Signed-off-by: Robert Kruszewski --- vortex-array/src/arrays/assertions.rs | 39 +++++++++++++-------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/vortex-array/src/arrays/assertions.rs b/vortex-array/src/arrays/assertions.rs index 89f70cdd56f..ef409f2af23 100644 --- a/vortex-array/src/arrays/assertions.rs +++ b/vortex-array/src/arrays/assertions.rs @@ -49,36 +49,33 @@ macro_rules! assert_nth_scalar_is_null { #[macro_export] macro_rules! assert_arrays_eq { ($left:expr, $right:expr) => {{ - let left = $left.clone(); - let right = $right.clone(); - if left.dtype() != right.dtype() { + let left = $left.clone(); + let right = $right.clone(); + if left.dtype() != right.dtype() { panic!( "assertion left == right failed: arrays differ in type: {} != {}.\n left: {}\n right: {}", left.dtype(), right.dtype(), left.display_values(), right.display_values() - ) - } + ) + } - if left.len() != right.len() { - panic!( - "assertion left == right failed: arrays differ in length: {} != {}.\n left: {}\n right: {}", - left.len(), - right.len(), - left.display_values(), - right.display_values() - ) - } + if left.len() != right.len() { + panic!( + "assertion left == right failed: arrays differ in length: {} != {}.\n left: {}\n right: {}", + left.len(), + right.len(), + left.display_values(), + right.display_values() + ) + } - let n = left.len(); - let mismatched_indices = (0..n) - .filter(|i| left.scalar_at(*i).unwrap() != right.scalar_at(*i).unwrap()) + let n = left.len(); + let mismatched_indices = (0..n) + .filter(|i| left.scalar_at(*i).unwrap() != right.scalar_at(*i).unwrap()) .collect::>(); - if mismatched_indices.len() != 0 { - eprintln!("mismatched values: {:?}", mismatched_indices.iter() - .map(|i| (left.scalar_at(*i).unwrap(), right.scalar_at(*i).unwrap())) - .collect::>()); + if mismatched_indices.len() != 0 { panic!( "assertion left == right failed: arrays do not match at indices: {}.\n left: {}\n right: {}", $crate::arrays::format_indices(mismatched_indices), From 1693cc3ac39431c6ce58aa9643ad1990cebf0c5f Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Wed, 18 Mar 2026 21:44:01 -0700 Subject: [PATCH 10/18] ensure Signed-off-by: Robert Kruszewski --- encodings/fastlanes/src/delta/array/mod.rs | 82 +++++++++------------- 1 file changed, 34 insertions(+), 48 deletions(-) diff --git a/encodings/fastlanes/src/delta/array/mod.rs b/encodings/fastlanes/src/delta/array/mod.rs index 673fa041dbd..2fd74620832 100644 --- a/encodings/fastlanes/src/delta/array/mod.rs +++ b/encodings/fastlanes/src/delta/array/mod.rs @@ -3,6 +3,7 @@ use fastlanes::FastLanes; use vortex_array::ArrayRef; +use vortex_array::DynArray; use vortex_array::ExecutionCtx; use vortex_array::IntoArray; use vortex_array::arrays::PrimitiveArray; @@ -12,7 +13,7 @@ use vortex_array::match_each_unsigned_integer_ptype; use vortex_array::stats::ArrayStats; use vortex_error::VortexExpect as _; use vortex_error::VortexResult; -use vortex_error::vortex_bail; +use vortex_error::vortex_ensure; pub mod delta_compress; pub mod delta_decompress; @@ -90,50 +91,37 @@ impl DeltaArray { offset: usize, logical_len: usize, ) -> VortexResult { - if offset >= 1024 { - vortex_bail!("offset must be less than 1024: {}", offset); - } - if offset + logical_len > deltas.len() { - vortex_bail!( - "offset + logical_len, {} + {}, must be less than or equal to the size of deltas: {}", - offset, - logical_len, - deltas.len() - ) - } - if !bases.dtype().eq_ignore_nullability(deltas.dtype()) { - vortex_bail!( - "DeltaArray: bases and deltas must have the same dtype, got {:?} and {:?}", - bases.dtype(), - deltas.dtype() - ); - } - let DType::Primitive(ptype, _) = bases.dtype().clone() else { - vortex_bail!( - "DeltaArray: dtype must be an integer, got {}", - bases.dtype() - ); - }; - - if !ptype.is_int() { - vortex_bail!("DeltaArray: ptype must be an integer, got {}", ptype); - } - - let lanes = lane_count(ptype); - - if !deltas.len().is_multiple_of(1024) { - vortex_bail!( - "deltas length ({}) must be a multiple of 1024", - deltas.len(), - ); - } - if !bases.len().is_multiple_of(lanes) { - vortex_bail!( - "bases length ({}) must be a multiple of LANES ({})", - bases.len(), - lanes, - ); - } + vortex_ensure!(offset < 1024, "offset must be less than 1024: {offset}"); + vortex_ensure!( + offset + logical_len <= deltas.len(), + "offset + logical_len, {offset} + {logical_len}, must be less than or equal to the size of deltas: {}", + deltas.len() + ); + vortex_ensure!( + bases.dtype().eq_ignore_nullability(deltas.dtype()), + "DeltaArray: bases and deltas must have the same dtype, got {} and {}", + bases.dtype(), + deltas.dtype() + ); + + vortex_ensure!( + bases.dtype().is_int(), + "DeltaArray: dtype must be an integer, got {}", + bases.dtype() + ); + + let lanes = lane_count(bases.dtype().as_ptype()); + + vortex_ensure!( + deltas.len().is_multiple_of(1024), + "deltas length ({}) must be a multiple of 1024", + deltas.len(), + ); + vortex_ensure!( + bases.len().is_multiple_of(lanes), + "bases length ({}) must be a multiple of LANES ({lanes})", + bases.len(), + ); // SAFETY: validation done above Ok(unsafe { Self::new_unchecked(bases, deltas, offset, logical_len) }) @@ -166,9 +154,7 @@ impl DeltaArray { } pub(crate) fn lanes(&self) -> usize { - let ptype = - PType::try_from(self.dtype()).vortex_expect("DeltaArray DType must be primitive"); - lane_count(ptype) + lane_count(self.dtype().as_ptype()) } #[inline] From a4991dcb06f6af8fb65ae99052cc95eab710a2fc Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Wed, 18 Mar 2026 22:15:23 -0700 Subject: [PATCH 11/18] fixes Signed-off-by: Robert Kruszewski --- encodings/fastlanes/public-api.lock | 6 +----- encodings/fastlanes/src/delta/array/mod.rs | 3 +-- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/encodings/fastlanes/public-api.lock b/encodings/fastlanes/public-api.lock index 369aec2cbbe..45bd7d15a42 100644 --- a/encodings/fastlanes/public-api.lock +++ b/encodings/fastlanes/public-api.lock @@ -674,10 +674,6 @@ impl vortex_array::vtable::validity::ValidityChildSliceHelper for vortex_fastlan pub fn vortex_fastlanes::RLEArray::unsliced_child_and_slice(&self) -> (&vortex_array::array::ArrayRef, usize, usize) -<<<<<<< HEAD -pub fn vortex_fastlanes::delta_compress(array: &vortex_array::arrays::primitive::array::PrimitiveArray) -> vortex_error::VortexResult<(vortex_array::arrays::primitive::array::PrimitiveArray, vortex_array::arrays::primitive::array::PrimitiveArray)> +pub fn vortex_fastlanes::delta_compress(array: &vortex_array::arrays::primitive::array::PrimitiveArray, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<(vortex_array::arrays::primitive::array::PrimitiveArray, vortex_array::arrays::primitive::array::PrimitiveArray)> pub fn vortex_fastlanes::initialize(session: &mut vortex_session::VortexSession) -======= -pub fn vortex_fastlanes::delta_compress(array: &vortex_array::arrays::primitive::array::PrimitiveArray, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<(vortex_array::arrays::primitive::array::PrimitiveArray, vortex_array::arrays::primitive::array::PrimitiveArray)> ->>>>>>> 8e20c980d (api) diff --git a/encodings/fastlanes/src/delta/array/mod.rs b/encodings/fastlanes/src/delta/array/mod.rs index 2fd74620832..0cefe4c9fa9 100644 --- a/encodings/fastlanes/src/delta/array/mod.rs +++ b/encodings/fastlanes/src/delta/array/mod.rs @@ -11,7 +11,6 @@ use vortex_array::dtype::DType; use vortex_array::dtype::PType; use vortex_array::match_each_unsigned_integer_ptype; use vortex_array::stats::ArrayStats; -use vortex_error::VortexExpect as _; use vortex_error::VortexResult; use vortex_error::vortex_ensure; @@ -26,7 +25,7 @@ pub mod delta_decompress; /// /// # Examples /// -/// ```no_run +/// ``` /// use vortex_array::arrays::PrimitiveArray; /// use vortex_array::VortexSessionExecute; /// use vortex_array::session::ArraySession; From d48ce1eb2fceb9cf028999a769d0a27e92175254 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Thu, 19 Mar 2026 20:44:42 -0700 Subject: [PATCH 12/18] inplace Signed-off-by: Robert Kruszewski --- .../fastlanes/src/bit_transpose/validity.rs | 48 +++++++++++++++++-- .../src/delta/array/delta_compress.rs | 34 +------------ 2 files changed, 45 insertions(+), 37 deletions(-) diff --git a/encodings/fastlanes/src/bit_transpose/validity.rs b/encodings/fastlanes/src/bit_transpose/validity.rs index 5a10cd66532..a5f9f0eb9dd 100644 --- a/encodings/fastlanes/src/bit_transpose/validity.rs +++ b/encodings/fastlanes/src/bit_transpose/validity.rs @@ -10,6 +10,7 @@ use vortex_array::IntoArray; use vortex_array::arrays::BoolArray; use vortex_array::validity::Validity; use vortex_buffer::BitBuffer; +use vortex_buffer::ByteBuffer; use vortex_buffer::ByteBufferMut; use vortex_error::VortexExpect; use vortex_error::VortexResult; @@ -38,7 +39,25 @@ pub fn transpose_validity(validity: &Validity, ctx: &mut ExecutionCtx) -> Vortex #[inline] pub fn transpose_bitbuffer(bits: BitBuffer) -> BitBuffer { - fastlanes_layout_apply(bits, transpose_bits) + let (offset, len, bytes) = bits.into_inner(); + + if bytes.len().is_multiple_of(128) { + match bytes.try_into_mut() { + Ok(mut bytes_mut) => { + // We can ignore the spare trailer capacity that can be an artifact of allocator as we requested 128 multiple chunks + let (chunks, _) = bytes_mut.as_chunks_mut::<128>(); + let mut tmp = [0u8; 128]; + for chunk in chunks { + transpose_bits(chunk, &mut tmp); + chunk.copy_from_slice(&tmp); + } + BitBuffer::new_with_offset(bytes_mut.freeze().into_byte_buffer(), len, offset) + } + Err(bytes) => bits_op_with_copy(bytes, len, offset, transpose_bits), + } + } else { + bits_op_with_copy(bytes, len, offset, transpose_bits) + } } pub fn untranspose_validity(validity: &Validity, ctx: &mut ExecutionCtx) -> VortexResult { @@ -62,12 +81,31 @@ pub fn untranspose_validity(validity: &Validity, ctx: &mut ExecutionCtx) -> Vort #[inline] pub fn untranspose_bitbuffer(bits: BitBuffer) -> BitBuffer { - fastlanes_layout_apply(bits, untranspose_bits) -} - -fn fastlanes_layout_apply(bits: BitBuffer, op: F) -> BitBuffer { + assert!( + bits.inner().len().is_multiple_of(128), + "Transpose BitBuffer must be 128-byte aligned" + ); let (offset, len, bytes) = bits.into_inner(); + match bytes.try_into_mut() { + Ok(mut bytes_mut) => { + let (chunks, _) = bytes_mut.as_chunks_mut::<128>(); + let mut tmp = [0u8; 128]; + for chunk in chunks { + untranspose_bits(chunk, &mut tmp); + chunk.copy_from_slice(&tmp); + } + BitBuffer::new_with_offset(bytes_mut.freeze().into_byte_buffer(), len, offset) + } + Err(bytes) => bits_op_with_copy(bytes, len, offset, untranspose_bits), + } +} +fn bits_op_with_copy( + bytes: ByteBuffer, + len: usize, + offset: usize, + op: F, +) -> BitBuffer { let output_len = bytes.len().next_multiple_of(128); let mut output = ByteBufferMut::with_capacity(output_len); let (input_chunks, input_trailer) = bytes.as_chunks::<128>(); diff --git a/encodings/fastlanes/src/delta/array/delta_compress.rs b/encodings/fastlanes/src/delta/array/delta_compress.rs index e2d8c979254..636c0620815 100644 --- a/encodings/fastlanes/src/delta/array/delta_compress.rs +++ b/encodings/fastlanes/src/delta/array/delta_compress.rs @@ -21,7 +21,7 @@ use vortex_buffer::Buffer; use vortex_buffer::BufferMut; use vortex_error::VortexResult; -use crate::bit_transpose::transpose_bitbuffer; +use crate::bit_transpose::{transpose_bitbuffer, transpose_validity}; pub fn delta_compress( array: &PrimitiveArray, @@ -30,7 +30,7 @@ pub fn delta_compress( let (bases, deltas) = match_each_unsigned_integer_ptype!(array.ptype(), |T| { const LANES: usize = T::LANES; let (bases, deltas) = compress_primitive::(array.as_slice::()); - let validity = transpose_and_pad_validity(array.validity(), deltas.len(), ctx)?; + let validity = transpose_validity(array.validity(), ctx)?; ( PrimitiveArray::new(bases, array.dtype().nullability().into()), PrimitiveArray::new(deltas, validity), @@ -40,36 +40,6 @@ pub fn delta_compress( Ok((bases, deltas)) } -/// Transpose and pad validity to match the padded deltas length. -/// -/// For [`Validity::Array`], the validity bits are transposed into FastLanes order and then -/// extended to `padded_len`. The underlying byte buffer from transposition is already -/// padded to 128-byte alignment (1024 bits), which exactly matches our 1024-element chunks. -fn transpose_and_pad_validity( - validity: &Validity, - padded_len: usize, - ctx: &mut ExecutionCtx, -) -> VortexResult { - match validity { - Validity::Array(mask) => { - let bools = mask - .clone() - .execute::(ctx)? - .into_bool() - .into_bit_buffer(); - let transposed = transpose_bitbuffer(bools); - let (offset, _len, bytes) = transposed.into_inner(); - let padded = BitBuffer::new_with_offset(bytes, padded_len, offset); - Ok(Validity::Array( - BoolArray::new(padded, Validity::NonNullable).into_array(), - )) - } - v @ Validity::AllValid | v @ Validity::AllInvalid | v @ Validity::NonNullable => { - Ok(v.clone()) - } - } -} - fn compress_primitive( array: &[T], ) -> (Buffer, Buffer) { From 29af24d2ab29e9b7cac2edc85fe54b6892062e14 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Thu, 19 Mar 2026 20:47:00 -0700 Subject: [PATCH 13/18] comment Signed-off-by: Robert Kruszewski --- encodings/fastlanes/src/delta/array/delta_compress.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/encodings/fastlanes/src/delta/array/delta_compress.rs b/encodings/fastlanes/src/delta/array/delta_compress.rs index 636c0620815..1601055dbc6 100644 --- a/encodings/fastlanes/src/delta/array/delta_compress.rs +++ b/encodings/fastlanes/src/delta/array/delta_compress.rs @@ -7,21 +7,17 @@ use std::mem::MaybeUninit; use fastlanes::Delta; use fastlanes::FastLanes; use fastlanes::Transpose; -use vortex_array::Canonical; use vortex_array::ExecutionCtx; use vortex_array::IntoArray; -use vortex_array::arrays::BoolArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::dtype::NativePType; use vortex_array::match_each_unsigned_integer_ptype; -use vortex_array::validity::Validity; use vortex_array::vtable::ValidityHelper; -use vortex_buffer::BitBuffer; use vortex_buffer::Buffer; use vortex_buffer::BufferMut; use vortex_error::VortexResult; -use crate::bit_transpose::{transpose_bitbuffer, transpose_validity}; +use crate::bit_transpose::transpose_validity; pub fn delta_compress( array: &PrimitiveArray, @@ -30,6 +26,7 @@ pub fn delta_compress( let (bases, deltas) = match_each_unsigned_integer_ptype!(array.ptype(), |T| { const LANES: usize = T::LANES; let (bases, deltas) = compress_primitive::(array.as_slice::()); + // TODO(robert): This can be avoided if we add TransposedBoolArray that performs index translation when necessary. let validity = transpose_validity(array.validity(), ctx)?; ( PrimitiveArray::new(bases, array.dtype().nullability().into()), From ce669cb692939fc53667a07b7286123c3bf79c5a Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Thu, 19 Mar 2026 21:04:29 -0700 Subject: [PATCH 14/18] imports Signed-off-by: Robert Kruszewski --- encodings/fastlanes/src/delta/array/delta_compress.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/encodings/fastlanes/src/delta/array/delta_compress.rs b/encodings/fastlanes/src/delta/array/delta_compress.rs index 1601055dbc6..07174e294fa 100644 --- a/encodings/fastlanes/src/delta/array/delta_compress.rs +++ b/encodings/fastlanes/src/delta/array/delta_compress.rs @@ -8,7 +8,6 @@ use fastlanes::Delta; use fastlanes::FastLanes; use fastlanes::Transpose; use vortex_array::ExecutionCtx; -use vortex_array::IntoArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::dtype::NativePType; use vortex_array::match_each_unsigned_integer_ptype; @@ -24,8 +23,7 @@ pub fn delta_compress( ctx: &mut ExecutionCtx, ) -> VortexResult<(PrimitiveArray, PrimitiveArray)> { let (bases, deltas) = match_each_unsigned_integer_ptype!(array.ptype(), |T| { - const LANES: usize = T::LANES; - let (bases, deltas) = compress_primitive::(array.as_slice::()); + let (bases, deltas) = compress_primitive::(array.as_slice::()); // TODO(robert): This can be avoided if we add TransposedBoolArray that performs index translation when necessary. let validity = transpose_validity(array.validity(), ctx)?; ( From 23493b6af97eb99472e73af306b23b862875d3bf Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Thu, 19 Mar 2026 21:19:04 -0700 Subject: [PATCH 15/18] fixes Signed-off-by: Robert Kruszewski --- encodings/fastlanes/src/bit_transpose/validity.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/encodings/fastlanes/src/bit_transpose/validity.rs b/encodings/fastlanes/src/bit_transpose/validity.rs index a5f9f0eb9dd..cba118097b3 100644 --- a/encodings/fastlanes/src/bit_transpose/validity.rs +++ b/encodings/fastlanes/src/bit_transpose/validity.rs @@ -131,5 +131,5 @@ fn bits_op_with_copy( } unsafe { output.set_len(output_len) }; - BitBuffer::new_with_offset(output.freeze().into_byte_buffer(), len, offset) + BitBuffer::new_with_offset(output.freeze().into_byte_buffer(), len.next_multiple_of(1024), offset) } From eb159ea323e11a640fafb786fa90be950245fcf8 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Thu, 19 Mar 2026 21:41:08 -0700 Subject: [PATCH 16/18] format Signed-off-by: Robert Kruszewski --- encodings/fastlanes/src/bit_transpose/validity.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/encodings/fastlanes/src/bit_transpose/validity.rs b/encodings/fastlanes/src/bit_transpose/validity.rs index cba118097b3..13f1c6b0385 100644 --- a/encodings/fastlanes/src/bit_transpose/validity.rs +++ b/encodings/fastlanes/src/bit_transpose/validity.rs @@ -131,5 +131,9 @@ fn bits_op_with_copy( } unsafe { output.set_len(output_len) }; - BitBuffer::new_with_offset(output.freeze().into_byte_buffer(), len.next_multiple_of(1024), offset) + BitBuffer::new_with_offset( + output.freeze().into_byte_buffer(), + len.next_multiple_of(1024), + offset, + ) } From fa1f73cc2c01f556d01f35c2e6cdc2a250946e2e Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Thu, 19 Mar 2026 21:58:35 -0700 Subject: [PATCH 17/18] ignore delta Signed-off-by: Robert Kruszewski --- .../compat-gen/src/fixtures/arrays/synthetic/encodings/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/mod.rs b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/mod.rs index ce1296157fe..0c68d5e9abd 100644 --- a/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/mod.rs +++ b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/mod.rs @@ -37,7 +37,8 @@ pub fn fixtures() -> Vec> { Box::new(bytebool::ByteBoolFixture), Box::new(datetimeparts::DateTimePartsFixture), Box::new(decimal_byte_parts::DecimalBytePartsFixture), - Box::new(delta::DeltaFixture), + // Reenable this once delta is stable + // Box::new(delta::DeltaFixture), Box::new(dict::DictFixture), Box::new(fsst::FsstFixture), Box::new(for_::FoRFixture), From ad7d841ed35c4a22f4ea98bf5d5584d35ef84956 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Thu, 19 Mar 2026 22:08:50 -0700 Subject: [PATCH 18/18] nit Signed-off-by: Robert Kruszewski --- .../compat-gen/src/fixtures/arrays/synthetic/encodings/delta.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/delta.rs b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/delta.rs index 88bc8d38eb2..2c83df575ef 100644 --- a/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/delta.rs +++ b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/delta.rs @@ -18,6 +18,7 @@ use vortex_session::VortexSession; use super::N; use crate::fixtures::FlatLayoutFixture; +#[expect(dead_code)] pub struct DeltaFixture; impl FlatLayoutFixture for DeltaFixture {