diff --git a/Cargo.lock b/Cargo.lock index f3d1d80a2da..b0ddac8cfa5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10320,10 +10320,15 @@ dependencies = [ "tokio", "tracing", "uuid", + "vortex-alp", "vortex-array", "vortex-btrblocks", "vortex-buffer", + "vortex-bytebool", + "vortex-datetime-parts", + "vortex-decimal-byte-parts", "vortex-error", + "vortex-fastlanes", "vortex-flatbuffers", "vortex-io", "vortex-mask", @@ -10331,6 +10336,7 @@ dependencies = [ "vortex-sequence", "vortex-session", "vortex-utils", + "vortex-zigzag", ] [[package]] diff --git a/encodings/alp/public-api.lock b/encodings/alp/public-api.lock index 4109ac4dfd1..458502c62cb 100644 --- a/encodings/alp/public-api.lock +++ b/encodings/alp/public-api.lock @@ -90,6 +90,8 @@ pub fn vortex_alp::ALP::nbuffers(_array: &vortex_alp::ALPArray) -> usize pub fn vortex_alp::ALP::nchildren(array: &vortex_alp::ALPArray) -> usize +pub fn vortex_alp::ALP::plan_range_read(metadata: &vortex_array::metadata::ProstMetadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_alp::ALP::reduce_parent(array: &Self::Array, parent: &vortex_array::array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> pub fn vortex_alp::ALP::serialize(metadata: Self::Metadata) -> vortex_error::VortexResult>> @@ -246,6 +248,8 @@ pub fn vortex_alp::ALPRD::nbuffers(_array: &vortex_alp::ALPRDArray) -> usize pub fn vortex_alp::ALPRD::nchildren(array: &vortex_alp::ALPRDArray) -> usize +pub fn vortex_alp::ALPRD::plan_range_read(metadata: &vortex_array::metadata::ProstMetadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_alp::ALPRD::reduce_parent(array: &Self::Array, parent: &vortex_array::array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> pub fn vortex_alp::ALPRD::serialize(metadata: Self::Metadata) -> vortex_error::VortexResult>> diff --git a/encodings/alp/src/alp/array.rs b/encodings/alp/src/alp/array.rs index 965c1622ca8..4fc4f724dd9 100644 --- a/encodings/alp/src/alp/array.rs +++ b/encodings/alp/src/alp/array.rs @@ -3,6 +3,7 @@ use std::fmt::Debug; use std::hash::Hash; +use std::ops::Range; use vortex_array::ArrayEq; use vortex_array::ArrayHash; @@ -25,6 +26,9 @@ use vortex_array::stats::ArrayStats; use vortex_array::stats::StatsSetRef; use vortex_array::vtable; use vortex_array::vtable::ArrayId; +use vortex_array::vtable::ChildRangeRead; +use vortex_array::vtable::EncodingRangeRead; +use vortex_array::vtable::RangeDecodeInfo; use vortex_array::vtable::VTable; use vortex_array::vtable::ValidityChild; use vortex_array::vtable::ValidityVTableFromChild; @@ -258,6 +262,38 @@ impl VTable for ALP { ) -> VortexResult> { PARENT_KERNELS.execute(array, parent, child_idx, ctx) } + + fn plan_range_read( + metadata: &ProstMetadata, + row_range: Range, + row_count: usize, + dtype: &DType, + ) -> Option { + // Patches cannot be safely sub-ranged (global indices). + if metadata.0.patches.is_some() { + return None; + } + + // Child 0 = encoded values (f32→i32 or f64→i64). + let child_dtype = match dtype { + DType::Primitive(PType::F32, n) => DType::Primitive(PType::I32, *n), + DType::Primitive(PType::F64, n) => DType::Primitive(PType::I64, *n), + _ => return None, + }; + + Some(EncodingRangeRead { + buffer_sub_ranges: vec![], + children: vec![ChildRangeRead::Recurse { + row_range, + row_count, + dtype: child_dtype, + }], + decode_info: RangeDecodeInfo::FromChild { + child_idx: 0, + divisor: 1, + }, + }) + } } #[derive(Clone, Debug)] diff --git a/encodings/alp/src/alp_rd/array.rs b/encodings/alp/src/alp_rd/array.rs index df72cc189d3..b80dbc38f4b 100644 --- a/encodings/alp/src/alp_rd/array.rs +++ b/encodings/alp/src/alp_rd/array.rs @@ -3,6 +3,7 @@ use std::fmt::Debug; use std::hash::Hash; +use std::ops::Range; use itertools::Itertools; use vortex_array::ArrayEq; @@ -29,6 +30,9 @@ use vortex_array::stats::StatsSetRef; use vortex_array::validity::Validity; use vortex_array::vtable; use vortex_array::vtable::ArrayId; +use vortex_array::vtable::ChildRangeRead; +use vortex_array::vtable::EncodingRangeRead; +use vortex_array::vtable::RangeDecodeInfo; use vortex_array::vtable::VTable; use vortex_array::vtable::ValidityChild; use vortex_array::vtable::ValidityVTableFromChild; @@ -354,6 +358,53 @@ impl VTable for ALPRD { ) -> VortexResult> { PARENT_KERNELS.execute(array, parent, child_idx, ctx) } + + fn plan_range_read( + metadata: &ProstMetadata, + row_range: Range, + row_count: usize, + dtype: &DType, + ) -> Option { + // Patches cannot be safely sub-ranged (global indices). + if metadata.0.patches.is_some() { + return None; + } + + let left_parts_ptype = PType::try_from(metadata.0.left_parts_ptype).ok()?; + let left_parts_dtype = DType::Primitive(left_parts_ptype, dtype.nullability()); + + let right_parts_dtype = match dtype { + DType::Primitive(PType::F32, _) => { + DType::Primitive(PType::U32, Nullability::NonNullable) + } + DType::Primitive(PType::F64, _) => { + DType::Primitive(PType::U64, Nullability::NonNullable) + } + _ => return None, + }; + + Some(EncodingRangeRead { + buffer_sub_ranges: vec![], + children: vec![ + // Child 0 = left_parts. + ChildRangeRead::Recurse { + row_range: row_range.clone(), + row_count, + dtype: left_parts_dtype, + }, + // Child 1 = right_parts. + ChildRangeRead::Recurse { + row_range, + row_count, + dtype: right_parts_dtype, + }, + ], + decode_info: RangeDecodeInfo::FromChild { + child_idx: 0, + divisor: 1, + }, + }) + } } #[derive(Clone, Debug)] diff --git a/encodings/bytebool/public-api.lock b/encodings/bytebool/public-api.lock index 192d025cf34..f7201f2408e 100644 --- a/encodings/bytebool/public-api.lock +++ b/encodings/bytebool/public-api.lock @@ -68,6 +68,8 @@ pub fn vortex_bytebool::ByteBool::nbuffers(_array: &vortex_bytebool::ByteBoolArr pub fn vortex_bytebool::ByteBool::nchildren(array: &vortex_bytebool::ByteBoolArray) -> usize +pub fn vortex_bytebool::ByteBool::plan_range_read(_metadata: &vortex_array::metadata::EmptyMetadata, row_range: core::ops::range::Range, _row_count: usize, _dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_bytebool::ByteBool::reduce_parent(array: &Self::Array, parent: &vortex_array::array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> pub fn vortex_bytebool::ByteBool::serialize(_metadata: Self::Metadata) -> vortex_error::VortexResult>> diff --git a/encodings/bytebool/src/array.rs b/encodings/bytebool/src/array.rs index 4b6a0e7cb58..490f7761247 100644 --- a/encodings/bytebool/src/array.rs +++ b/encodings/bytebool/src/array.rs @@ -3,6 +3,7 @@ use std::fmt::Debug; use std::hash::Hash; +use std::ops::Range; use vortex_array::ArrayEq; use vortex_array::ArrayHash; @@ -22,7 +23,10 @@ use vortex_array::stats::StatsSetRef; use vortex_array::validity::Validity; use vortex_array::vtable; use vortex_array::vtable::ArrayId; +use vortex_array::vtable::BufferSubRange; +use vortex_array::vtable::EncodingRangeRead; use vortex_array::vtable::OperationsVTable; +use vortex_array::vtable::RangeDecodeInfo; use vortex_array::vtable::VTable; use vortex_array::vtable::ValidityHelper; use vortex_array::vtable::ValidityVTableFromValidityHelper; @@ -199,6 +203,23 @@ impl VTable for ByteBool { ) -> VortexResult> { PARENT_KERNELS.execute(array, parent, child_idx, ctx) } + + fn plan_range_read( + _metadata: &EmptyMetadata, + row_range: Range, + _row_count: usize, + _dtype: &DType, + ) -> Option { + // 1 byte per boolean value + Some(EncodingRangeRead { + buffer_sub_ranges: vec![BufferSubRange::Range(row_range.start..row_range.end)], + children: vec![], + decode_info: RangeDecodeInfo::Leaf { + decode_len: row_range.len(), + post_slice: None, + }, + }) + } } #[derive(Clone, Debug)] diff --git a/encodings/datetime-parts/public-api.lock b/encodings/datetime-parts/public-api.lock index f8c1c15d076..51c28edb91f 100644 --- a/encodings/datetime-parts/public-api.lock +++ b/encodings/datetime-parts/public-api.lock @@ -80,6 +80,8 @@ pub fn vortex_datetime_parts::DateTimeParts::nbuffers(_array: &vortex_datetime_p pub fn vortex_datetime_parts::DateTimeParts::nchildren(_array: &vortex_datetime_parts::DateTimePartsArray) -> usize +pub fn vortex_datetime_parts::DateTimeParts::plan_range_read(metadata: &vortex_array::metadata::ProstMetadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_datetime_parts::DateTimeParts::reduce_parent(array: &Self::Array, parent: &vortex_array::array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> pub fn vortex_datetime_parts::DateTimeParts::serialize(metadata: Self::Metadata) -> vortex_error::VortexResult>> diff --git a/encodings/datetime-parts/src/array.rs b/encodings/datetime-parts/src/array.rs index b50ff8db0a5..75cd2bd9692 100644 --- a/encodings/datetime-parts/src/array.rs +++ b/encodings/datetime-parts/src/array.rs @@ -3,6 +3,7 @@ use std::fmt::Debug; use std::hash::Hash; +use std::ops::Range; use vortex_array::ArrayEq; use vortex_array::ArrayHash; @@ -24,6 +25,9 @@ use vortex_array::stats::ArrayStats; use vortex_array::stats::StatsSetRef; use vortex_array::vtable; use vortex_array::vtable::ArrayId; +use vortex_array::vtable::ChildRangeRead; +use vortex_array::vtable::EncodingRangeRead; +use vortex_array::vtable::RangeDecodeInfo; use vortex_array::vtable::VTable; use vortex_array::vtable::ValidityChild; use vortex_array::vtable::ValidityVTableFromChild; @@ -244,6 +248,45 @@ impl VTable for DateTimeParts { ) -> VortexResult> { PARENT_KERNELS.execute(array, parent, child_idx, ctx) } + + fn plan_range_read( + metadata: &ProstMetadata, + row_range: Range, + row_count: usize, + dtype: &DType, + ) -> Option { + let days_ptype = metadata.0.get_days_ptype().ok()?; + let seconds_ptype = metadata.0.get_seconds_ptype().ok()?; + let subseconds_ptype = metadata.0.get_subseconds_ptype().ok()?; + + Some(EncodingRangeRead { + buffer_sub_ranges: vec![], + children: vec![ + // Child 0 = days (carries validity from parent dtype). + ChildRangeRead::Recurse { + row_range: row_range.clone(), + row_count, + dtype: DType::Primitive(days_ptype, dtype.nullability()), + }, + // Child 1 = seconds (always non-nullable). + ChildRangeRead::Recurse { + row_range: row_range.clone(), + row_count, + dtype: DType::Primitive(seconds_ptype, Nullability::NonNullable), + }, + // Child 2 = subseconds (always non-nullable). + ChildRangeRead::Recurse { + row_range, + row_count, + dtype: DType::Primitive(subseconds_ptype, Nullability::NonNullable), + }, + ], + decode_info: RangeDecodeInfo::FromChild { + child_idx: 0, + divisor: 1, + }, + }) + } } #[derive(Clone, Debug)] diff --git a/encodings/decimal-byte-parts/public-api.lock b/encodings/decimal-byte-parts/public-api.lock index e805c8a1b07..ada90956f2d 100644 --- a/encodings/decimal-byte-parts/public-api.lock +++ b/encodings/decimal-byte-parts/public-api.lock @@ -80,6 +80,8 @@ pub fn vortex_decimal_byte_parts::DecimalByteParts::nbuffers(_array: &vortex_dec pub fn vortex_decimal_byte_parts::DecimalByteParts::nchildren(_array: &vortex_decimal_byte_parts::DecimalBytePartsArray) -> usize +pub fn vortex_decimal_byte_parts::DecimalByteParts::plan_range_read(metadata: &vortex_array::metadata::ProstMetadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_decimal_byte_parts::DecimalByteParts::reduce_parent(array: &Self::Array, parent: &vortex_array::array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> pub fn vortex_decimal_byte_parts::DecimalByteParts::serialize(metadata: Self::Metadata) -> vortex_error::VortexResult>> diff --git a/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs b/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs index b60f6516259..8f654049b8a 100644 --- a/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs +++ b/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs @@ -6,6 +6,7 @@ mod rules; mod slice; use std::hash::Hash; +use std::ops::Range; use prost::Message as _; use vortex_array::ArrayEq; @@ -33,7 +34,10 @@ use vortex_array::stats::ArrayStats; use vortex_array::stats::StatsSetRef; use vortex_array::vtable; use vortex_array::vtable::ArrayId; +use vortex_array::vtable::ChildRangeRead; +use vortex_array::vtable::EncodingRangeRead; use vortex_array::vtable::OperationsVTable; +use vortex_array::vtable::RangeDecodeInfo; use vortex_array::vtable::VTable; use vortex_array::vtable::ValidityChild; use vortex_array::vtable::ValidityHelper; @@ -202,6 +206,27 @@ impl VTable for DecimalByteParts { ) -> VortexResult> { PARENT_KERNELS.execute(array, parent, child_idx, ctx) } + + fn plan_range_read( + metadata: &ProstMetadata, + row_range: Range, + row_count: usize, + dtype: &DType, + ) -> Option { + let child_dtype = DType::Primitive(metadata.zeroth_child_ptype(), dtype.nullability()); + Some(EncodingRangeRead { + buffer_sub_ranges: vec![], + children: vec![ChildRangeRead::Recurse { + row_range, + row_count, + dtype: child_dtype, + }], + decode_info: RangeDecodeInfo::FromChild { + child_idx: 0, + divisor: 1, + }, + }) + } } /// This array encodes decimals as between 1-4 columns of primitive typed children. diff --git a/encodings/fastlanes/public-api.lock b/encodings/fastlanes/public-api.lock index 8b6cd3acb17..65771b918a0 100644 --- a/encodings/fastlanes/public-api.lock +++ b/encodings/fastlanes/public-api.lock @@ -186,6 +186,8 @@ pub fn vortex_fastlanes::BitPacked::nbuffers(_array: &vortex_fastlanes::BitPacke pub fn vortex_fastlanes::BitPacked::nchildren(array: &vortex_fastlanes::BitPackedArray) -> usize +pub fn vortex_fastlanes::BitPacked::plan_range_read(metadata: &vortex_array::metadata::ProstMetadata, row_range: core::ops::range::Range, _row_count: usize, _dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_fastlanes::BitPacked::reduce_parent(array: &Self::Array, parent: &vortex_array::array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> pub fn vortex_fastlanes::BitPacked::serialize(metadata: Self::Metadata) -> vortex_error::VortexResult>> @@ -332,6 +334,8 @@ pub fn vortex_fastlanes::Delta::nbuffers(_array: &vortex_fastlanes::DeltaArray) pub fn vortex_fastlanes::Delta::nchildren(_array: &vortex_fastlanes::DeltaArray) -> usize +pub fn vortex_fastlanes::Delta::plan_range_read(metadata: &vortex_array::metadata::ProstMetadata, row_range: core::ops::range::Range, _row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_fastlanes::Delta::reduce_parent(array: &Self::Array, parent: &vortex_array::array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> pub fn vortex_fastlanes::Delta::serialize(metadata: Self::Metadata) -> vortex_error::VortexResult>> @@ -484,6 +488,8 @@ pub fn vortex_fastlanes::FoR::nbuffers(_array: &vortex_fastlanes::FoRArray) -> u pub fn vortex_fastlanes::FoR::nchildren(_array: &vortex_fastlanes::FoRArray) -> usize +pub fn vortex_fastlanes::FoR::plan_range_read(_metadata: &vortex_array::scalar::Scalar, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_fastlanes::FoR::reduce_parent(array: &Self::Array, parent: &vortex_array::array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> pub fn vortex_fastlanes::FoR::serialize(metadata: Self::Metadata) -> vortex_error::VortexResult>> diff --git a/encodings/fastlanes/src/bitpacking/vtable/mod.rs b/encodings/fastlanes/src/bitpacking/vtable/mod.rs index c414dfa5cc0..94fad61dd0d 100644 --- a/encodings/fastlanes/src/bitpacking/vtable/mod.rs +++ b/encodings/fastlanes/src/bitpacking/vtable/mod.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use std::hash::Hash; +use std::ops::Range; use vortex_array::ArrayEq; use vortex_array::ArrayHash; @@ -25,6 +26,9 @@ use vortex_array::stats::StatsSetRef; use vortex_array::validity::Validity; use vortex_array::vtable; use vortex_array::vtable::ArrayId; +use vortex_array::vtable::BufferSubRange; +use vortex_array::vtable::EncodingRangeRead; +use vortex_array::vtable::RangeDecodeInfo; use vortex_array::vtable::VTable; use vortex_array::vtable::ValidityVTableFromValidityHelper; use vortex_array::vtable::patches_child; @@ -366,6 +370,44 @@ impl VTable for BitPacked { ) -> VortexResult> { PARENT_KERNELS.execute(array, parent, child_idx, ctx) } + + fn plan_range_read( + metadata: &ProstMetadata, + row_range: Range, + _row_count: usize, + _dtype: &DType, + ) -> Option { + let bit_width = metadata.0.bit_width as usize; + + if bit_width == 0 || metadata.0.offset != 0 { + return None; + } + + // Patches and validity children not supported for range read. + if metadata.0.patches.is_some() { + return None; + } + + let first_block = row_range.start / 1024; + let last_block = row_range.end.saturating_sub(1) / 1024; + let bytes_per_block = 128 * bit_width; + let byte_start = first_block * bytes_per_block; + let byte_end = (last_block + 1) * bytes_per_block; + + let block_start_row = first_block * 1024; + let decode_len = row_range.end - block_start_row; + let intra_block_offset = row_range.start - block_start_row; + let post_slice = (intra_block_offset > 0).then_some(intra_block_offset..decode_len); + + Some(EncodingRangeRead { + buffer_sub_ranges: vec![BufferSubRange::Range(byte_start..byte_end)], + children: vec![], + decode_info: RangeDecodeInfo::Leaf { + decode_len, + post_slice, + }, + }) + } } #[derive(Debug)] diff --git a/encodings/fastlanes/src/delta/vtable/mod.rs b/encodings/fastlanes/src/delta/vtable/mod.rs index 9ebbaebe729..dd300dd45ba 100644 --- a/encodings/fastlanes/src/delta/vtable/mod.rs +++ b/encodings/fastlanes/src/delta/vtable/mod.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use std::hash::Hash; +use std::ops::Range; use fastlanes::FastLanes; use prost::Message; @@ -21,11 +22,13 @@ use vortex_array::serde::ArrayChildren; use vortex_array::stats::StatsSetRef; use vortex_array::vtable; use vortex_array::vtable::ArrayId; +use vortex_array::vtable::ChildRangeRead; +use vortex_array::vtable::EncodingRangeRead; +use vortex_array::vtable::RangeDecodeInfo; use vortex_array::vtable::VTable; use vortex_array::vtable::ValidityVTableFromChildSliceHelper; use vortex_error::VortexResult; use vortex_error::vortex_ensure; -use vortex_error::vortex_err; use vortex_error::vortex_panic; use vortex_session::VortexSession; @@ -177,11 +180,16 @@ impl VTable for Delta { let ptype = PType::try_from(dtype)?; let lanes = match_each_unsigned_integer_ptype!(ptype, |T| { ::LANES }); - // Compute the length of the bases array - let deltas_len = usize::try_from(metadata.0.deltas_len) - .map_err(|_| vortex_err!("deltas_len {} overflowed usize", metadata.0.deltas_len))?; + // Compute the length of the deltas array from len + offset rather than metadata. + // This allows range reads to work with sub-ranged children, where the buffer is + // shorter than the original metadata.deltas_len. + let deltas_len = len + metadata.0.offset as usize; let num_chunks = deltas_len / 1024; - let remainder_base_size = if deltas_len % 1024 > 0 { 1 } else { 0 }; + let remainder_base_size = if !deltas_len.is_multiple_of(1024) { + 1 + } else { + 0 + }; let bases_len = num_chunks * lanes + remainder_base_size; let bases = children.get(0, dtype, bases_len)?; @@ -195,6 +203,76 @@ impl VTable for Delta { delta_decompress(array, ctx)?.into_array(), )) } + + fn plan_range_read( + metadata: &ProstMetadata, + row_range: Range, + _row_count: usize, + dtype: &DType, + ) -> Option { + if metadata.0.offset != 0 { + return None; + } + + let deltas_len = usize::try_from(metadata.0.deltas_len).ok()?; + let byte_width = match dtype { + DType::Primitive(ptype, _) => ptype.byte_width(), + _ => return None, + }; + let lanes = match byte_width { + 1 => 128, + 2 => 64, + 4 => 32, + 8 => 16, + _ => return None, + }; + + let first_chunk = row_range.start / 1024; + let last_chunk = row_range.end.saturating_sub(1) / 1024; + + // Child 1 = deltas (row-indexed, same dtype). + let deltas_row_start = first_chunk * 1024; + let deltas_row_end = ((last_chunk + 1) * 1024).min(deltas_len); + + // Child 0 = bases (LANES values per full chunk, 1 per remainder). + let num_full_chunks = deltas_len / 1024; + let has_remainder = !deltas_len.is_multiple_of(1024); + let bases_len = num_full_chunks * lanes + if has_remainder { 1 } else { 0 }; + let bases_row_start = first_chunk * lanes; + let bases_row_end = if last_chunk >= num_full_chunks { + bases_len + } else { + (last_chunk + 1) * lanes + } + .min(bases_len); + + let sub_deltas_len = deltas_row_end - deltas_row_start; + let intra_chunk_offset = row_range.start - deltas_row_start; + let post_slice = (intra_chunk_offset > 0 || sub_deltas_len > row_range.len()) + .then(|| intra_chunk_offset..(intra_chunk_offset + row_range.len())); + + Some(EncodingRangeRead { + buffer_sub_ranges: vec![], + children: vec![ + // Child 0 = bases. + ChildRangeRead::Recurse { + row_range: bases_row_start..bases_row_end, + row_count: bases_len, + dtype: dtype.clone(), + }, + // Child 1 = deltas. + ChildRangeRead::Recurse { + row_range: deltas_row_start..deltas_row_end, + row_count: deltas_len, + dtype: dtype.clone(), + }, + ], + decode_info: RangeDecodeInfo::Leaf { + decode_len: sub_deltas_len, + post_slice, + }, + }) + } } #[derive(Debug)] diff --git a/encodings/fastlanes/src/for/vtable/mod.rs b/encodings/fastlanes/src/for/vtable/mod.rs index 5eb73711ae4..0c80a66f7d0 100644 --- a/encodings/fastlanes/src/for/vtable/mod.rs +++ b/encodings/fastlanes/src/for/vtable/mod.rs @@ -3,6 +3,7 @@ use std::fmt::Debug; use std::hash::Hash; +use std::ops::Range; use vortex_array::ArrayEq; use vortex_array::ArrayHash; @@ -19,6 +20,9 @@ use vortex_array::serde::ArrayChildren; use vortex_array::stats::StatsSetRef; use vortex_array::vtable; use vortex_array::vtable::ArrayId; +use vortex_array::vtable::ChildRangeRead; +use vortex_array::vtable::EncodingRangeRead; +use vortex_array::vtable::RangeDecodeInfo; use vortex_array::vtable::VTable; use vortex_array::vtable::ValidityVTableFromChild; use vortex_error::VortexResult; @@ -178,6 +182,27 @@ impl VTable for FoR { ) -> VortexResult> { PARENT_KERNELS.execute(array, parent, child_idx, ctx) } + + fn plan_range_read( + _metadata: &Scalar, + row_range: Range, + row_count: usize, + dtype: &DType, + ) -> Option { + // FoR is a transparent wrapper: delegate entirely to child 0. + Some(EncodingRangeRead { + buffer_sub_ranges: vec![], + children: vec![ChildRangeRead::Recurse { + row_range, + row_count, + dtype: dtype.clone(), + }], + decode_info: RangeDecodeInfo::FromChild { + child_idx: 0, + divisor: 1, + }, + }) + } } #[derive(Debug)] diff --git a/encodings/sequence/public-api.lock b/encodings/sequence/public-api.lock index b92af122611..d5b63a4013c 100644 --- a/encodings/sequence/public-api.lock +++ b/encodings/sequence/public-api.lock @@ -86,6 +86,8 @@ pub fn vortex_sequence::Sequence::nbuffers(_array: &vortex_sequence::SequenceArr pub fn vortex_sequence::Sequence::nchildren(_array: &vortex_sequence::SequenceArray) -> usize +pub fn vortex_sequence::Sequence::plan_range_read(_metadata: &vortex_sequence::array::SequenceMetadata, row_range: core::ops::range::Range, _row_count: usize, _dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_sequence::Sequence::reduce_parent(array: &vortex_sequence::SequenceArray, parent: &vortex_array::array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> pub fn vortex_sequence::Sequence::serialize(metadata: Self::Metadata) -> vortex_error::VortexResult>> diff --git a/encodings/sequence/src/array.rs b/encodings/sequence/src/array.rs index ea716ced1ea..4677403d8b1 100644 --- a/encodings/sequence/src/array.rs +++ b/encodings/sequence/src/array.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use std::hash::Hash; +use std::ops::Range; use num_traits::cast::FromPrimitive; use vortex_array::ArrayRef; @@ -32,7 +33,9 @@ use vortex_array::stats::StatsSetRef; use vortex_array::validity::Validity; use vortex_array::vtable; use vortex_array::vtable::ArrayId; +use vortex_array::vtable::EncodingRangeRead; use vortex_array::vtable::OperationsVTable; +use vortex_array::vtable::RangeDecodeInfo; use vortex_array::vtable::VTable; use vortex_array::vtable::ValidityVTable; use vortex_error::VortexExpect; @@ -401,6 +404,22 @@ impl VTable for Sequence { ) -> VortexResult> { RULES.evaluate(array, parent, child_idx) } + + fn plan_range_read( + _metadata: &SequenceMetadata, + row_range: Range, + _row_count: usize, + _dtype: &DType, + ) -> Option { + Some(EncodingRangeRead { + buffer_sub_ranges: vec![], + children: vec![], + decode_info: RangeDecodeInfo::Leaf { + decode_len: row_range.len(), + post_slice: None, + }, + }) + } } impl OperationsVTable for Sequence { diff --git a/encodings/zigzag/public-api.lock b/encodings/zigzag/public-api.lock index 590e897cd8c..05be7954b22 100644 --- a/encodings/zigzag/public-api.lock +++ b/encodings/zigzag/public-api.lock @@ -72,6 +72,8 @@ pub fn vortex_zigzag::ZigZag::nbuffers(_array: &vortex_zigzag::ZigZagArray) -> u pub fn vortex_zigzag::ZigZag::nchildren(_array: &vortex_zigzag::ZigZagArray) -> usize +pub fn vortex_zigzag::ZigZag::plan_range_read(_metadata: &vortex_array::metadata::EmptyMetadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_zigzag::ZigZag::reduce_parent(array: &Self::Array, parent: &vortex_array::array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> pub fn vortex_zigzag::ZigZag::serialize(_metadata: Self::Metadata) -> vortex_error::VortexResult>> diff --git a/encodings/zigzag/src/array.rs b/encodings/zigzag/src/array.rs index f0b7898b26c..1dc4eb6cea7 100644 --- a/encodings/zigzag/src/array.rs +++ b/encodings/zigzag/src/array.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use std::hash::Hash; +use std::ops::Range; use vortex_array::ArrayEq; use vortex_array::ArrayHash; @@ -22,7 +23,10 @@ use vortex_array::stats::ArrayStats; use vortex_array::stats::StatsSetRef; use vortex_array::vtable; use vortex_array::vtable::ArrayId; +use vortex_array::vtable::ChildRangeRead; +use vortex_array::vtable::EncodingRangeRead; use vortex_array::vtable::OperationsVTable; +use vortex_array::vtable::RangeDecodeInfo; use vortex_array::vtable::VTable; use vortex_array::vtable::ValidityChild; use vortex_array::vtable::ValidityVTableFromChild; @@ -171,6 +175,29 @@ impl VTable for ZigZag { ) -> VortexResult> { PARENT_KERNELS.execute(array, parent, child_idx, ctx) } + + fn plan_range_read( + _metadata: &EmptyMetadata, + row_range: Range, + row_count: usize, + dtype: &DType, + ) -> Option { + // ZigZag is a transparent wrapper: delegate entirely to child 0. + let ptype = PType::try_from(dtype).ok()?; + let encoded_dtype = DType::Primitive(ptype.to_unsigned(), dtype.nullability()); + Some(EncodingRangeRead { + buffer_sub_ranges: vec![], + children: vec![ChildRangeRead::Recurse { + row_range, + row_count, + dtype: encoded_dtype, + }], + decode_info: RangeDecodeInfo::FromChild { + child_idx: 0, + divisor: 1, + }, + }) + } } #[derive(Clone, Debug)] diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index ed872aa095a..a09907f3dc4 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -510,6 +510,8 @@ pub fn vortex_array::arrays::Bool::nbuffers(_array: &vortex_array::arrays::BoolA pub fn vortex_array::arrays::Bool::nchildren(array: &vortex_array::arrays::BoolArray) -> usize +pub fn vortex_array::arrays::Bool::plan_range_read(metadata: &vortex_array::ProstMetadata, row_range: core::ops::range::Range, _row_count: usize, _dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Bool::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Bool::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -736,6 +738,8 @@ pub fn vortex_array::arrays::Chunked::nbuffers(_array: &vortex_array::arrays::Ch pub fn vortex_array::arrays::Chunked::nchildren(array: &vortex_array::arrays::ChunkedArray) -> usize +pub fn vortex_array::arrays::Chunked::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Chunked::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Chunked::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -910,6 +914,8 @@ pub fn vortex_array::arrays::Constant::nbuffers(_array: &vortex_array::arrays::C pub fn vortex_array::arrays::Constant::nchildren(_array: &vortex_array::arrays::ConstantArray) -> usize +pub fn vortex_array::arrays::Constant::plan_range_read(_metadata: &vortex_array::scalar::Scalar, row_range: core::ops::range::Range, _row_count: usize, _dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Constant::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Constant::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -1134,6 +1140,8 @@ pub fn vortex_array::arrays::Decimal::nbuffers(_array: &vortex_array::arrays::De pub fn vortex_array::arrays::Decimal::nchildren(array: &vortex_array::arrays::DecimalArray) -> usize +pub fn vortex_array::arrays::Decimal::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Decimal::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Decimal::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -1356,6 +1364,8 @@ pub fn vortex_array::arrays::dict::Dict::nbuffers(_array: &vortex_array::arrays: pub fn vortex_array::arrays::dict::Dict::nchildren(_array: &vortex_array::arrays::dict::DictArray) -> usize +pub fn vortex_array::arrays::dict::Dict::plan_range_read(metadata: &vortex_array::ProstMetadata, row_range: core::ops::range::Range, row_count: usize, _dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::dict::Dict::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::dict::Dict::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -1474,6 +1484,8 @@ pub fn vortex_array::arrays::dict::Dict::nbuffers(_array: &vortex_array::arrays: pub fn vortex_array::arrays::dict::Dict::nchildren(_array: &vortex_array::arrays::dict::DictArray) -> usize +pub fn vortex_array::arrays::dict::Dict::plan_range_read(metadata: &vortex_array::ProstMetadata, row_range: core::ops::range::Range, row_count: usize, _dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::dict::Dict::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::dict::Dict::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -1788,6 +1800,8 @@ pub fn vortex_array::arrays::Extension::nbuffers(_array: &vortex_array::arrays:: pub fn vortex_array::arrays::Extension::nchildren(_array: &vortex_array::arrays::ExtensionArray) -> usize +pub fn vortex_array::arrays::Extension::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Extension::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Extension::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -1924,6 +1938,8 @@ pub fn vortex_array::arrays::Filter::nbuffers(_array: &Self::Array) -> usize pub fn vortex_array::arrays::Filter::nchildren(_array: &Self::Array) -> usize +pub fn vortex_array::arrays::Filter::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Filter::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Filter::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -2154,6 +2170,8 @@ pub fn vortex_array::arrays::FixedSizeList::nbuffers(_array: &vortex_array::arra pub fn vortex_array::arrays::FixedSizeList::nchildren(array: &vortex_array::arrays::FixedSizeListArray) -> usize +pub fn vortex_array::arrays::FixedSizeList::plan_range_read(_metadata: &vortex_array::EmptyMetadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::FixedSizeList::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::FixedSizeList::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -2316,6 +2334,8 @@ pub fn vortex_array::arrays::List::nbuffers(_array: &vortex_array::arrays::ListA pub fn vortex_array::arrays::List::nchildren(array: &vortex_array::arrays::ListArray) -> usize +pub fn vortex_array::arrays::List::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::List::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::List::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -2498,6 +2518,8 @@ pub fn vortex_array::arrays::ListView::nbuffers(_array: &vortex_array::arrays::L pub fn vortex_array::arrays::ListView::nchildren(array: &vortex_array::arrays::ListViewArray) -> usize +pub fn vortex_array::arrays::ListView::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::ListView::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::ListView::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -2676,6 +2698,8 @@ pub fn vortex_array::arrays::Masked::nbuffers(_array: &Self::Array) -> usize pub fn vortex_array::arrays::Masked::nchildren(array: &Self::Array) -> usize +pub fn vortex_array::arrays::Masked::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Masked::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Masked::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -2818,6 +2842,8 @@ pub fn vortex_array::arrays::null::Null::nbuffers(_array: &vortex_array::arrays: pub fn vortex_array::arrays::null::Null::nchildren(_array: &vortex_array::arrays::null::NullArray) -> usize +pub fn vortex_array::arrays::null::Null::plan_range_read(_metadata: &vortex_array::EmptyMetadata, row_range: core::ops::range::Range, _row_count: usize, _dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::null::Null::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::null::Null::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -3050,6 +3076,8 @@ pub fn vortex_array::arrays::Primitive::nbuffers(_array: &vortex_array::arrays:: pub fn vortex_array::arrays::Primitive::nchildren(array: &vortex_array::arrays::PrimitiveArray) -> usize +pub fn vortex_array::arrays::Primitive::plan_range_read(_metadata: &vortex_array::EmptyMetadata, row_range: core::ops::range::Range, _row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Primitive::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Primitive::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -3362,6 +3390,8 @@ pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::nbuffers(_array: &vortex pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::nchildren(array: &vortex_array::arrays::scalar_fn::ScalarFnArray) -> usize +pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -3444,6 +3474,8 @@ pub fn vortex_array::arrays::Shared::nbuffers(_array: &Self::Array) -> usize pub fn vortex_array::arrays::Shared::nchildren(_array: &Self::Array) -> usize +pub fn vortex_array::arrays::Shared::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Shared::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Shared::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -3562,6 +3594,8 @@ pub fn vortex_array::arrays::slice::Slice::nbuffers(_array: &Self::Array) -> usi pub fn vortex_array::arrays::slice::Slice::nchildren(_array: &Self::Array) -> usize +pub fn vortex_array::arrays::slice::Slice::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::slice::Slice::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::slice::Slice::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -3828,6 +3862,8 @@ pub fn vortex_array::arrays::Struct::nbuffers(_array: &vortex_array::arrays::Str pub fn vortex_array::arrays::Struct::nchildren(array: &vortex_array::arrays::StructArray) -> usize +pub fn vortex_array::arrays::Struct::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Struct::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Struct::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -4058,6 +4094,8 @@ pub fn vortex_array::arrays::VarBin::nbuffers(_array: &vortex_array::arrays::Var pub fn vortex_array::arrays::VarBin::nchildren(array: &vortex_array::arrays::VarBinArray) -> usize +pub fn vortex_array::arrays::VarBin::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::VarBin::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::VarBin::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -4474,6 +4512,8 @@ pub fn vortex_array::arrays::VarBinView::nbuffers(array: &vortex_array::arrays:: pub fn vortex_array::arrays::VarBinView::nchildren(array: &vortex_array::arrays::VarBinViewArray) -> usize +pub fn vortex_array::arrays::VarBinView::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::VarBinView::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::VarBinView::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -4710,6 +4750,8 @@ pub fn vortex_array::arrays::Bool::nbuffers(_array: &vortex_array::arrays::BoolA pub fn vortex_array::arrays::Bool::nchildren(array: &vortex_array::arrays::BoolArray) -> usize +pub fn vortex_array::arrays::Bool::plan_range_read(metadata: &vortex_array::ProstMetadata, row_range: core::ops::range::Range, _row_count: usize, _dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Bool::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Bool::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -4908,6 +4950,8 @@ pub fn vortex_array::arrays::Chunked::nbuffers(_array: &vortex_array::arrays::Ch pub fn vortex_array::arrays::Chunked::nchildren(array: &vortex_array::arrays::ChunkedArray) -> usize +pub fn vortex_array::arrays::Chunked::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Chunked::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Chunked::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -5080,6 +5124,8 @@ pub fn vortex_array::arrays::Constant::nbuffers(_array: &vortex_array::arrays::C pub fn vortex_array::arrays::Constant::nchildren(_array: &vortex_array::arrays::ConstantArray) -> usize +pub fn vortex_array::arrays::Constant::plan_range_read(_metadata: &vortex_array::scalar::Scalar, row_range: core::ops::range::Range, _row_count: usize, _dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Constant::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Constant::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -5240,6 +5286,8 @@ pub fn vortex_array::arrays::Decimal::nbuffers(_array: &vortex_array::arrays::De pub fn vortex_array::arrays::Decimal::nchildren(array: &vortex_array::arrays::DecimalArray) -> usize +pub fn vortex_array::arrays::Decimal::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Decimal::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Decimal::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -5430,6 +5478,8 @@ pub fn vortex_array::arrays::dict::Dict::nbuffers(_array: &vortex_array::arrays: pub fn vortex_array::arrays::dict::Dict::nchildren(_array: &vortex_array::arrays::dict::DictArray) -> usize +pub fn vortex_array::arrays::dict::Dict::plan_range_read(metadata: &vortex_array::ProstMetadata, row_range: core::ops::range::Range, row_count: usize, _dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::dict::Dict::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::dict::Dict::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -5602,6 +5652,8 @@ pub fn vortex_array::arrays::Extension::nbuffers(_array: &vortex_array::arrays:: pub fn vortex_array::arrays::Extension::nchildren(_array: &vortex_array::arrays::ExtensionArray) -> usize +pub fn vortex_array::arrays::Extension::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Extension::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Extension::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -5736,6 +5788,8 @@ pub fn vortex_array::arrays::Filter::nbuffers(_array: &Self::Array) -> usize pub fn vortex_array::arrays::Filter::nchildren(_array: &Self::Array) -> usize +pub fn vortex_array::arrays::Filter::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Filter::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Filter::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -5882,6 +5936,8 @@ pub fn vortex_array::arrays::FixedSizeList::nbuffers(_array: &vortex_array::arra pub fn vortex_array::arrays::FixedSizeList::nchildren(array: &vortex_array::arrays::FixedSizeListArray) -> usize +pub fn vortex_array::arrays::FixedSizeList::plan_range_read(_metadata: &vortex_array::EmptyMetadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::FixedSizeList::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::FixedSizeList::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -6042,6 +6098,8 @@ pub fn vortex_array::arrays::List::nbuffers(_array: &vortex_array::arrays::ListA pub fn vortex_array::arrays::List::nchildren(array: &vortex_array::arrays::ListArray) -> usize +pub fn vortex_array::arrays::List::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::List::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::List::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -6202,6 +6260,8 @@ pub fn vortex_array::arrays::ListView::nbuffers(_array: &vortex_array::arrays::L pub fn vortex_array::arrays::ListView::nchildren(array: &vortex_array::arrays::ListViewArray) -> usize +pub fn vortex_array::arrays::ListView::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::ListView::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::ListView::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -6360,6 +6420,8 @@ pub fn vortex_array::arrays::Masked::nbuffers(_array: &Self::Array) -> usize pub fn vortex_array::arrays::Masked::nchildren(array: &Self::Array) -> usize +pub fn vortex_array::arrays::Masked::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Masked::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Masked::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -6498,6 +6560,8 @@ pub fn vortex_array::arrays::null::Null::nbuffers(_array: &vortex_array::arrays: pub fn vortex_array::arrays::null::Null::nchildren(_array: &vortex_array::arrays::null::NullArray) -> usize +pub fn vortex_array::arrays::null::Null::plan_range_read(_metadata: &vortex_array::EmptyMetadata, row_range: core::ops::range::Range, _row_count: usize, _dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::null::Null::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::null::Null::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -6662,6 +6726,8 @@ pub fn vortex_array::arrays::Primitive::nbuffers(_array: &vortex_array::arrays:: pub fn vortex_array::arrays::Primitive::nchildren(array: &vortex_array::arrays::PrimitiveArray) -> usize +pub fn vortex_array::arrays::Primitive::plan_range_read(_metadata: &vortex_array::EmptyMetadata, row_range: core::ops::range::Range, _row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Primitive::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Primitive::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -6896,6 +6962,8 @@ pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::nbuffers(_array: &vortex pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::nchildren(array: &vortex_array::arrays::scalar_fn::ScalarFnArray) -> usize +pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -6968,6 +7036,8 @@ pub fn vortex_array::arrays::Shared::nbuffers(_array: &Self::Array) -> usize pub fn vortex_array::arrays::Shared::nchildren(_array: &Self::Array) -> usize +pub fn vortex_array::arrays::Shared::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Shared::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Shared::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -7084,6 +7154,8 @@ pub fn vortex_array::arrays::slice::Slice::nbuffers(_array: &Self::Array) -> usi pub fn vortex_array::arrays::slice::Slice::nchildren(_array: &Self::Array) -> usize +pub fn vortex_array::arrays::slice::Slice::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::slice::Slice::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::slice::Slice::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -7228,6 +7300,8 @@ pub fn vortex_array::arrays::Struct::nbuffers(_array: &vortex_array::arrays::Str pub fn vortex_array::arrays::Struct::nchildren(array: &vortex_array::arrays::StructArray) -> usize +pub fn vortex_array::arrays::Struct::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Struct::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Struct::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -7482,6 +7556,8 @@ pub fn vortex_array::arrays::VarBin::nbuffers(_array: &vortex_array::arrays::Var pub fn vortex_array::arrays::VarBin::nchildren(array: &vortex_array::arrays::VarBinArray) -> usize +pub fn vortex_array::arrays::VarBin::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::VarBin::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::VarBin::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -7712,6 +7788,8 @@ pub fn vortex_array::arrays::VarBinView::nbuffers(array: &vortex_array::arrays:: pub fn vortex_array::arrays::VarBinView::nchildren(array: &vortex_array::arrays::VarBinViewArray) -> usize +pub fn vortex_array::arrays::VarBinView::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::VarBinView::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::VarBinView::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -20444,6 +20522,152 @@ pub struct vortex_array::variants::Utf8Typed<'a>(_) pub mod vortex_array::vtable +pub mod vortex_array::vtable::range_read + +pub enum vortex_array::vtable::range_read::BufferSubRange + +pub vortex_array::vtable::range_read::BufferSubRange::Full + +pub vortex_array::vtable::range_read::BufferSubRange::Range(core::ops::range::Range) + +impl core::clone::Clone for vortex_array::vtable::BufferSubRange + +pub fn vortex_array::vtable::BufferSubRange::clone(&self) -> vortex_array::vtable::BufferSubRange + +impl core::fmt::Debug for vortex_array::vtable::BufferSubRange + +pub fn vortex_array::vtable::BufferSubRange::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub enum vortex_array::vtable::range_read::ChildRangeRead + +pub vortex_array::vtable::range_read::ChildRangeRead::Full + +pub vortex_array::vtable::range_read::ChildRangeRead::Recurse + +pub vortex_array::vtable::range_read::ChildRangeRead::Recurse::dtype: vortex_array::dtype::DType + +pub vortex_array::vtable::range_read::ChildRangeRead::Recurse::row_count: usize + +pub vortex_array::vtable::range_read::ChildRangeRead::Recurse::row_range: core::ops::range::Range + +impl core::clone::Clone for vortex_array::vtable::ChildRangeRead + +pub fn vortex_array::vtable::ChildRangeRead::clone(&self) -> vortex_array::vtable::ChildRangeRead + +impl core::fmt::Debug for vortex_array::vtable::ChildRangeRead + +pub fn vortex_array::vtable::ChildRangeRead::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub enum vortex_array::vtable::range_read::RangeDecodeInfo + +pub vortex_array::vtable::range_read::RangeDecodeInfo::FromChild + +pub vortex_array::vtable::range_read::RangeDecodeInfo::FromChild::child_idx: usize + +pub vortex_array::vtable::range_read::RangeDecodeInfo::FromChild::divisor: usize + +pub vortex_array::vtable::range_read::RangeDecodeInfo::Leaf + +pub vortex_array::vtable::range_read::RangeDecodeInfo::Leaf::decode_len: usize + +pub vortex_array::vtable::range_read::RangeDecodeInfo::Leaf::post_slice: core::option::Option> + +impl core::clone::Clone for vortex_array::vtable::RangeDecodeInfo + +pub fn vortex_array::vtable::RangeDecodeInfo::clone(&self) -> vortex_array::vtable::RangeDecodeInfo + +impl core::fmt::Debug for vortex_array::vtable::RangeDecodeInfo + +pub fn vortex_array::vtable::RangeDecodeInfo::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub struct vortex_array::vtable::range_read::EncodingRangeRead + +pub vortex_array::vtable::range_read::EncodingRangeRead::buffer_sub_ranges: alloc::vec::Vec + +pub vortex_array::vtable::range_read::EncodingRangeRead::children: alloc::vec::Vec + +pub vortex_array::vtable::range_read::EncodingRangeRead::decode_info: vortex_array::vtable::RangeDecodeInfo + +impl core::clone::Clone for vortex_array::vtable::EncodingRangeRead + +pub fn vortex_array::vtable::EncodingRangeRead::clone(&self) -> vortex_array::vtable::EncodingRangeRead + +impl core::fmt::Debug for vortex_array::vtable::EncodingRangeRead + +pub fn vortex_array::vtable::EncodingRangeRead::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub enum vortex_array::vtable::BufferSubRange + +pub vortex_array::vtable::BufferSubRange::Full + +pub vortex_array::vtable::BufferSubRange::Range(core::ops::range::Range) + +impl core::clone::Clone for vortex_array::vtable::BufferSubRange + +pub fn vortex_array::vtable::BufferSubRange::clone(&self) -> vortex_array::vtable::BufferSubRange + +impl core::fmt::Debug for vortex_array::vtable::BufferSubRange + +pub fn vortex_array::vtable::BufferSubRange::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub enum vortex_array::vtable::ChildRangeRead + +pub vortex_array::vtable::ChildRangeRead::Full + +pub vortex_array::vtable::ChildRangeRead::Recurse + +pub vortex_array::vtable::ChildRangeRead::Recurse::dtype: vortex_array::dtype::DType + +pub vortex_array::vtable::ChildRangeRead::Recurse::row_count: usize + +pub vortex_array::vtable::ChildRangeRead::Recurse::row_range: core::ops::range::Range + +impl core::clone::Clone for vortex_array::vtable::ChildRangeRead + +pub fn vortex_array::vtable::ChildRangeRead::clone(&self) -> vortex_array::vtable::ChildRangeRead + +impl core::fmt::Debug for vortex_array::vtable::ChildRangeRead + +pub fn vortex_array::vtable::ChildRangeRead::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub enum vortex_array::vtable::RangeDecodeInfo + +pub vortex_array::vtable::RangeDecodeInfo::FromChild + +pub vortex_array::vtable::RangeDecodeInfo::FromChild::child_idx: usize + +pub vortex_array::vtable::RangeDecodeInfo::FromChild::divisor: usize + +pub vortex_array::vtable::RangeDecodeInfo::Leaf + +pub vortex_array::vtable::RangeDecodeInfo::Leaf::decode_len: usize + +pub vortex_array::vtable::RangeDecodeInfo::Leaf::post_slice: core::option::Option> + +impl core::clone::Clone for vortex_array::vtable::RangeDecodeInfo + +pub fn vortex_array::vtable::RangeDecodeInfo::clone(&self) -> vortex_array::vtable::RangeDecodeInfo + +impl core::fmt::Debug for vortex_array::vtable::RangeDecodeInfo + +pub fn vortex_array::vtable::RangeDecodeInfo::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub struct vortex_array::vtable::EncodingRangeRead + +pub vortex_array::vtable::EncodingRangeRead::buffer_sub_ranges: alloc::vec::Vec + +pub vortex_array::vtable::EncodingRangeRead::children: alloc::vec::Vec + +pub vortex_array::vtable::EncodingRangeRead::decode_info: vortex_array::vtable::RangeDecodeInfo + +impl core::clone::Clone for vortex_array::vtable::EncodingRangeRead + +pub fn vortex_array::vtable::EncodingRangeRead::clone(&self) -> vortex_array::vtable::EncodingRangeRead + +impl core::fmt::Debug for vortex_array::vtable::EncodingRangeRead + +pub fn vortex_array::vtable::EncodingRangeRead::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + pub struct vortex_array::vtable::NotSupported impl vortex_array::vtable::OperationsVTable for vortex_array::vtable::NotSupported @@ -20490,6 +20714,8 @@ pub fn vortex_array::vtable::DynVTable::execute(&self, array: &vortex_array::Arr pub fn vortex_array::vtable::DynVTable::execute_parent(&self, array: &vortex_array::ArrayRef, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> +pub fn vortex_array::vtable::DynVTable::plan_range_read(&self, metadata_bytes: &[u8], row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType, session: &vortex_session::VortexSession) -> core::option::Option + pub fn vortex_array::vtable::DynVTable::reduce(&self, array: &vortex_array::ArrayRef) -> vortex_error::VortexResult> pub fn vortex_array::vtable::DynVTable::reduce_parent(&self, array: &vortex_array::ArrayRef, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -20624,6 +20850,8 @@ pub fn vortex_array::vtable::VTable::nbuffers(array: &Self::Array) -> usize pub fn vortex_array::vtable::VTable::nchildren(array: &Self::Array) -> usize +pub fn vortex_array::vtable::VTable::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::vtable::VTable::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::vtable::VTable::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -20678,6 +20906,8 @@ pub fn vortex_array::arrays::Bool::nbuffers(_array: &vortex_array::arrays::BoolA pub fn vortex_array::arrays::Bool::nchildren(array: &vortex_array::arrays::BoolArray) -> usize +pub fn vortex_array::arrays::Bool::plan_range_read(metadata: &vortex_array::ProstMetadata, row_range: core::ops::range::Range, _row_count: usize, _dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Bool::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Bool::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -20732,6 +20962,8 @@ pub fn vortex_array::arrays::Chunked::nbuffers(_array: &vortex_array::arrays::Ch pub fn vortex_array::arrays::Chunked::nchildren(array: &vortex_array::arrays::ChunkedArray) -> usize +pub fn vortex_array::arrays::Chunked::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Chunked::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Chunked::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -20786,6 +21018,8 @@ pub fn vortex_array::arrays::Constant::nbuffers(_array: &vortex_array::arrays::C pub fn vortex_array::arrays::Constant::nchildren(_array: &vortex_array::arrays::ConstantArray) -> usize +pub fn vortex_array::arrays::Constant::plan_range_read(_metadata: &vortex_array::scalar::Scalar, row_range: core::ops::range::Range, _row_count: usize, _dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Constant::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Constant::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -20840,6 +21074,8 @@ pub fn vortex_array::arrays::Decimal::nbuffers(_array: &vortex_array::arrays::De pub fn vortex_array::arrays::Decimal::nchildren(array: &vortex_array::arrays::DecimalArray) -> usize +pub fn vortex_array::arrays::Decimal::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Decimal::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Decimal::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -20894,6 +21130,8 @@ pub fn vortex_array::arrays::Extension::nbuffers(_array: &vortex_array::arrays:: pub fn vortex_array::arrays::Extension::nchildren(_array: &vortex_array::arrays::ExtensionArray) -> usize +pub fn vortex_array::arrays::Extension::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Extension::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Extension::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -20948,6 +21186,8 @@ pub fn vortex_array::arrays::Filter::nbuffers(_array: &Self::Array) -> usize pub fn vortex_array::arrays::Filter::nchildren(_array: &Self::Array) -> usize +pub fn vortex_array::arrays::Filter::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Filter::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Filter::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -21002,6 +21242,8 @@ pub fn vortex_array::arrays::FixedSizeList::nbuffers(_array: &vortex_array::arra pub fn vortex_array::arrays::FixedSizeList::nchildren(array: &vortex_array::arrays::FixedSizeListArray) -> usize +pub fn vortex_array::arrays::FixedSizeList::plan_range_read(_metadata: &vortex_array::EmptyMetadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::FixedSizeList::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::FixedSizeList::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -21056,6 +21298,8 @@ pub fn vortex_array::arrays::List::nbuffers(_array: &vortex_array::arrays::ListA pub fn vortex_array::arrays::List::nchildren(array: &vortex_array::arrays::ListArray) -> usize +pub fn vortex_array::arrays::List::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::List::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::List::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -21110,6 +21354,8 @@ pub fn vortex_array::arrays::ListView::nbuffers(_array: &vortex_array::arrays::L pub fn vortex_array::arrays::ListView::nchildren(array: &vortex_array::arrays::ListViewArray) -> usize +pub fn vortex_array::arrays::ListView::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::ListView::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::ListView::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -21164,6 +21410,8 @@ pub fn vortex_array::arrays::Masked::nbuffers(_array: &Self::Array) -> usize pub fn vortex_array::arrays::Masked::nchildren(array: &Self::Array) -> usize +pub fn vortex_array::arrays::Masked::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Masked::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Masked::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -21218,6 +21466,8 @@ pub fn vortex_array::arrays::Primitive::nbuffers(_array: &vortex_array::arrays:: pub fn vortex_array::arrays::Primitive::nchildren(array: &vortex_array::arrays::PrimitiveArray) -> usize +pub fn vortex_array::arrays::Primitive::plan_range_read(_metadata: &vortex_array::EmptyMetadata, row_range: core::ops::range::Range, _row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Primitive::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Primitive::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -21272,6 +21522,8 @@ pub fn vortex_array::arrays::Shared::nbuffers(_array: &Self::Array) -> usize pub fn vortex_array::arrays::Shared::nchildren(_array: &Self::Array) -> usize +pub fn vortex_array::arrays::Shared::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Shared::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Shared::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -21326,6 +21578,8 @@ pub fn vortex_array::arrays::Struct::nbuffers(_array: &vortex_array::arrays::Str pub fn vortex_array::arrays::Struct::nchildren(array: &vortex_array::arrays::StructArray) -> usize +pub fn vortex_array::arrays::Struct::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::Struct::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::Struct::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -21380,6 +21634,8 @@ pub fn vortex_array::arrays::VarBin::nbuffers(_array: &vortex_array::arrays::Var pub fn vortex_array::arrays::VarBin::nchildren(array: &vortex_array::arrays::VarBinArray) -> usize +pub fn vortex_array::arrays::VarBin::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::VarBin::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::VarBin::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -21434,6 +21690,8 @@ pub fn vortex_array::arrays::VarBinView::nbuffers(array: &vortex_array::arrays:: pub fn vortex_array::arrays::VarBinView::nchildren(array: &vortex_array::arrays::VarBinViewArray) -> usize +pub fn vortex_array::arrays::VarBinView::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::VarBinView::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::VarBinView::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -21488,6 +21746,8 @@ pub fn vortex_array::arrays::dict::Dict::nbuffers(_array: &vortex_array::arrays: pub fn vortex_array::arrays::dict::Dict::nchildren(_array: &vortex_array::arrays::dict::DictArray) -> usize +pub fn vortex_array::arrays::dict::Dict::plan_range_read(metadata: &vortex_array::ProstMetadata, row_range: core::ops::range::Range, row_count: usize, _dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::dict::Dict::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::dict::Dict::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -21542,6 +21802,8 @@ pub fn vortex_array::arrays::null::Null::nbuffers(_array: &vortex_array::arrays: pub fn vortex_array::arrays::null::Null::nchildren(_array: &vortex_array::arrays::null::NullArray) -> usize +pub fn vortex_array::arrays::null::Null::plan_range_read(_metadata: &vortex_array::EmptyMetadata, row_range: core::ops::range::Range, _row_count: usize, _dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::null::Null::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::null::Null::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -21596,6 +21858,8 @@ pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::nbuffers(_array: &vortex pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::nchildren(array: &vortex_array::arrays::scalar_fn::ScalarFnArray) -> usize +pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> @@ -21650,6 +21914,8 @@ pub fn vortex_array::arrays::slice::Slice::nbuffers(_array: &Self::Array) -> usi pub fn vortex_array::arrays::slice::Slice::nchildren(_array: &Self::Array) -> usize +pub fn vortex_array::arrays::slice::Slice::plan_range_read(metadata: &Self::Metadata, row_range: core::ops::range::Range, row_count: usize, dtype: &vortex_array::dtype::DType) -> core::option::Option + pub fn vortex_array::arrays::slice::Slice::reduce(array: &Self::Array) -> vortex_error::VortexResult> pub fn vortex_array::arrays::slice::Slice::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> diff --git a/vortex-array/src/arrays/bool/vtable/mod.rs b/vortex-array/src/arrays/bool/vtable/mod.rs index d439ecf0e78..fc87f78ab8d 100644 --- a/vortex-array/src/arrays/bool/vtable/mod.rs +++ b/vortex-array/src/arrays/bool/vtable/mod.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::ops::Range; + use kernel::PARENT_KERNELS; use vortex_error::VortexExpect; use vortex_error::VortexResult; @@ -22,6 +24,9 @@ use crate::dtype::DType; use crate::serde::ArrayChildren; use crate::validity::Validity; use crate::vtable; +use crate::vtable::BufferSubRange; +use crate::vtable::EncodingRangeRead; +use crate::vtable::RangeDecodeInfo; use crate::vtable::VTable; use crate::vtable::ValidityVTableFromValidityHelper; use crate::vtable::validity_nchildren; @@ -205,6 +210,30 @@ impl VTable for Bool { ) -> VortexResult> { PARENT_KERNELS.execute(array, parent, child_idx, ctx) } + + fn plan_range_read( + metadata: &ProstMetadata, + row_range: Range, + _row_count: usize, + _dtype: &DType, + ) -> Option { + // Only support offset=0 and byte-aligned start. + if metadata.0.offset != 0 || !row_range.start.is_multiple_of(8) { + return None; + } + + let byte_start = row_range.start / 8; + let byte_end = row_range.end.div_ceil(8); + + Some(EncodingRangeRead { + buffer_sub_ranges: vec![BufferSubRange::Range(byte_start..byte_end)], + children: vec![], + decode_info: RangeDecodeInfo::Leaf { + decode_len: row_range.len(), + post_slice: None, + }, + }) + } } #[derive(Debug)] diff --git a/vortex-array/src/arrays/constant/vtable/mod.rs b/vortex-array/src/arrays/constant/vtable/mod.rs index d35b874193b..9a8e96392c9 100644 --- a/vortex-array/src/arrays/constant/vtable/mod.rs +++ b/vortex-array/src/arrays/constant/vtable/mod.rs @@ -3,6 +3,7 @@ use std::fmt::Debug; use std::hash::Hash; +use std::ops::Range; use vortex_buffer::ByteBufferMut; use vortex_error::VortexExpect; @@ -37,6 +38,9 @@ use crate::serde::ArrayChildren; use crate::stats::StatsSetRef; use crate::vtable; use crate::vtable::ArrayId; +use crate::vtable::BufferSubRange; +use crate::vtable::EncodingRangeRead; +use crate::vtable::RangeDecodeInfo; use crate::vtable::VTable; pub(crate) mod canonical; mod operations; @@ -178,6 +182,24 @@ impl VTable for Constant { PARENT_RULES.evaluate(array, parent, child_idx) } + fn plan_range_read( + _metadata: &Scalar, + row_range: Range, + _row_count: usize, + _dtype: &DType, + ) -> Option { + // Constant arrays have no data buffers to sub-range; only the scalar buffer + // which must be included fully. + Some(EncodingRangeRead { + buffer_sub_ranges: vec![BufferSubRange::Full], + children: vec![], + decode_info: RangeDecodeInfo::Leaf { + decode_len: row_range.len(), + post_slice: None, + }, + }) + } + fn execute(array: &Self::Array, _ctx: &mut ExecutionCtx) -> VortexResult { Ok(ExecutionStep::Done( constant_canonicalize(array)?.into_array(), diff --git a/vortex-array/src/arrays/dict/vtable/mod.rs b/vortex-array/src/arrays/dict/vtable/mod.rs index 2903967c230..18600277da8 100644 --- a/vortex-array/src/arrays/dict/vtable/mod.rs +++ b/vortex-array/src/arrays/dict/vtable/mod.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use std::hash::Hash; +use std::ops::Range; use kernel::PARENT_KERNELS; use vortex_error::VortexResult; @@ -37,6 +38,9 @@ use crate::serde::ArrayChildren; use crate::stats::StatsSetRef; use crate::vtable; use crate::vtable::ArrayId; +use crate::vtable::ChildRangeRead; +use crate::vtable::EncodingRangeRead; +use crate::vtable::RangeDecodeInfo; use crate::vtable::VTable; mod kernel; mod operations; @@ -230,6 +234,34 @@ impl VTable for Dict { ) -> VortexResult> { PARENT_KERNELS.execute(array, parent, child_idx, ctx) } + + fn plan_range_read( + metadata: &ProstMetadata, + row_range: Range, + row_count: usize, + _dtype: &DType, + ) -> Option { + let codes_ptype = PType::try_from(metadata.0.codes_ptype).ok()?; + let codes_dtype = DType::Primitive(codes_ptype, Nullability::NonNullable); + + Some(EncodingRangeRead { + buffer_sub_ranges: vec![], + children: vec![ + // Child 0 = codes (row-dependent, recurse). + ChildRangeRead::Recurse { + row_range, + row_count, + dtype: codes_dtype, + }, + // Child 1 = values (global dictionary, include fully). + ChildRangeRead::Full, + ], + decode_info: RangeDecodeInfo::FromChild { + child_idx: 0, + divisor: 1, + }, + }) + } } /// Check for fast-path execution conditions. diff --git a/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs b/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs index d5c6f673671..b33cdcafd30 100644 --- a/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs +++ b/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use std::hash::Hash; +use std::ops::Range; use vortex_error::VortexExpect; use vortex_error::VortexResult; @@ -27,6 +28,9 @@ use crate::stats::StatsSetRef; use crate::validity::Validity; use crate::vtable; use crate::vtable::ArrayId; +use crate::vtable::ChildRangeRead; +use crate::vtable::EncodingRangeRead; +use crate::vtable::RangeDecodeInfo; use crate::vtable::VTable; use crate::vtable::ValidityVTableFromValidityHelper; use crate::vtable::validity_nchildren; @@ -222,4 +226,34 @@ impl VTable for FixedSizeList { fn execute(array: &Self::Array, _ctx: &mut ExecutionCtx) -> VortexResult { Ok(ExecutionStep::Done(array.clone().into_array())) } + + fn plan_range_read( + _metadata: &EmptyMetadata, + row_range: Range, + row_count: usize, + dtype: &DType, + ) -> Option { + let (element_dtype, list_size) = match dtype { + DType::FixedSizeList(element_dtype, list_size, _) => { + (element_dtype.as_ref().clone(), *list_size as usize) + } + _ => return None, + }; + + let element_range = (row_range.start * list_size)..(row_range.end * list_size); + let element_count = row_count * list_size; + + Some(EncodingRangeRead { + buffer_sub_ranges: vec![], + children: vec![ChildRangeRead::Recurse { + row_range: element_range, + row_count: element_count, + dtype: element_dtype, + }], + decode_info: RangeDecodeInfo::FromChild { + child_idx: 0, + divisor: list_size, + }, + }) + } } diff --git a/vortex-array/src/arrays/null/mod.rs b/vortex-array/src/arrays/null/mod.rs index e2ac02f247c..001968c14cf 100644 --- a/vortex-array/src/arrays/null/mod.rs +++ b/vortex-array/src/arrays/null/mod.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use std::hash::Hash; +use std::ops::Range; use vortex_error::VortexResult; use vortex_error::vortex_ensure; @@ -24,7 +25,9 @@ use crate::stats::StatsSetRef; use crate::validity::Validity; use crate::vtable; use crate::vtable::ArrayId; +use crate::vtable::EncodingRangeRead; use crate::vtable::OperationsVTable; +use crate::vtable::RangeDecodeInfo; use crate::vtable::VTable; use crate::vtable::ValidityVTable; @@ -135,6 +138,22 @@ impl VTable for Null { fn execute(array: &Self::Array, _ctx: &mut ExecutionCtx) -> VortexResult { Ok(ExecutionStep::Done(array.clone().into_array())) } + + fn plan_range_read( + _metadata: &EmptyMetadata, + row_range: Range, + _row_count: usize, + _dtype: &DType, + ) -> Option { + Some(EncodingRangeRead { + buffer_sub_ranges: vec![], + children: vec![], + decode_info: RangeDecodeInfo::Leaf { + decode_len: row_range.len(), + post_slice: None, + }, + }) + } } /// A array where all values are null. diff --git a/vortex-array/src/arrays/primitive/vtable/mod.rs b/vortex-array/src/arrays/primitive/vtable/mod.rs index c48a72217b1..7a6aa9d0ecf 100644 --- a/vortex-array/src/arrays/primitive/vtable/mod.rs +++ b/vortex-array/src/arrays/primitive/vtable/mod.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::ops::Range; + use kernel::PARENT_KERNELS; use vortex_error::VortexExpect; use vortex_error::VortexResult; @@ -20,6 +22,9 @@ use crate::dtype::PType; use crate::serde::ArrayChildren; use crate::validity::Validity; use crate::vtable; +use crate::vtable::BufferSubRange; +use crate::vtable::EncodingRangeRead; +use crate::vtable::RangeDecodeInfo; use crate::vtable::VTable; use crate::vtable::ValidityVTableFromValidityHelper; use crate::vtable::validity_nchildren; @@ -219,6 +224,28 @@ impl VTable for Primitive { ) -> VortexResult> { PARENT_KERNELS.execute(array, parent, child_idx, ctx) } + + fn plan_range_read( + _metadata: &EmptyMetadata, + row_range: Range, + _row_count: usize, + dtype: &DType, + ) -> Option { + let byte_width = match dtype { + DType::Primitive(ptype, _) => ptype.byte_width(), + _ => return None, + }; + Some(EncodingRangeRead { + buffer_sub_ranges: vec![BufferSubRange::Range( + row_range.start * byte_width..row_range.end * byte_width, + )], + children: vec![], + decode_info: RangeDecodeInfo::Leaf { + decode_len: row_range.len(), + post_slice: None, + }, + }) + } } #[derive(Debug)] diff --git a/vortex-array/src/vtable/dyn_.rs b/vortex-array/src/vtable/dyn_.rs index 64ca5e99c50..5ee0aeced09 100644 --- a/vortex-array/src/vtable/dyn_.rs +++ b/vortex-array/src/vtable/dyn_.rs @@ -6,6 +6,7 @@ use std::fmt; use std::fmt::Debug; use std::fmt::Formatter; use std::marker::PhantomData; +use std::ops::Range; use arcref::ArcRef; use vortex_error::VortexExpect; @@ -23,6 +24,7 @@ use crate::dtype::DType; use crate::executor::ExecutionCtx; use crate::serde::ArrayChildren; use crate::vtable::VTable; +use crate::vtable::range_read::EncodingRangeRead; /// ArrayId is a globally unique name for the array's vtable. pub type ArrayId = ArcRef; @@ -71,6 +73,16 @@ pub trait DynVTable: 'static + private::Sealed + Send + Sync + Debug { child_idx: usize, ctx: &mut ExecutionCtx, ) -> VortexResult>; + + /// See [`VTable::plan_range_read`] + fn plan_range_read( + &self, + metadata_bytes: &[u8], + row_range: Range, + row_count: usize, + dtype: &DType, + session: &VortexSession, + ) -> Option; } /// Adapter struct used to lift the [`VTable`] trait into an object-safe [`DynVTable`] @@ -197,6 +209,18 @@ impl DynVTable for ArrayVTableAdapter { Ok(Some(result)) } + + fn plan_range_read( + &self, + metadata_bytes: &[u8], + row_range: Range, + row_count: usize, + dtype: &DType, + session: &VortexSession, + ) -> Option { + let metadata = V::deserialize(metadata_bytes, dtype, row_count, &[], session).ok()?; + V::plan_range_read(&metadata, row_range, row_count, dtype) + } } fn downcast(array: &ArrayRef) -> &V::Array { diff --git a/vortex-array/src/vtable/mod.rs b/vortex-array/src/vtable/mod.rs index 2b7ab9bd620..9a22ac81ade 100644 --- a/vortex-array/src/vtable/mod.rs +++ b/vortex-array/src/vtable/mod.rs @@ -5,14 +5,17 @@ mod dyn_; mod operations; +pub mod range_read; mod validity; use std::fmt::Debug; use std::hash::Hasher; use std::ops::Deref; +use std::ops::Range; pub use dyn_::*; pub use operations::*; +pub use range_read::*; pub use validity::*; use vortex_error::VortexExpect; use vortex_error::VortexResult; @@ -239,6 +242,28 @@ pub trait VTable: 'static + Sized + Send + Sync + Debug { _ = (array, parent, child_idx); Ok(None) } + + /// Plan a sub-segment range read for the given row range. + /// + /// This method is called during I/O planning to determine which byte ranges of a + /// segment's buffers are needed for a given row range, allowing targeted reads instead + /// of fetching the entire segment. + /// + /// The returned [`EncodingRangeRead::children`] should cover the encoding's own + /// children (e.g., codes + values for Dict). The planner will detect any additional + /// children (e.g., validity) not covered by the plan and fall back to a full read. + /// + /// Returns `None` if the encoding does not support range reads, in which case the + /// caller falls back to reading the full segment. + fn plan_range_read( + metadata: &Self::Metadata, + row_range: Range, + row_count: usize, + dtype: &DType, + ) -> Option { + _ = (metadata, row_range, row_count, dtype); + None + } } /// Placeholder type used to indicate when a particular vtable is not supported by the encoding. diff --git a/vortex-array/src/vtable/range_read.rs b/vortex-array/src/vtable/range_read.rs new file mode 100644 index 00000000000..edad16ded28 --- /dev/null +++ b/vortex-array/src/vtable/range_read.rs @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Types for encoding-level range read planning. +//! +//! Each encoding can implement [`VTable::plan_range_read`](super::VTable::plan_range_read) to describe how its buffers and +//! children should be handled when only a sub-range of rows is needed. The layout planner +//! uses this information to issue targeted IO instead of reading the full segment. + +use std::ops::Range; + +use crate::dtype::DType; + +/// Describes how a single encoding node handles a sub-segment range read. +/// +/// Returned by [`VTable::plan_range_read`](super::VTable::plan_range_read) to tell the planner which buffer sub-ranges +/// are needed and how to handle children. +#[derive(Debug, Clone)] +pub struct EncodingRangeRead { + /// For each of this encoding's buffers (by local index), the sub-range needed. + pub buffer_sub_ranges: Vec, + /// For each child (by local index), how to handle it. + pub children: Vec, + /// How to compute the decode parameters. + pub decode_info: RangeDecodeInfo, +} + +/// Specifies which portion of a buffer is needed for a range read. +#[derive(Debug, Clone)] +pub enum BufferSubRange { + /// The entire buffer is needed. + Full, + /// Only the specified byte range within the buffer is needed. + Range(Range), +} + +/// Specifies how a child should be handled during a range read. +#[derive(Debug, Clone)] +pub enum ChildRangeRead { + /// Recurse into this child's encoding tree with the given row range. + Recurse { + /// The row range to request from this child. + row_range: Range, + /// The total row count of this child (before sub-ranging). + row_count: usize, + /// The DType of this child. + dtype: DType, + }, + /// Include all of this child's buffers fully (no sub-ranging). + Full, +} + +/// Describes how to compute `decode_len` and `post_slice` for the range read. +#[derive(Debug, Clone)] +pub enum RangeDecodeInfo { + /// Self-contained decode parameters (for leaf encodings like Primitive, BitPacked, Bool). + Leaf { + /// The number of rows to pass to `decode()`. + decode_len: usize, + /// After decoding, slice to this range. `None` means no slicing needed. + post_slice: Option>, + }, + /// Delegate to a child's decode info, optionally dividing by a factor. + /// + /// - `divisor = 1`: transparent delegation (FoR, ZigZag, Dict, ALP, ALPRD). + /// - `divisor = list_size`: for FixedSizeList, where the child operates in element space. + /// + /// The planner will check divisibility and fall back to a full read if it fails. + FromChild { + /// The child index whose decode info to use. + child_idx: usize, + /// Divisor to apply to `decode_len` and `post_slice` ranges. + divisor: usize, + }, +} diff --git a/vortex-file/public-api.lock b/vortex-file/public-api.lock index fc73045c63e..9477051ec20 100644 --- a/vortex-file/public-api.lock +++ b/vortex-file/public-api.lock @@ -40,6 +40,8 @@ impl vortex_layout::segments::source::SegmentSource for vortex_file::segments::F pub fn vortex_file::segments::FileSegmentSource::request(&self, id: vortex_layout::segments::SegmentId) -> vortex_layout::segments::source::SegmentFuture +pub fn vortex_file::segments::FileSegmentSource::request_range(&self, id: vortex_layout::segments::SegmentId, range: core::ops::range::Range) -> vortex_layout::segments::source::SegmentFuture + pub struct vortex_file::segments::InitialReadSegmentCache pub vortex_file::segments::InitialReadSegmentCache::fallback: alloc::sync::Arc diff --git a/vortex-file/src/segments/source.rs b/vortex-file/src/segments/source.rs index c27d9e2f944..c87db903fd4 100644 --- a/vortex-file/src/segments/source.rs +++ b/vortex-file/src/segments/source.rs @@ -134,6 +134,45 @@ impl FileSegmentSource { } impl SegmentSource for FileSegmentSource { + fn request_range(&self, id: SegmentId, range: std::ops::Range) -> SegmentFuture { + let spec = *match self.segments.get(*id as usize) { + Some(spec) => spec, + None => { + return future::ready(Err(vortex_err!("Missing segment: {}", id))).boxed(); + } + }; + + // Issue a targeted read for only the requested byte range within the segment. + let offset = spec.offset + range.start as u64; + let length = range.len(); + + let (send, recv) = oneshot::channel(); + let req_id = self.next_id.fetch_add(1, Ordering::Relaxed); + let event = ReadEvent::Request(ReadRequest { + id: req_id, + offset, + length, + // No alignment requirement for partial reads; the caller is responsible + // for re-aligning individual buffers extracted from the partial segment. + alignment: Alignment::none(), + callback: send, + }); + + if let Err(e) = self.events.unbounded_send(event) { + return future::ready(Err(vortex_err!("Failed to submit read request: {e}"))).boxed(); + } + + let fut = ReadFuture { + id: req_id, + recv, + polled: false, + finished: false, + events: self.events.clone(), + }; + + fut.boxed() + } + fn request(&self, id: SegmentId) -> SegmentFuture { // We eagerly register the read request here assuming the behaviour of [`FileRead`], where // coalescing becomes effective prior to the future being polled. @@ -270,6 +309,31 @@ impl BufferSegmentSource { } impl SegmentSource for BufferSegmentSource { + fn request_range(&self, id: SegmentId, range: std::ops::Range) -> SegmentFuture { + let spec = match self.segments.get(*id as usize) { + Some(spec) => spec, + None => { + return future::ready(Err(vortex_err!("Missing segment: {}", id))).boxed(); + } + }; + + let start = spec.offset as usize + range.start; + let end = spec.offset as usize + range.end; + if end > self.buffer.len() { + return future::ready(Err(vortex_err!( + "Segment {} range {}..{} out of bounds for buffer of length {}", + *id, + start, + end, + self.buffer.len() + ))) + .boxed(); + } + + let slice = self.buffer.slice_unaligned(start..end); + future::ready(Ok(BufferHandle::new_host(slice))).boxed() + } + fn request(&self, id: SegmentId) -> SegmentFuture { let spec = match self.segments.get(*id as usize) { Some(spec) => spec, diff --git a/vortex-layout/Cargo.toml b/vortex-layout/Cargo.toml index e7b66a9bd43..2b2900ce781 100644 --- a/vortex-layout/Cargo.toml +++ b/vortex-layout/Cargo.toml @@ -52,9 +52,16 @@ vortex-utils = { workspace = true, features = ["dashmap"] } futures = { workspace = true, features = ["executor"] } rstest = { workspace = true } tokio = { workspace = true, features = ["rt", "macros"] } +vortex-alp = { workspace = true } vortex-array = { path = "../vortex-array", features = ["_test-harness"] } +vortex-btrblocks = { workspace = true } +vortex-bytebool = { workspace = true } +vortex-datetime-parts = { workspace = true } +vortex-decimal-byte-parts = { workspace = true } +vortex-fastlanes = { workspace = true } vortex-io = { path = "../vortex-io", features = ["tokio"] } vortex-utils = { workspace = true, features = ["_test-harness"] } +vortex-zigzag = { workspace = true } [features] _test-harness = [] diff --git a/vortex-layout/public-api.lock b/vortex-layout/public-api.lock index 8163fa19489..88ec629a146 100644 --- a/vortex-layout/public-api.lock +++ b/vortex-layout/public-api.lock @@ -1012,6 +1012,8 @@ impl vortex_layout::segments::SegmentSource for vortex_layout::segments::Segment pub fn vortex_layout::segments::SegmentCacheSourceAdapter::request(&self, id: vortex_layout::segments::SegmentId) -> vortex_layout::segments::SegmentFuture +pub fn vortex_layout::segments::SegmentCacheSourceAdapter::request_range(&self, id: vortex_layout::segments::SegmentId, range: core::ops::range::Range) -> vortex_layout::segments::SegmentFuture + pub struct vortex_layout::segments::SegmentId(_) impl core::clone::Clone for vortex_layout::segments::SegmentId @@ -1078,6 +1080,8 @@ impl vortex_layout::segments::Segment pub fn vortex_layout::segments::SharedSegmentSource::request(&self, id: vortex_layout::segments::SegmentId) -> vortex_layout::segments::SegmentFuture +pub fn vortex_layout::segments::SharedSegmentSource::request_range(&self, id: vortex_layout::segments::SegmentId, range: core::ops::range::Range) -> vortex_layout::segments::SegmentFuture + pub trait vortex_layout::segments::SegmentCache: core::marker::Send + core::marker::Sync pub fn vortex_layout::segments::SegmentCache::get<'life0, 'async_trait>(&'life0 self, id: vortex_layout::segments::SegmentId) -> core::pin::Pin>> + core::marker::Send + 'async_trait)>> where Self: 'async_trait, 'life0: 'async_trait @@ -1110,14 +1114,20 @@ pub trait vortex_layout::segments::SegmentSource: 'static + core::marker::Send + pub fn vortex_layout::segments::SegmentSource::request(&self, id: vortex_layout::segments::SegmentId) -> vortex_layout::segments::SegmentFuture +pub fn vortex_layout::segments::SegmentSource::request_range(&self, id: vortex_layout::segments::SegmentId, range: core::ops::range::Range) -> vortex_layout::segments::SegmentFuture + impl vortex_layout::segments::SegmentSource for vortex_layout::segments::SegmentCacheSourceAdapter pub fn vortex_layout::segments::SegmentCacheSourceAdapter::request(&self, id: vortex_layout::segments::SegmentId) -> vortex_layout::segments::SegmentFuture +pub fn vortex_layout::segments::SegmentCacheSourceAdapter::request_range(&self, id: vortex_layout::segments::SegmentId, range: core::ops::range::Range) -> vortex_layout::segments::SegmentFuture + impl vortex_layout::segments::SegmentSource for vortex_layout::segments::SharedSegmentSource pub fn vortex_layout::segments::SharedSegmentSource::request(&self, id: vortex_layout::segments::SegmentId) -> vortex_layout::segments::SegmentFuture +pub fn vortex_layout::segments::SharedSegmentSource::request_range(&self, id: vortex_layout::segments::SegmentId, range: core::ops::range::Range) -> vortex_layout::segments::SegmentFuture + pub type vortex_layout::segments::SegmentFuture = futures_core::future::BoxFuture<'static, vortex_error::VortexResult> pub type vortex_layout::segments::SegmentSinkRef = alloc::sync::Arc @@ -1246,6 +1256,20 @@ impl core::fmt::Debug for vortex_layout::session::LayoutSession pub fn vortex_layout::session::LayoutSession::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result +pub struct vortex_layout::session::RangeReadEnabled(pub bool) + +impl core::clone::Clone for vortex_layout::session::RangeReadEnabled + +pub fn vortex_layout::session::RangeReadEnabled::clone(&self) -> vortex_layout::session::RangeReadEnabled + +impl core::default::Default for vortex_layout::session::RangeReadEnabled + +pub fn vortex_layout::session::RangeReadEnabled::default() -> Self + +impl core::fmt::Debug for vortex_layout::session::RangeReadEnabled + +pub fn vortex_layout::session::RangeReadEnabled::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + pub trait vortex_layout::session::LayoutSessionExt: vortex_session::SessionExt pub fn vortex_layout::session::LayoutSessionExt::layouts(&self) -> vortex_session::Ref<'_, vortex_layout::session::LayoutSession> diff --git a/vortex-layout/src/layouts/flat/mod.rs b/vortex-layout/src/layouts/flat/mod.rs index 6fa95a5e256..547a25e3903 100644 --- a/vortex-layout/src/layouts/flat/mod.rs +++ b/vortex-layout/src/layouts/flat/mod.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +pub(super) mod range_read; mod reader; pub mod writer; diff --git a/vortex-layout/src/layouts/flat/range_read.rs b/vortex-layout/src/layouts/flat/range_read.rs new file mode 100644 index 00000000000..5e353fb9d80 --- /dev/null +++ b/vortex-layout/src/layouts/flat/range_read.rs @@ -0,0 +1,966 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Range read support for FlatLayout segments. +//! +//! When the `array_tree` metadata is inlined in the layout (via `FLAT_LAYOUT_INLINE_ARRAY_NODE`), +//! we can inspect the encoding tree to compute which byte ranges of the segment are needed for a +//! given row range. This allows us to issue a smaller, targeted IO instead of reading the entire +//! segment. +//! +//! Each encoding implements [`VTable::plan_range_read`] to describe how its buffers and children +//! should be handled. The planner dispatches via vtable and recursively walks the encoding tree. + +use std::ops::Range; +use std::sync::Arc; + +use flatbuffers::root; +use futures::FutureExt; +use vortex_array::buffer::BufferHandle; +use vortex_array::dtype::DType; +use vortex_array::serde::ArrayParts; +use vortex_array::session::ArraySessionExt; +use vortex_array::vtable::range_read::BufferSubRange; +use vortex_array::vtable::range_read::ChildRangeRead; +use vortex_array::vtable::range_read::RangeDecodeInfo; +use vortex_buffer::Alignment; +use vortex_buffer::ByteBuffer; +use vortex_error::VortexResult; +use vortex_error::vortex_err; +use vortex_flatbuffers::array as fba; +use vortex_session::VortexSession; +use vortex_session::registry::ReadContext; + +use crate::layouts::SharedArrayFuture; +use crate::segments::SegmentId; +use crate::segments::SegmentSource; + +/// Maximum ratio of (range_size / full_segment_size) at which we attempt a range read. +/// If the range read would read more than this fraction of the full segment, we fall back to +/// a full read to avoid the overhead of range computation. +const RANGE_READ_THRESHOLD: f64 = 0.5; + +/// A plan describing which bytes to read from a segment for a given row range. +#[derive(Debug)] +pub(super) struct RangeReadPlan { + /// The byte range to read from the segment. + segment_byte_range: Range, + /// For each buffer index in the array_tree, the byte range within the partial segment + /// that contains that buffer's data. Empty range means the buffer is not needed. + buffer_ranges: Vec>, + /// The alignment requirement for each buffer. + buffer_alignments: Vec, + /// The number of logical rows to pass to `decode`. + decode_len: usize, + /// After decoding, slice the result to this range (relative to decode output). + /// `None` means no post-decode slicing is needed. + post_slice: Option>, +} + +/// Decode information returned by encoding analysis. +/// Describes how to decode the array after a range read. +#[derive(Debug, Clone)] +struct DecodeInfo { + /// The number of rows to pass to `decode()` at this level. + decode_len: usize, + /// After decoding, slice to this range. `None` means no slicing needed. + post_slice: Option>, +} + +/// Buffer offset, length, and alignment within the full segment. +#[derive(Debug, Clone)] +struct BufferLocation { + /// Byte offset within the full segment (after padding). + offset: usize, + /// Length in bytes. + length: usize, + /// Alignment requirement. + alignment: Alignment, +} + +/// Compute buffer locations from the Array flatbuffer's buffer descriptors. +fn compute_buffer_locations(fb_array: &fba::Array<'_>) -> Vec { + let mut offset = 0usize; + fb_array + .buffers() + .unwrap_or_default() + .iter() + .map(|buf| { + offset += buf.padding() as usize; + let loc = BufferLocation { + offset, + length: buf.length() as usize, + alignment: Alignment::from_exponent(buf.alignment_exponent()), + }; + offset += buf.length() as usize; + loc + }) + .collect() +} + +/// Tracks which buffers are needed and their required byte sub-ranges. +struct NeededBuffers { + /// For each buffer index: `Some(sub_range_within_buffer)` if needed, `None` if not. + entries: Vec>>, +} + +impl NeededBuffers { + fn new(num_buffers: usize) -> Self { + Self { + entries: vec![None; num_buffers], + } + } + + /// Mark a buffer as fully needed. + fn need_full(&mut self, buffer_idx: u16) { + let idx = buffer_idx as usize; + if idx < self.entries.len() { + self.entries[idx] = Some(0..usize::MAX); + } + } + + /// Mark a sub-range of a buffer as needed. + fn need_range(&mut self, buffer_idx: u16, range: Range) { + let idx = buffer_idx as usize; + if idx < self.entries.len() { + match &self.entries[idx] { + Some(existing) if existing.end == usize::MAX => { + // Already marked as fully needed, keep it. + } + Some(existing) => { + // Merge ranges (union). + let start = existing.start.min(range.start); + let end = existing.end.max(range.end); + self.entries[idx] = Some(start..end); + } + None => { + self.entries[idx] = Some(range); + } + } + } + } +} + +/// Recursively analyze the encoding tree via vtable dispatch to determine which buffer +/// byte ranges are needed for the given row range. +/// +/// Returns `Some(DecodeInfo)` if the encoding tree supports range reads, `None` to fall back. +fn analyze_encoding( + node: fba::ArrayNode<'_>, + row_range: Range, + row_count: usize, + dtype: &DType, + ctx: &ReadContext, + session: &VortexSession, + needed: &mut NeededBuffers, +) -> Option { + let encoding_id = ctx.resolve(node.encoding())?; + let vtable = session.arrays().registry().find(&encoding_id)?; + let metadata_bytes = node.metadata().map(|m| m.bytes()).unwrap_or(&[]); + + let plan = vtable.plan_range_read(metadata_bytes, row_range, row_count, dtype, session)?; + + // Apply buffer sub-ranges (local index → global index via flatbuffer). + apply_buffer_sub_ranges(node, &plan.buffer_sub_ranges, needed); + + // Match plan children against node children and recurse. + let child_decode_infos = resolve_children(node, &plan.children, ctx, session, needed)?; + + // Resolve decode info from the plan. + resolve_decode_info(plan.decode_info, &child_decode_infos) +} + +/// Map the plan's buffer sub-ranges to global buffer indices via the flatbuffer node. +fn apply_buffer_sub_ranges( + node: fba::ArrayNode<'_>, + sub_ranges: &[BufferSubRange], + needed: &mut NeededBuffers, +) { + if let Some(buffers) = node.buffers() { + for (local_idx, sub_range) in sub_ranges.iter().enumerate() { + if local_idx < buffers.len() { + let global_idx = buffers.get(local_idx); + match sub_range { + BufferSubRange::Full => needed.need_full(global_idx), + BufferSubRange::Range(range) => needed.need_range(global_idx, range.clone()), + } + } + } + } +} + +/// Pair each plan child with its corresponding node child and process them. +/// +/// Returns `None` if the plan's children don't exactly match the node's children count, +/// which typically means there is an unhandled validity child. +fn resolve_children( + node: fba::ArrayNode<'_>, + plan_children: &[ChildRangeRead], + ctx: &ReadContext, + session: &VortexSession, + needed: &mut NeededBuffers, +) -> Option>> { + let node_children: Vec<_> = node + .children() + .map_or(vec![], |c| (0..c.len()).map(|i| c.get(i)).collect()); + + // Plan must cover every child in the node. Un-covered children would have their + // buffers missing during decode. + if plan_children.len() != node_children.len() { + return None; + } + + let mut decode_infos = Vec::with_capacity(plan_children.len()); + for (action, child_node) in plan_children.iter().zip(node_children.iter()) { + match action { + ChildRangeRead::Recurse { + row_range, + row_count, + dtype, + } => { + let info = analyze_encoding( + *child_node, + row_range.clone(), + *row_count, + dtype, + ctx, + session, + needed, + ); + decode_infos.push(info); + } + ChildRangeRead::Full => { + need_all_node_buffers(*child_node, needed); + decode_infos.push(None); + } + } + } + + Some(decode_infos) +} + +/// Compute the final decode parameters from the plan's decode info and children results. +fn resolve_decode_info( + decode_info: RangeDecodeInfo, + child_decode_infos: &[Option], +) -> Option { + match decode_info { + RangeDecodeInfo::Leaf { + decode_len, + post_slice, + } => Some(DecodeInfo { + decode_len, + post_slice, + }), + RangeDecodeInfo::FromChild { child_idx, divisor } => { + let child_info = child_decode_infos.get(child_idx)?.as_ref()?; + if divisor == 1 { + return Some(child_info.clone()); + } + if child_info.decode_len % divisor != 0 { + return None; + } + let scaled_post_slice = match &child_info.post_slice { + None => None, + Some(ps) => { + if ps.start % divisor != 0 || ps.end % divisor != 0 { + return None; + } + Some(ps.start / divisor..ps.end / divisor) + } + }; + Some(DecodeInfo { + decode_len: child_info.decode_len / divisor, + post_slice: scaled_post_slice, + }) + } + } +} + +/// Recursively mark all buffers in a node (and its children) as fully needed. +fn need_all_node_buffers(node: fba::ArrayNode<'_>, needed: &mut NeededBuffers) { + if let Some(buffers) = node.buffers() { + for i in 0..buffers.len() { + needed.need_full(buffers.get(i)); + } + } + if let Some(children) = node.children() { + for i in 0..children.len() { + need_all_node_buffers(children.get(i), needed); + } + } +} + +/// Attempt to build a range read plan for the given array tree and row range. +/// +/// Returns `None` if: +/// - The encoding does not support range reads (fallback to full segment read). +/// - The row range covers the entire segment (no benefit). +/// - The computed byte range is not significantly smaller than the full segment. +pub(super) fn try_plan_range_read( + array_tree: &ByteBuffer, + row_range: Range, + row_count: usize, + dtype: &DType, + ctx: &ReadContext, + session: &VortexSession, +) -> VortexResult> { + // Should not happen, but guard against empty ranges to avoid downstream arithmetic issues + // (e.g. saturating_sub overflow in analyze_bitpacked). + if row_range.is_empty() { + return Ok(None); + } + + // No benefit if we need all rows. + if row_range.start == 0 && row_range.end >= row_count { + return Ok(None); + } + + // Parse the flatbuffer. + let fb_array = root::(array_tree.as_ref()) + .map_err(|e| vortex_err!("invalid array tree flatbuffer: {e}"))?; + + let buffer_locations = compute_buffer_locations(&fb_array); + let num_buffers = buffer_locations.len(); + + let root_node = fb_array + .root() + .ok_or_else(|| vortex_err!("array tree has no root node"))?; + + // Analyze the encoding tree via vtable dispatch. + let mut needed = NeededBuffers::new(num_buffers); + let decode_info = match analyze_encoding( + root_node, + row_range, + row_count, + dtype, + ctx, + session, + &mut needed, + ) { + Some(info) => info, + None => return Ok(None), + }; + + // Resolve the needed ranges against the buffer locations. + let mut min_offset = usize::MAX; + let mut max_end = 0usize; + for (i, entry) in needed.entries.iter_mut().enumerate() { + if let Some(range) = entry { + let loc = &buffer_locations[i]; + // Replace sentinel with full range. + if range.end == usize::MAX { + *range = 0..loc.length; + } + // Clamp to actual buffer length. + range.end = range.end.min(loc.length); + + let abs_start = loc.offset + range.start; + let abs_end = loc.offset + range.end; + min_offset = min_offset.min(abs_start); + max_end = max_end.max(abs_end); + } + } + + if min_offset >= max_end { + return Ok(None); + } + + let segment_byte_range = min_offset..max_end; + + // Check if the range read is worth it. + let full_segment_size: usize = buffer_locations + .last() + .map(|loc| loc.offset + loc.length) + .unwrap_or(0); + if full_segment_size == 0 { + return Ok(None); + } + let ratio = segment_byte_range.len() as f64 / full_segment_size as f64; + if ratio > RANGE_READ_THRESHOLD { + return Ok(None); + } + + // Compute per-buffer ranges within the partial segment and collect alignments. + let partial_offset = segment_byte_range.start; + let mut buffer_ranges = Vec::with_capacity(num_buffers); + let mut buffer_alignments = Vec::with_capacity(num_buffers); + for (i, entry) in needed.entries.iter().enumerate() { + let loc = &buffer_locations[i]; + buffer_alignments.push(loc.alignment); + if let Some(sub_range) = entry { + let abs_start = loc.offset + sub_range.start; + let abs_end = loc.offset + sub_range.end; + buffer_ranges.push((abs_start - partial_offset)..(abs_end - partial_offset)); + } else { + buffer_ranges.push(0..0); + } + } + + Ok(Some(RangeReadPlan { + segment_byte_range, + buffer_ranges, + buffer_alignments, + decode_len: decode_info.decode_len, + post_slice: decode_info.post_slice, + })) +} + +/// Execute a range read plan: issue a targeted IO, build ArrayParts from partial buffers, decode. +pub(super) fn execute_range_read( + plan: RangeReadPlan, + array_tree: ByteBuffer, + segment_id: SegmentId, + segment_source: Arc, + dtype: DType, + ctx: ReadContext, + session: VortexSession, +) -> SharedArrayFuture { + async move { + // 1. Issue the targeted read. + let partial = segment_source + .request_range(segment_id, plan.segment_byte_range.clone()) + .await?; + let partial_bytes = partial.try_to_host_sync()?; + + // 2. Slice individual buffers from the partial segment, ensuring alignment. + let mut buffers = Vec::with_capacity(plan.buffer_ranges.len()); + for (i, buf_range) in plan.buffer_ranges.iter().enumerate() { + if buf_range.is_empty() { + buffers.push(BufferHandle::new_host(ByteBuffer::empty())); + } else { + let slice = partial_bytes.slice_unaligned(buf_range.clone()); + let aligned = + BufferHandle::new_host(slice).ensure_aligned(plan.buffer_alignments[i])?; + buffers.push(aligned); + } + } + + // 3. Build ArrayParts and decode. + let parts = ArrayParts::from_flatbuffer_with_buffers(array_tree, buffers)?; + let mut array = parts.decode(&dtype, plan.decode_len, &ctx, &session)?; + + // 4. Post-decode slice if needed (block alignment). + if let Some(slice_range) = plan.post_slice { + array = array.slice(slice_range)?; + } + + Ok(array) + } + .map(|r| r.map_err(Arc::new)) + .boxed() + .shared() +} + +#[cfg(test)] +#[allow(clippy::cast_possible_truncation, clippy::unnecessary_cast)] +mod tests { + use std::sync::Arc; + + use rstest::rstest; + use vortex_array::ArrayContext; + use vortex_array::DynArray; + use vortex_array::IntoArray; + use vortex_array::MaskFuture; + use vortex_array::arrays::BoolArray; + use vortex_array::arrays::Dict; + use vortex_array::arrays::DictArray; + use vortex_array::arrays::FixedSizeListArray; + use vortex_array::arrays::NullArray; + use vortex_array::arrays::PrimitiveArray; + use vortex_array::assert_arrays_eq; + use vortex_array::expr::root; + use vortex_array::scalar_fn::session::ScalarFnSession; + use vortex_array::serde::SerializeOptions; + use vortex_array::session::ArraySessionExt; + use vortex_array::validity::Validity; + use vortex_buffer::Buffer; + use vortex_buffer::buffer; + use vortex_bytebool::ByteBool; + use vortex_bytebool::ByteBoolArray; + use vortex_error::VortexResult; + use vortex_fastlanes::BitPacked; + use vortex_fastlanes::BitPackedArray; + use vortex_fastlanes::Delta; + use vortex_fastlanes::DeltaArray; + use vortex_fastlanes::FoR; + use vortex_fastlanes::FoRArray; + use vortex_fastlanes::delta_compress; + use vortex_io::runtime::single::block_on; + use vortex_io::session::RuntimeSession; + use vortex_session::SessionExt; + use vortex_session::registry::ReadContext; + use vortex_zigzag::ZigZag; + use vortex_zigzag::ZigZagArray; + + use super::*; + use crate::layouts::flat::FlatLayout; + use crate::layouts::flat::reader::FlatReader; + use crate::segments::SegmentSink; + use crate::segments::TestSegments; + use crate::sequence::SequenceId; + use crate::session::LayoutSession; + use crate::session::RangeReadEnabled; + use crate::test::SESSION; + + /// Helper: serialize an array and return (array_tree, segment_id, segments). + fn write_with_array_tree( + array: &dyn DynArray, + ) -> VortexResult<(ByteBuffer, SegmentId, Arc, ReadContext)> { + let ctx = ArrayContext::empty(); + let buffers = array.serialize( + &ctx, + &SerializeOptions { + offset: 0, + include_padding: true, + }, + )?; + let array_tree = buffers[buffers.len() - 2].clone(); + let segments = Arc::new(TestSegments::default()); + let segment_id = + block_on(|_| async { segments.write(SequenceId::root().advance(), buffers).await })?; + let read_ctx = ReadContext::new(ctx.to_ids()); + Ok((array_tree, segment_id, segments, read_ctx)) + } + + #[test] + fn plan_returns_none_for_full_range() -> VortexResult<()> { + let array = PrimitiveArray::new( + buffer![1i32, 2, 3, 4, 5, 6, 7, 8, 9, 10], + Validity::AllValid, + ) + .into_array(); + let (array_tree, _, _, read_ctx) = write_with_array_tree(array.as_ref())?; + + let plan = try_plan_range_read(&array_tree, 0..10, 10, array.dtype(), &read_ctx, &SESSION)?; + assert!(plan.is_none()); + Ok(()) + } + + #[test] + fn plan_returns_some_for_sub_range() -> VortexResult<()> { + let array = PrimitiveArray::new( + buffer![1i32, 2, 3, 4, 5, 6, 7, 8, 9, 10], + Validity::AllValid, + ) + .into_array(); + let (array_tree, _, _, read_ctx) = write_with_array_tree(array.as_ref())?; + + let plan = try_plan_range_read(&array_tree, 2..4, 10, array.dtype(), &read_ctx, &SESSION)?; + let plan = plan.ok_or_else(|| vortex_err!("expected Some plan"))?; + assert_eq!(plan.decode_len, 2); + assert!(plan.post_slice.is_none()); + Ok(()) + } + + #[rstest] + #[case(0..3, &[1i32, 2, 3])] + #[case(2..5, &[3, 4, 5])] + #[case(7..10, &[8, 9, 10])] + #[case(0..1, &[1])] + #[case(9..10, &[10])] + fn primitive_range_read_end_to_end( + #[case] row_range: Range, + #[case] expected: &[i32], + ) -> VortexResult<()> { + let array = PrimitiveArray::new( + buffer![1i32, 2, 3, 4, 5, 6, 7, 8, 9, 10], + Validity::AllValid, + ) + .into_array(); + let (array_tree, segment_id, segments, read_ctx) = write_with_array_tree(array.as_ref())?; + + let layout = FlatLayout::new_with_metadata( + 10, + array.dtype().clone(), + segment_id, + read_ctx, + Some(array_tree), + ); + + let expected_array = + PrimitiveArray::new(Buffer::::from(expected.to_vec()), Validity::AllValid) + .into_array(); + + block_on(|_| async { + let reader = FlatReader::new( + layout, + "test".into(), + segments as Arc, + SESSION.clone(), + ); + + let result = crate::LayoutReader::projection_evaluation( + &reader, + &(row_range.start as u64..row_range.end as u64), + &root(), + MaskFuture::new_true(row_range.len()), + )? + .await?; + + assert_arrays_eq!(result, expected_array); + Ok(()) + }) + } + + #[test] + fn range_read_disabled_via_session() -> VortexResult<()> { + let array = PrimitiveArray::new( + buffer![1i32, 2, 3, 4, 5, 6, 7, 8, 9, 10], + Validity::AllValid, + ) + .into_array(); + let (array_tree, segment_id, segments, read_ctx) = write_with_array_tree(array.as_ref())?; + + // Create a session with range read disabled. + let session = VortexSession::empty() + .with::() + .with::() + .with::() + .with::(); + session.get_mut::().0 = false; + + let layout = FlatLayout::new_with_metadata( + 10, + array.dtype().clone(), + segment_id, + read_ctx, + Some(array_tree), + ); + + // Even though array_tree is present, range read should be skipped + // and the reader should fall back to full segment read + slice. + let expected = PrimitiveArray::new(buffer![3i32, 4], Validity::AllValid).into_array(); + + block_on(|_| async { + let reader = FlatReader::new( + layout, + "test".into(), + segments as Arc, + session, + ); + + let result = crate::LayoutReader::projection_evaluation( + &reader, + &(2..4), + &root(), + MaskFuture::new_true(2), + )? + .await?; + + assert_arrays_eq!(result, expected); + Ok(()) + }) + } + + #[test] + fn fallback_without_array_tree() -> VortexResult<()> { + let array = PrimitiveArray::new(buffer![1i32, 2, 3, 4, 5], Validity::AllValid).into_array(); + let ctx = ArrayContext::empty(); + let buffers = array.serialize( + &ctx, + &SerializeOptions { + offset: 0, + include_padding: true, + }, + )?; + let segments = Arc::new(TestSegments::default()); + let segment_id = + block_on(|_| async { segments.write(SequenceId::root().advance(), buffers).await })?; + + // No array_tree → range read not possible, should fall back to full read. + let layout = FlatLayout::new( + 5, + array.dtype().clone(), + segment_id, + ReadContext::new(ctx.to_ids()), + ); + + let expected = PrimitiveArray::new(buffer![3i32, 4], Validity::AllValid).into_array(); + + block_on(|_| async { + let reader = FlatReader::new( + layout, + "test".into(), + segments as Arc, + SESSION.clone(), + ); + + let result = crate::LayoutReader::projection_evaluation( + &reader, + &(2..4), + &root(), + MaskFuture::new_true(2), + )? + .await?; + + assert_arrays_eq!(result, expected); + Ok(()) + }) + } + + // For each supported encoding, write the array with array_tree metadata, + // read a sub-range via FlatReader, and compare with the canonical array + // sliced to the same range. + + const N: usize = 4096; + + /// Helper: round-trip an encoded array through write → FlatReader sub-range read, + /// and compare with `canonical.slice(row_range)`. + fn roundtrip_range( + encoded: &dyn DynArray, + canonical: &dyn DynArray, + row_range: Range, + ) -> VortexResult<()> { + let (array_tree, segment_id, segments, read_ctx) = write_with_array_tree(encoded)?; + let row_count = encoded.len() as u64; + let layout = FlatLayout::new_with_metadata( + row_count, + encoded.dtype().clone(), + segment_id, + read_ctx, + Some(array_tree), + ); + let expected = canonical.slice(row_range.clone())?; + + // Register encodings needed by these tests. + SESSION.arrays().register(Dict::ID, Dict); + SESSION.arrays().register(BitPacked::ID, BitPacked); + SESSION.arrays().register(FoR::ID, FoR); + SESSION.arrays().register(Delta::ID, Delta); + SESSION.arrays().register(ZigZag::ID, ZigZag); + + block_on(|_| async { + let reader = FlatReader::new( + layout, + "test".into(), + segments as Arc, + SESSION.clone(), + ); + let result = crate::LayoutReader::projection_evaluation( + &reader, + &(row_range.start as u64..row_range.end as u64), + &root(), + MaskFuture::new_true(row_range.len()), + )? + .await?; + assert_arrays_eq!(result, expected); + Ok(()) + }) + } + + /// Non-fallback: range read IS used for these encodings. + #[test] + fn non_fallback_range_reads() -> VortexResult<()> { + let ranges: &[Range] = &[ + 0..1, + 42..43, + 512..513, + 1000..1001, + 1023..1024, + 1024..1025, + 2048..2049, + 3000..3001, + 3500..3501, + 4095..4096, + 1500..1510, + ]; + + // Bool (byte-aligned starts only). + let bool_ranges: &[Range] = &[ + 0..1, + 8..9, + 16..17, + 64..65, + 128..129, + 256..257, + 512..513, + 1024..1025, + 2048..2049, + 4088..4089, + 1024..1034, + ]; + let bool_arr = BoolArray::from_iter((0..N).map(|i| i % 3 == 0)); + for r in bool_ranges { + roundtrip_range(bool_arr.as_ref(), bool_arr.as_ref(), r.clone())?; + } + + // BitPacked (no patches, no validity). + let bp_values: PrimitiveArray = (0..N).map(|i| (i % 256) as u32).collect(); + let bp_canonical = bp_values.into_array(); + let bp_encoded = BitPackedArray::encode(&bp_canonical, 8)?; + for r in ranges { + roundtrip_range(bp_encoded.as_ref(), bp_canonical.as_ref(), r.clone())?; + } + + // FoR (transparent wrapper → Primitive child). + let for_build = || -> PrimitiveArray { (0..N).map(|i| (i + 1000) as i32).collect() }; + let for_canonical = for_build().into_array(); + let for_encoded = FoRArray::encode(for_build())?; + for r in ranges { + roundtrip_range(for_encoded.as_ref(), for_canonical.as_ref(), r.clone())?; + } + + // Delta. + let delta_build = || -> PrimitiveArray { (0..N).map(|i| (i * 3 + 100) as u32).collect() }; + let delta_canonical = delta_build().into_array(); + let delta_encoded = DeltaArray::try_from_primitive_array(&delta_build())?; + for r in ranges { + roundtrip_range(delta_encoded.as_ref(), delta_canonical.as_ref(), r.clone())?; + } + + // Dict (high positions so codes sub-range + values < 50% of segment). + let dict_ranges: &[Range] = &[ + 3000..3001, + 3200..3201, + 3400..3401, + 3600..3601, + 3800..3801, + 4095..4096, + 3800..3810, + ]; + let codes: PrimitiveArray = (0..N).map(|i| (i % 5) as u8).collect(); + let values: PrimitiveArray = (0..5).map(|i| (i * 10) as i32).collect(); + let dict_encoded = DictArray::new(codes.into_array(), values.into_array()); + let dict_canonical: PrimitiveArray = (0..N).map(|i| ((i % 5) * 10) as i32).collect(); + for r in dict_ranges { + roundtrip_range(dict_encoded.as_ref(), dict_canonical.as_ref(), r.clone())?; + } + + // FixedSizeList (non-nullable, Primitive child). + let list_size = 4u32; + let total = N * list_size as usize; + let build_fsl = || { + let flat: PrimitiveArray = (0..total).map(|i| i as u32).collect(); + FixedSizeListArray::try_new(flat.into_array(), list_size, Validity::NonNullable, N) + .unwrap() + }; + let fsl_encoded = build_fsl(); + let fsl_canonical = build_fsl(); + for r in ranges { + roundtrip_range(fsl_encoded.as_ref(), fsl_canonical.as_ref(), r.clone())?; + } + + // ── Nested: FoR → BitPacked ── + // Manually construct: BitPack shifted values, then wrap in FoR. + let for_bp_canonical: PrimitiveArray = (0..N).map(|i| (i + 1000) as i32).collect(); + let shifted: PrimitiveArray = (0..N).map(|i| i as i32).collect(); + let bp = BitPackedArray::encode(&shifted.into_array(), 12)?; + let for_bp = FoRArray::try_new(bp.into_array(), 1000i32.into())?; + for r in ranges { + roundtrip_range(for_bp.as_ref(), for_bp_canonical.as_ref(), r.clone())?; + } + + // ── Nested: Delta → BitPacked deltas ── + let delta_bp_build = + || -> PrimitiveArray { (0..N).map(|i| (i * 3 + 100) as u32).collect() }; + let delta_bp_canonical = delta_bp_build().into_array(); + let (bases, deltas) = delta_compress(&delta_bp_build())?; + let deltas_len = deltas.len(); + let bp_deltas = BitPackedArray::encode(&deltas.into_array(), 16)?; + let delta_bp = + DeltaArray::try_new(bases.into_array(), bp_deltas.into_array(), 0, deltas_len)?; + for r in ranges { + roundtrip_range(delta_bp.as_ref(), delta_bp_canonical.as_ref(), r.clone())?; + } + + // ── Nested: FixedSizeList → BitPacked child ── + let fsl_bp_canonical = build_fsl(); + let flat_bp: PrimitiveArray = (0..total).map(|i| i as u32).collect(); + let bp_flat = BitPackedArray::encode(&flat_bp.into_array(), 16)?; + let fsl_bp = + FixedSizeListArray::try_new(bp_flat.into_array(), list_size, Validity::NonNullable, N)?; + for r in ranges { + roundtrip_range(fsl_bp.as_ref(), fsl_bp_canonical.as_ref(), r.clone())?; + } + + // ByteBool (1 byte per element). + SESSION.arrays().register(ByteBool::ID, ByteBool); + let bb_values: Vec = (0..N).map(|i| i % 3 == 0).collect(); + let bb = ByteBoolArray::from(bb_values); + // ByteBool roundtrips through itself (not BoolArray) since it decodes to bool?. + for r in ranges { + roundtrip_range(bb.as_ref(), bb.as_ref(), r.clone())?; + } + + // Null (no buffers, no children). + let null_arr = NullArray::new(N); + for r in ranges { + roundtrip_range(null_arr.as_ref(), null_arr.as_ref(), r.clone())?; + } + + Ok(()) + } + + /// Fallback: range read falls back to full segment read + slice. + #[test] + fn fallback_range_reads() -> VortexResult<()> { + let ranges: &[Range] = &[ + 0..1, + 42..43, + 512..513, + 1000..1001, + 1023..1024, + 1024..1025, + 2048..2049, + 3000..3001, + 3500..3501, + 4095..4096, + 1500..1510, + ]; + + // Nullable Primitive (validity child → range read unsupported). + let values: Vec = (0..N).map(|i| (i * 7 + 13) as i32).collect(); + let validity = Validity::from_iter((0..N).map(|i| i % 7 != 0)); + let nullable = PrimitiveArray::new(Buffer::from(values), validity); + for r in ranges { + roundtrip_range(nullable.as_ref(), nullable.as_ref(), r.clone())?; + } + + // Bool with non-byte-aligned start. + let unaligned_ranges: &[Range] = &[ + 1..2, + 3..4, + 5..6, + 7..8, + 13..14, + 42..43, + 100..101, + 999..1000, + 2047..2048, + 3999..4000, + 1025..1035, + ]; + let bool_arr = BoolArray::from_iter((0..N).map(|i| i % 3 == 0)); + for r in unaligned_ranges { + roundtrip_range(bool_arr.as_ref(), bool_arr.as_ref(), r.clone())?; + } + + // ── Nested fallback: FoR → nullable Primitive ── + // FoR is transparent, recurses into nullable Primitive which has validity child. + let validity = Validity::from_iter((0..N).map(|i| i % 5 != 0)); + let shifted: Vec = (0..N).map(|i| i as i32).collect(); + let shifted_arr = PrimitiveArray::new(Buffer::from(shifted), validity.clone()); + let for_nullable = FoRArray::try_new(shifted_arr.into_array(), 1000i32.into())?; + let for_canonical_values: Vec = (0..N).map(|i| (i + 1000) as i32).collect(); + let for_canonical = PrimitiveArray::new(Buffer::from(for_canonical_values), validity); + for r in ranges { + roundtrip_range(for_nullable.as_ref(), for_canonical.as_ref(), r.clone())?; + } + + // ── Nested fallback: ZigZag → nullable Primitive ── + // ZigZag is transparent, recurses into nullable Primitive. + let validity = Validity::from_iter((0..N).map(|i| i % 5 != 0)); + let encoded_values: Vec = (0..N).map(|i| (i * 2) as u32).collect(); + let encoded_arr = PrimitiveArray::new(Buffer::from(encoded_values), validity.clone()); + let zigzag = ZigZagArray::try_new(encoded_arr.into_array())?; + // zigzag_decode(2*i) = i + let zz_canonical_values: Vec = (0..N).map(|i| i as i32).collect(); + let zz_canonical = PrimitiveArray::new(Buffer::from(zz_canonical_values), validity); + for r in ranges { + roundtrip_range(zigzag.as_ref(), zz_canonical.as_ref(), r.clone())?; + } + + Ok(()) + } +} diff --git a/vortex-layout/src/layouts/flat/reader.rs b/vortex-layout/src/layouts/flat/reader.rs index 727566d3db8..191d0b2526e 100644 --- a/vortex-layout/src/layouts/flat/reader.rs +++ b/vortex-layout/src/layouts/flat/reader.rs @@ -19,12 +19,14 @@ use vortex_array::serde::ArrayParts; use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_mask::Mask; +use vortex_session::SessionExt; use vortex_session::VortexSession; use crate::LayoutReader; use crate::layouts::SharedArrayFuture; use crate::layouts::flat::FlatLayout; use crate::segments::SegmentSource; +use crate::session::RangeReadEnabled; /// The threshold of mask density below which we will evaluate the expression only over the /// selected rows, and above which we evaluate the expression over all rows and then select @@ -85,6 +87,37 @@ impl FlatReader { .boxed() .shared() } + + /// Try to perform a range read for the given row range. + /// + /// Returns `None` if range read is not supported or not beneficial, in which case + /// the caller should fall back to `array_future()`. The decision is made purely in + /// memory (zero IO) based on the inlined `array_tree` metadata. + fn try_range_read_array(&self, row_range: Range) -> Option { + if !self.session.get::().0 { + return None; + } + let array_tree = self.layout.array_tree()?; + let row_count = usize::try_from(self.layout.row_count()).ok()?; + let plan = super::range_read::try_plan_range_read( + array_tree, + row_range, + row_count, + self.layout.dtype(), + self.layout.array_ctx(), + &self.session, + ) + .ok()??; + Some(super::range_read::execute_range_read( + plan, + array_tree.clone(), + self.layout.segment_id(), + self.segment_source.clone(), + self.layout.dtype().clone(), + self.layout.array_ctx().clone(), + self.session.clone(), + )) + } } impl LayoutReader for FlatReader { @@ -130,7 +163,13 @@ impl LayoutReader for FlatReader { ..usize::try_from(row_range.end) .vortex_expect("Row range end must fit within FlatLayout size"); let name = self.name.clone(); - let array = self.array_future(); + + // Try range read; fall back to full segment read if unsupported or not beneficial. + let (array, already_sliced) = match self.try_range_read_array(row_range.clone()) { + Some(fut) => (fut, true), + None => (self.array_future(), false), + }; + let expr = expr.clone(); let session = self.session.clone(); @@ -141,8 +180,8 @@ impl LayoutReader for FlatReader { let mut array = array.clone().await?; let mask = mask.await?; - // Slice the array based on the row mask. - if row_range.start > 0 || row_range.end < array.len() { + // Only slice when we read the full segment. + if !already_sliced && (row_range.start > 0 || row_range.end < array.len()) { array = array.slice(row_range.clone())?; } @@ -188,7 +227,13 @@ impl LayoutReader for FlatReader { ..usize::try_from(row_range.end) .vortex_expect("Row range end must fit within FlatLayout size"); let name = self.name.clone(); - let array = self.array_future(); + + // Try range read; fall back to full segment read if unsupported or not beneficial. + let (array, already_sliced) = match self.try_range_read_array(row_range.clone()) { + Some(fut) => (fut, true), + None => (self.array_future(), false), + }; + let expr = expr.clone(); Ok(async move { @@ -197,8 +242,8 @@ impl LayoutReader for FlatReader { let mut array = array.clone().await?; let mask = mask.await?; - // Slice the array based on the row mask. - if row_range.start > 0 || row_range.end < array.len() { + // Only slice when we read the full segment. + if !already_sliced && (row_range.start > 0 || row_range.end < array.len()) { array = array.slice(row_range.clone())?; } diff --git a/vortex-layout/src/segments/cache.rs b/vortex-layout/src/segments/cache.rs index 174c6b1e637..b99dfc528a2 100644 --- a/vortex-layout/src/segments/cache.rs +++ b/vortex-layout/src/segments/cache.rs @@ -136,6 +136,23 @@ impl SegmentCacheSourceAdapter { } impl SegmentSource for SegmentCacheSourceAdapter { + fn request_range(&self, id: SegmentId, range: std::ops::Range) -> SegmentFuture { + let cache = self.cache.clone(); + let source = self.source.clone(); + + async move { + // Check the cache first; if the full segment is cached, slice from it. + if let Ok(Some(segment)) = cache.get(id).await { + tracing::debug!("Resolved segment {} range from cache", id); + let slice = segment.slice_unaligned(range); + return Ok(BufferHandle::new_host(slice)); + } + // Cache miss: delegate to the inner source's range read. + source.request_range(id, range).await + } + .boxed() + } + fn request(&self, id: SegmentId) -> SegmentFuture { let cache = self.cache.clone(); let delegate = self.source.request(id); diff --git a/vortex-layout/src/segments/shared.rs b/vortex-layout/src/segments/shared.rs index c794daf608e..0216fd246bc 100644 --- a/vortex-layout/src/segments/shared.rs +++ b/vortex-layout/src/segments/shared.rs @@ -38,6 +38,12 @@ impl SharedSegmentSource { } impl SegmentSource for SharedSegmentSource { + fn request_range(&self, id: SegmentId, range: std::ops::Range) -> SegmentFuture { + // Range requests are not deduplicated since each caller may request + // different byte ranges within the same segment. + self.inner.request_range(id, range) + } + fn request(&self, id: SegmentId) -> SegmentFuture { loop { match self.in_flight.entry(id) { diff --git a/vortex-layout/src/segments/source.rs b/vortex-layout/src/segments/source.rs index a48a79b2889..ddfded29744 100644 --- a/vortex-layout/src/segments/source.rs +++ b/vortex-layout/src/segments/source.rs @@ -1,6 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::ops::Range; + +use futures::FutureExt; use futures::future::BoxFuture; use vortex_array::buffer::BufferHandle; use vortex_error::VortexResult; @@ -13,4 +16,19 @@ pub type SegmentFuture = BoxFuture<'static, VortexResult>; pub trait SegmentSource: 'static + Send + Sync { /// Request a segment, returning a future that will eventually resolve to the segment data. fn request(&self, id: SegmentId) -> SegmentFuture; + + /// Request a byte range within a segment. + /// + /// The default implementation reads the full segment and then slices. + /// Implementations backed by random-access storage (files, object stores) should override + /// this to issue a targeted read. + fn request_range(&self, id: SegmentId, range: Range) -> SegmentFuture { + let fut = self.request(id); + async move { + let buffer = fut.await?; + let host = buffer.try_to_host_sync()?; + Ok(BufferHandle::new_host(host.slice_unaligned(range))) + } + .boxed() + } } diff --git a/vortex-layout/src/session.rs b/vortex-layout/src/session.rs index 131115950e4..f335a88c2bd 100644 --- a/vortex-layout/src/session.rs +++ b/vortex-layout/src/session.rs @@ -54,6 +54,22 @@ impl Default for LayoutSession { } } +/// Session variable controlling whether FlatLayout range reads are enabled. +/// +/// When `true` (the default), the reader will attempt to read only the byte range +/// needed for the requested rows instead of reading the entire segment. This +/// requires the `array_tree` metadata to be inlined in the layout footer at write +/// time; when the metadata is absent, the reader falls back to a full segment read +/// regardless of this setting. +#[derive(Debug, Clone)] +pub struct RangeReadEnabled(pub bool); + +impl Default for RangeReadEnabled { + fn default() -> Self { + Self(true) + } +} + /// Extension trait for accessing layout session data. pub trait LayoutSessionExt: SessionExt { /// Returns the layout encoding registry. diff --git a/vortex-scan/public-api.lock b/vortex-scan/public-api.lock index 7456d12de14..b53a0e957aa 100644 --- a/vortex-scan/public-api.lock +++ b/vortex-scan/public-api.lock @@ -329,3 +329,5 @@ pub fn vortex_scan::ScanBuilder::with_some_limit(self, limit: core::option::O pub fn vortex_scan::ScanBuilder::with_some_metrics_registry(self, metrics: core::option::Option>) -> Self pub fn vortex_scan::ScanBuilder::with_split_by(self, split_by: vortex_scan::SplitBy) -> Self + +pub fn vortex_scan::ScanBuilder::with_split_row_indices(self, split: bool) -> Self diff --git a/vortex-scan/src/scan_builder.rs b/vortex-scan/src/scan_builder.rs index 39510453004..f2fca0b220e 100644 --- a/vortex-scan/src/scan_builder.rs +++ b/vortex-scan/src/scan_builder.rs @@ -74,6 +74,10 @@ pub struct ScanBuilder { /// The row-offset assigned to the first row of the file. Used by the `row_idx` expression, /// but not by the scan [`Selection`] which remains relative. row_offset: u64, + /// Whether to split row indices into merged ranges via [`attempt_split_ranges`]. + /// When `false` and the selection is `IncludeByIndex` with no `row_range`, + /// each index gets its own tight single-row range for maximum range-read benefit. + split_row_indices: bool, } impl ScanBuilder { @@ -95,6 +99,7 @@ impl ScanBuilder { file_stats: None, limit: None, row_offset: 0, + split_row_indices: true, } } @@ -161,6 +166,17 @@ impl ScanBuilder { self } + /// Whether to merge row indices into broader ranges when splitting. + /// + /// When `true` (the default), nearby row indices are merged into ranges + /// via density and gap heuristics. When `false` and the selection is + /// `IncludeByIndex` with no `row_range`, each index gets its own + /// single-row range for maximum range-read benefit. + pub fn with_split_row_indices(mut self, split: bool) -> Self { + self.split_row_indices = split; + self + } + pub fn with_row_offset(mut self, row_offset: u64) -> Self { self.row_offset = row_offset; self @@ -233,6 +249,7 @@ impl ScanBuilder { file_stats: self.file_stats, limit: self.limit, row_offset: self.row_offset, + split_row_indices: self.split_row_indices, map_fn: Arc::new(move |a| old_map_fn(a).and_then(&map_fn)), } } @@ -270,20 +287,36 @@ impl ScanBuilder { filter_and_projection_masks(&projection, filter.as_ref(), layout_reader.dtype())?; let field_mask: Vec<_> = [filter_mask, projection_mask].concat(); - let splits = - if let Some(ranges) = attempt_split_ranges(&self.selection, self.row_range.as_ref()) { - Splits::Ranges(ranges) - } else { - let split_range = self - .row_range - .clone() - .unwrap_or_else(|| 0..layout_reader.row_count()); - Splits::Natural(self.split_by.splits( - layout_reader.as_ref(), - &split_range, - &field_mask, - )?) + let splits = if !self.split_row_indices + && matches!(self.selection, Selection::IncludeByIndex(_)) + && self.row_range.is_none() + { + // Per-index tight ranges for maximum range-read benefit. + let indices = match &self.selection { + Selection::IncludeByIndex(indices) => indices, + _ => unreachable!(), }; + let ranges: Vec<_> = indices + .as_slice() + .iter() + .copied() + .map(|idx| idx..idx + 1) + .collect(); + Splits::Ranges(ranges) + } else if let Some(ranges) = attempt_split_ranges(&self.selection, self.row_range.as_ref()) + { + Splits::Ranges(ranges) + } else { + let split_range = self + .row_range + .clone() + .unwrap_or_else(|| 0..layout_reader.row_count()); + Splits::Natural(self.split_by.splits( + layout_reader.as_ref(), + &split_range, + &field_mask, + )?) + }; Ok(RepeatedScan::new( self.session.clone(),