Skip to content

Commit 6081b5b

Browse files
committed
Add FlatLayout range read for sub-segment IO
When a FlatLayout has its array_tree metadata inlined in the footer, we can determine exactly which bytes of the segment are needed for a given row range without any IO. This lets us issue a single small read instead of fetching the entire segment, which is a big win for point lookups and narrow scans on wide tables.

The range read planner walks the encoding tree (Primitive, Bool, BitPacked, Delta, FoR, ZigZag, ALP, ALPRD, Dict, FixedSizeList, Constant) and computes the minimal contiguous byte range covering the needed buffers. If that range is less than 50% of the full segment, we issue the targeted read; otherwise we fall back to reading the whole segment.

To make Delta work with sub-ranged buffers, Delta::build() now derives child array lengths from `len + offset` instead of `metadata.deltas_len`. On disk, offset is always 0, so this is a no-op for the normal decode path, but it lets the range read pass a smaller decode_len without the decoder panicking on a buffer size mismatch.

Also adds `request_range()` to the SegmentSource trait with a default fallback implementation, efficient overrides in FileSegmentSource and BufferSegmentSource, a `RangeReadEnabled` session flag, and `pub const NAME` on all encoding structs for pattern matching in the planner.

Signed-off-by: jiaqizho <jiaqi.zhou@zilliz.com>
1 parent 2a0d1b7 commit 6081b5b

46 files changed

Lines changed: 1698 additions & 59 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

encodings/alp/src/alp/array.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,8 @@ pub struct ALPArray {
273273
pub struct ALP;
274274

275275
impl ALP {
276-
pub const ID: ArrayId = ArrayId::new_ref("vortex.alp");
276+
pub const NAME: &str = "vortex.alp";
277+
pub const ID: ArrayId = ArrayId::new_ref(Self::NAME);
277278
}
278279

279280
#[derive(Clone, prost::Message)]

encodings/alp/src/alp_rd/array.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,8 @@ pub struct ALPRDArray {
371371
pub struct ALPRD;
372372

373373
impl ALPRD {
374-
pub const ID: ArrayId = ArrayId::new_ref("vortex.alprd");
374+
pub const NAME: &str = "vortex.alprd";
375+
pub const ID: ArrayId = ArrayId::new_ref(Self::NAME);
375376
}
376377

377378
impl ALPRDArray {

encodings/bytebool/src/array.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,8 @@ pub struct ByteBoolArray {
213213
pub struct ByteBool;
214214

215215
impl ByteBool {
216-
pub const ID: ArrayId = ArrayId::new_ref("vortex.bytebool");
216+
pub const NAME: &str = "vortex.bytebool";
217+
pub const ID: ArrayId = ArrayId::new_ref(Self::NAME);
217218
}
218219

219220
impl ByteBoolArray {

encodings/datetime-parts/src/array.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,8 @@ pub struct DateTimePartsArrayParts {
267267
pub struct DateTimeParts;
268268

269269
impl DateTimeParts {
270-
pub const ID: ArrayId = ArrayId::new_ref("vortex.datetimeparts");
270+
pub const NAME: &str = "vortex.datetimeparts";
271+
pub const ID: ArrayId = ArrayId::new_ref(Self::NAME);
271272
}
272273

273274
impl DateTimePartsArray {

encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,8 @@ impl DecimalBytePartsArray {
274274
pub struct DecimalByteParts;
275275

276276
impl DecimalByteParts {
277-
pub const ID: ArrayId = ArrayId::new_ref("vortex.decimal_byte_parts");
277+
pub const NAME: &str = "vortex.decimal_byte_parts";
278+
pub const ID: ArrayId = ArrayId::new_ref(Self::NAME);
278279
}
279280

280281
/// Converts a DecimalBytePartsArray to its canonical DecimalArray representation.

encodings/fastlanes/src/bitpacking/vtable/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -372,5 +372,6 @@ impl VTable for BitPacked {
372372
pub struct BitPacked;
373373

374374
impl BitPacked {
375-
pub const ID: ArrayId = ArrayId::new_ref("fastlanes.bitpacked");
375+
pub const NAME: &str = "fastlanes.bitpacked";
376+
pub const ID: ArrayId = ArrayId::new_ref(Self::NAME);
376377
}

encodings/fastlanes/src/delta/vtable/mod.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ use vortex_array::vtable::VTable;
2525
use vortex_array::vtable::ValidityVTableFromChildSliceHelper;
2626
use vortex_error::VortexResult;
2727
use vortex_error::vortex_ensure;
28-
use vortex_error::vortex_err;
2928
use vortex_error::vortex_panic;
3029
use vortex_session::VortexSession;
3130

@@ -177,11 +176,12 @@ impl VTable for Delta {
177176
let ptype = PType::try_from(dtype)?;
178177
let lanes = match_each_unsigned_integer_ptype!(ptype, |T| { <T as FastLanes>::LANES });
179178

180-
// Compute the length of the bases array
181-
let deltas_len = usize::try_from(metadata.0.deltas_len)
182-
.map_err(|_| vortex_err!("deltas_len {} overflowed usize", metadata.0.deltas_len))?;
179+
// Compute the length of the deltas array from len + offset rather than metadata.
180+
// This allows range reads to work with sub-ranged children, where the buffer is
181+
// shorter than the original metadata.deltas_len.
182+
let deltas_len = len + metadata.0.offset as usize;
183183
let num_chunks = deltas_len / 1024;
184-
let remainder_base_size = if deltas_len % 1024 > 0 { 1 } else { 0 };
184+
let remainder_base_size = if !deltas_len.is_multiple_of(1024) { 1 } else { 0 };
185185
let bases_len = num_chunks * lanes + remainder_base_size;
186186

187187
let bases = children.get(0, dtype, bases_len)?;
@@ -201,7 +201,8 @@ impl VTable for Delta {
201201
pub struct Delta;
202202

203203
impl Delta {
204-
pub const ID: ArrayId = ArrayId::new_ref("fastlanes.delta");
204+
pub const NAME: &str = "fastlanes.delta";
205+
pub const ID: ArrayId = ArrayId::new_ref(Self::NAME);
205206
}
206207

207208
#[cfg(test)]

encodings/fastlanes/src/for/vtable/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,5 +184,6 @@ impl VTable for FoR {
184184
pub struct FoR;
185185

186186
impl FoR {
187-
pub const ID: ArrayId = ArrayId::new_ref("fastlanes.for");
187+
pub const NAME: &str = "fastlanes.for";
188+
pub const ID: ArrayId = ArrayId::new_ref(Self::NAME);
188189
}

encodings/fastlanes/src/rle/vtable/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,8 @@ impl VTable for RLE {
242242
pub struct RLE;
243243

244244
impl RLE {
245-
pub const ID: ArrayId = ArrayId::new_ref("fastlanes.rle");
245+
pub const NAME: &str = "fastlanes.rle";
246+
pub const ID: ArrayId = ArrayId::new_ref(Self::NAME);
246247
}
247248

248249
#[cfg(test)]

0 commit comments

Comments
 (0)