From eca49d3b52c7a57668e80f687fcc96df23f5d876 Mon Sep 17 00:00:00 2001 From: cancaicai <2356672992@qq.com> Date: Sat, 7 Mar 2026 23:02:24 +0800 Subject: [PATCH] refactor(layout): add shared mask/row_range debug assertions Add a shared debug-only helper to assert mask length matches row_range length, and apply it across dict, chunked, flat, struct, and zoned readers to catch mask-range mismatch earlier during pruning/filter/projection paths. Signed-off-by: cancaicai <2356672992@qq.com> Made-with: Cursor --- vortex-layout/src/layouts/chunked/reader.rs | 7 +++++ vortex-layout/src/layouts/dict/reader.rs | 9 ++++++- vortex-layout/src/layouts/flat/reader.rs | 8 +++++- vortex-layout/src/layouts/struct_/reader.rs | 7 +++++ vortex-layout/src/layouts/zoned/reader.rs | 6 +++++ vortex-layout/src/reader.rs | 30 +++++++++++++++++++++ 6 files changed, 65 insertions(+), 2 deletions(-) diff --git a/vortex-layout/src/layouts/chunked/reader.rs b/vortex-layout/src/layouts/chunked/reader.rs index 34816d5bdf8..0513c54a815 100644 --- a/vortex-layout/src/layouts/chunked/reader.rs +++ b/vortex-layout/src/layouts/chunked/reader.rs @@ -27,6 +27,7 @@ use crate::LayoutReaderRef; use crate::LazyReaderChildren; use crate::layouts::chunked::ChunkedLayout; use crate::reader::LayoutReader; +use crate::reader::debug_assert_mask_matches_row_range; use crate::segments::SegmentSource; /// A [`LayoutReader`] for chunked layouts. @@ -206,6 +207,8 @@ impl LayoutReader for ChunkedReader { expr: &Expression, mask: Mask, ) -> VortexResult { + debug_assert_mask_matches_row_range(mask.len(), row_range, "chunked pruning"); + if row_range.is_empty() { return Ok(MaskFuture::ready(mask)); } @@ -252,6 +255,8 @@ impl LayoutReader for ChunkedReader { expr: &Expression, mask: MaskFuture, ) -> VortexResult { + debug_assert_mask_matches_row_range(mask.len(), row_range, "chunked filter"); + if row_range.is_empty() { return Ok(mask); } @@ -291,6 +296,8 @@ impl LayoutReader for ChunkedReader { expr: &Expression, mask: MaskFuture, ) -> VortexResult>> { + debug_assert_mask_matches_row_range(mask.len(), row_range, "chunked projection"); + let dtype = expr.return_dtype(self.dtype())?; if row_range.is_empty() { return Ok( diff --git a/vortex-layout/src/layouts/dict/reader.rs b/vortex-layout/src/layouts/dict/reader.rs index 5054fcd27f3..72c3bba85a5 100644 --- a/vortex-layout/src/layouts/dict/reader.rs +++ b/vortex-layout/src/layouts/dict/reader.rs @@ -34,6 +34,7 @@ use super::DictLayout; use crate::LayoutReader; use crate::LayoutReaderRef; use crate::layouts::SharedArrayFuture; +use crate::reader::debug_assert_mask_matches_row_range; use crate::segments::SegmentSource; pub struct DictReader { @@ -156,10 +157,12 @@ impl LayoutReader for DictReader { fn pruning_evaluation( &self, - _row_range: &Range, + row_range: &Range, _expr: &Expression, mask: Mask, ) -> VortexResult { + debug_assert_mask_matches_row_range(mask.len(), row_range, "dict pruning"); + // NOTE: we can get the values here, convert expression to the codes domain, and push down // to the codes child. We don't do that here because: // - Reading values only for an approx filter is expensive @@ -173,6 +176,8 @@ impl LayoutReader for DictReader { expr: &Expression, mask: MaskFuture, ) -> VortexResult { + debug_assert_mask_matches_row_range(mask.len(), row_range, "dict filter"); + // TODO(joe): fix up expr partitioning with fallible & null sensitive annotations let values_eval = self.values_eval(expr.clone()); @@ -205,6 +210,8 @@ impl LayoutReader for DictReader { expr: &Expression, mask: MaskFuture, ) -> VortexResult>> { + debug_assert_mask_matches_row_range(mask.len(), row_range, "dict projection"); + // TODO: fix up expr partitioning with fallible & null sensitive annotations let values_eval = self.values_eval(root()); let codes_eval = self diff --git a/vortex-layout/src/layouts/flat/reader.rs b/vortex-layout/src/layouts/flat/reader.rs index 727566d3db8..31298031d0b 100644 --- a/vortex-layout/src/layouts/flat/reader.rs +++ b/vortex-layout/src/layouts/flat/reader.rs @@ -24,6 +24,7 @@ use vortex_session::VortexSession; use crate::LayoutReader; use crate::layouts::SharedArrayFuture; use crate::layouts::flat::FlatLayout; +use crate::reader::debug_assert_mask_matches_row_range; use crate::segments::SegmentSource; /// The threshold of mask density below which we will evaluate the expression only over the @@ -112,10 +113,11 @@ impl LayoutReader for FlatReader { fn pruning_evaluation( &self, - _row_range: &Range, + row_range: &Range, _expr: &Expression, mask: Mask, ) -> VortexResult { + debug_assert_mask_matches_row_range(mask.len(), row_range, "flat pruning"); Ok(MaskFuture::ready(mask)) } @@ -125,6 +127,8 @@ impl LayoutReader for FlatReader { expr: &Expression, mask: MaskFuture, ) -> VortexResult { + debug_assert_mask_matches_row_range(mask.len(), row_range, "flat filter"); + let row_range = usize::try_from(row_range.start) .vortex_expect("Row range begin must fit within FlatLayout size") ..usize::try_from(row_range.end) @@ -183,6 +187,8 @@ impl LayoutReader for FlatReader { expr: &Expression, mask: MaskFuture, ) -> VortexResult>> { + debug_assert_mask_matches_row_range(mask.len(), row_range, "flat projection"); + let row_range = usize::try_from(row_range.start) .vortex_expect("Row range begin must fit within FlatLayout size") ..usize::try_from(row_range.end) diff --git a/vortex-layout/src/layouts/struct_/reader.rs b/vortex-layout/src/layouts/struct_/reader.rs index fc665731bdd..ffa3949da35 100644 --- a/vortex-layout/src/layouts/struct_/reader.rs +++ b/vortex-layout/src/layouts/struct_/reader.rs @@ -44,6 +44,7 @@ use crate::LayoutReaderRef; use crate::LazyReaderChildren; use crate::layouts::partitioned::PartitionedExprEval; use crate::layouts::struct_::StructLayout; +use crate::reader::debug_assert_mask_matches_row_range; use crate::segments::SegmentSource; pub struct StructReader { @@ -249,6 +250,8 @@ impl LayoutReader for StructReader { expr: &Expression, mask: Mask, ) -> VortexResult { + debug_assert_mask_matches_row_range(mask.len(), row_range, "struct pruning"); + // Partition the expression into expressions that can be evaluated over individual fields match &self.partition_expr(expr.clone()) { Partitioned::Single(name, partition) => self @@ -271,6 +274,8 @@ impl LayoutReader for StructReader { expr: &Expression, mask: MaskFuture, ) -> VortexResult { + debug_assert_mask_matches_row_range(mask.len(), row_range, "struct filter"); + // Partition the expression into expressions that can be evaluated over individual fields match &self.partition_expr(expr.clone()) { Partitioned::Single(name, partition) => self @@ -308,6 +313,8 @@ impl LayoutReader for StructReader { expr: &Expression, mask_fut: MaskFuture, ) -> VortexResult { + debug_assert_mask_matches_row_range(mask_fut.len(), row_range, "struct projection"); + let validity_fut = self .validity()? .map(|reader| reader.projection_evaluation(row_range, &root(), mask_fut.clone())) diff --git a/vortex-layout/src/layouts/zoned/reader.rs b/vortex-layout/src/layouts/zoned/reader.rs index 50b7780cae0..f9c1d013cbe 100644 --- a/vortex-layout/src/layouts/zoned/reader.rs +++ b/vortex-layout/src/layouts/zoned/reader.rs @@ -39,6 +39,7 @@ use crate::LayoutReaderRef; use crate::LazyReaderChildren; use crate::layouts::zoned::ZonedLayout; use crate::layouts::zoned::zone_map::ZoneMap; +use crate::reader::debug_assert_mask_matches_row_range; use crate::segments::SegmentSource; type SharedZoneMap = Shared>>; @@ -243,6 +244,8 @@ impl LayoutReader for ZonedReader { expr: &Expression, mask: Mask, ) -> VortexResult { + debug_assert_mask_matches_row_range(mask.len(), row_range, "zoned pruning"); + tracing::debug!("Stats pruning evaluation: {} - {}", &self.name, expr); let data_eval = self .data_child()? @@ -315,6 +318,7 @@ impl LayoutReader for ZonedReader { expr: &Expression, mask: MaskFuture, ) -> VortexResult { + debug_assert_mask_matches_row_range(mask.len(), row_range, "zoned filter"); self.data_child()?.filter_evaluation(row_range, expr, mask) } @@ -324,6 +328,8 @@ impl LayoutReader for ZonedReader { expr: &Expression, mask: MaskFuture, ) -> VortexResult>> { + debug_assert_mask_matches_row_range(mask.len(), row_range, "zoned projection"); + // TODO(ngates): there are some projection expressions that we may also be able to // short-circuit with statistics. self.data_child()? diff --git a/vortex-layout/src/reader.rs b/vortex-layout/src/reader.rs index 9ccd695c572..52c6dcb6dfa 100644 --- a/vortex-layout/src/reader.rs +++ b/vortex-layout/src/reader.rs @@ -15,6 +15,7 @@ use vortex_array::builtins::ArrayBuiltins; use vortex_array::dtype::DType; use vortex_array::dtype::FieldMask; use vortex_array::expr::Expression; +use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_mask::Mask; @@ -91,6 +92,35 @@ pub trait LayoutReader: 'static + Send + Sync { pub type ArrayFuture = BoxFuture<'static, VortexResult>; +#[cfg(debug_assertions)] +#[inline] +pub(crate) fn debug_assert_mask_matches_row_range( + mask_len: usize, + row_range: &Range, + context: &str, +) { + let row_range_len = usize::try_from( + row_range + .end + .checked_sub(row_range.start) + .vortex_expect("row range underflow"), + ) + .vortex_expect("row range length must fit usize"); + debug_assert_eq!( + mask_len, row_range_len, + "{context}: mask length must match row range length" + ); +} + +#[cfg(not(debug_assertions))] +#[inline] +pub(crate) fn debug_assert_mask_matches_row_range( + _mask_len: usize, + _row_range: &Range, + _context: &str, +) { +} + pub trait ArrayFutureExt { fn masked(self, mask: MaskFuture) -> Self; }