From 355df2d338785577b58d0d5caa7e2ed36c536b9c Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Thu, 19 Mar 2026 10:40:39 -0400 Subject: [PATCH] compress dict array values Signed-off-by: Connor Tsui --- vortex-btrblocks/src/compressor/float/mod.rs | 3 ++- .../src/compressor/integer/mod.rs | 25 +++++++++++++------ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/vortex-btrblocks/src/compressor/float/mod.rs b/vortex-btrblocks/src/compressor/float/mod.rs index 57bb4dc65f3..1897c1e5ced 100644 --- a/vortex-btrblocks/src/compressor/float/mod.rs +++ b/vortex-btrblocks/src/compressor/float/mod.rs @@ -433,13 +433,14 @@ impl Scheme for DictScheme { let has_all_values_referenced = dict.has_all_values_referenced(); let DictArrayParts { codes, values, .. } = dict.into_parts(); + // TODO(connor): This should probably be extending the `excludes` list. let compressed_codes = compressor.compress_canonical( Canonical::Primitive(codes.to_primitive()), ctx.descend(), Excludes::int_only(&[IntCode::Dict, IntCode::Sequence]), )?; - assert!(values.is_canonical()); + // TODO(connor): This should probably be extending the `excludes` list. let compressed_values = compressor.compress_canonical( Canonical::Primitive(values.to_primitive()), ctx.descend(), diff --git a/vortex-btrblocks/src/compressor/integer/mod.rs b/vortex-btrblocks/src/compressor/integer/mod.rs index 2c46e4cdd80..bdce7d7fa9f 100644 --- a/vortex-btrblocks/src/compressor/integer/mod.rs +++ b/vortex-btrblocks/src/compressor/integer/mod.rs @@ -703,21 +703,32 @@ impl Scheme for DictScheme { let dict = dictionary_encode(stats); - // Cascade the codes child - // Don't allow SequenceArray as the codes child as it merely adds extra indirection without actually compressing data. - let mut new_excludes = vec![IntCode::Dict, IntCode::Sequence]; - new_excludes.extend_from_slice(excludes); + // Cascade the codes child. + // Don't allow SequenceArray as the codes child as it merely adds extra indirection without + // actually compressing data. + let mut codes_excludes = vec![IntCode::Dict, IntCode::Sequence]; + codes_excludes.extend_from_slice(excludes); let compressed_codes = compressor.compress_canonical( Canonical::Primitive(dict.codes().to_primitive().narrow()?), ctx.descend(), - Excludes::int_only(&new_excludes), + Excludes::int_only(&codes_excludes), + )?; + + // Cascade the values child. + let mut values_excludes = vec![IntCode::Dict]; + values_excludes.extend_from_slice(excludes); + + let compressed_values = compressor.compress_canonical( + Canonical::Primitive(dict.values().to_primitive()), + ctx.descend(), + Excludes::int_only(&values_excludes), )?; - // SAFETY: compressing codes does not change their values + // SAFETY: Compressing the arrays does not change their logical values. unsafe { Ok( - DictArray::new_unchecked(compressed_codes, dict.values().clone()) + DictArray::new_unchecked(compressed_codes, compressed_values) .set_all_values_referenced(dict.has_all_values_referenced()) .into_array(), )