@@ -16,7 +16,6 @@ use vortex_array::arrays::ExtensionArray;
1616use vortex_array:: arrays:: FixedSizeListArray ;
1717use vortex_array:: arrays:: ListArray ;
1818use vortex_array:: arrays:: ListViewArray ;
19- use vortex_array:: arrays:: Primitive ;
2019use vortex_array:: arrays:: StructArray ;
2120use vortex_array:: arrays:: TemporalArray ;
2221use vortex_array:: arrays:: listview:: list_from_list_view;
@@ -30,18 +29,14 @@ use vortex_array::scalar::Scalar;
3029use vortex_array:: vtable:: ValidityHelper ;
3130use vortex_error:: VortexResult ;
3231
32+ use crate :: ArrayAndStats ;
3333use crate :: BtrBlocksCompressorBuilder ;
3434use crate :: CompressorContext ;
35- use crate :: CompressorStats ;
3635use crate :: GenerateStatsOptions ;
3736use crate :: Scheme ;
3837use crate :: SchemeId ;
39- use crate :: StatsCache ;
4038use crate :: compressor:: decimal:: compress_decimal;
41- use crate :: compressor:: float:: FloatStats ;
4239use crate :: compressor:: integer:: DictScheme as IntDictScheme ;
43- use crate :: compressor:: integer:: IntegerStats ;
44- use crate :: compressor:: string:: StringStats ;
4540use crate :: compressor:: temporal:: compress_temporal;
4641
4742/// The main compressor type implementing BtrBlocks-inspired compression.
@@ -223,69 +218,41 @@ impl BtrBlocksCompressor {
223218 }
224219
225220 let before_nbytes = array. nbytes ( ) ;
226- let needs_distinct = eligible. iter ( ) . any ( |s| s. needs_distinct_values ( ) ) ;
227- let mut cache = StatsCache :: new ( ) ;
228-
229- // Pre-populate the stats cache with the right `count_distinct_values` setting.
230- // This matches the old `gen_stats` behavior where distinct values were only computed
231- // when Dict was in the scheme list.
232- if let Some ( prim) = array. as_opt :: < Primitive > ( ) {
233- let prim = prim. to_primitive ( ) ;
234- if prim. ptype ( ) . is_int ( ) {
235- cache. get_or_insert_with :: < IntegerStats > ( || {
236- IntegerStats :: generate_opts (
237- & prim,
238- GenerateStatsOptions {
239- count_distinct_values : needs_distinct,
240- } ,
241- )
242- } ) ;
243- } else {
244- cache. get_or_insert_with :: < FloatStats > ( || {
245- FloatStats :: generate_opts (
246- & prim,
247- GenerateStatsOptions {
248- count_distinct_values : needs_distinct,
249- } ,
250- )
251- } ) ;
252- }
253- } else if array. as_opt :: < vortex_array:: arrays:: VarBinView > ( ) . is_some ( ) {
254- cache. get_or_insert_with :: < StringStats > ( || {
255- StringStats :: generate_opts (
256- & array. to_varbinview ( ) ,
257- GenerateStatsOptions {
258- count_distinct_values : needs_distinct,
259- } ,
260- )
221+ let merged_opts = eligible
222+ . iter ( )
223+ . fold ( GenerateStatsOptions :: default ( ) , |acc, s| {
224+ acc. merge ( s. stats_options ( ) )
261225 } ) ;
262- }
263226
264- if let Some ( winner) = self . choose_scheme ( & eligible, & array, ctx, & mut cache, excludes) ? {
265- let compressed = winner. compress ( self , & array, ctx, & mut cache, excludes) ?;
227+ let mut ctx = ctx;
228+ ctx. stats_options = merged_opts;
229+
230+ let mut data = ArrayAndStats :: new ( array, merged_opts) ;
231+
232+ if let Some ( winner) = self . choose_scheme ( & eligible, & mut data, ctx, excludes) ? {
233+ let compressed = winner. compress ( self , & mut data, ctx, excludes) ?;
266234 if compressed. nbytes ( ) < before_nbytes {
267235 return Ok ( compressed) ;
268236 }
269237 }
270238
271239 // No scheme improved on the original.
272- Ok ( array )
240+ Ok ( data . into_array ( ) )
273241 }
274242
275243 /// Evaluates each candidate scheme and returns the one with the best compression ratio
276244 /// (must be > 1.0).
277245 fn choose_scheme (
278246 & self ,
279247 schemes : & [ & ' static dyn Scheme ] ,
280- array : & ArrayRef ,
248+ data : & mut ArrayAndStats ,
281249 ctx : CompressorContext ,
282- cache : & mut StatsCache ,
283250 excludes : & [ SchemeId ] ,
284251 ) -> VortexResult < Option < & ' static dyn Scheme > > {
285252 let mut best: Option < ( & ' static dyn Scheme , f64 ) > = None ;
286253
287254 for & scheme in schemes {
288- let ratio = self . evaluate_scheme ( scheme, array , ctx, cache , excludes) ?;
255+ let ratio = self . evaluate_scheme ( scheme, data , ctx, excludes) ?;
289256 if is_valid_ratio ( ratio) && ratio > 1.0 && best. is_none_or ( |( _, r) | ratio > r) {
290257 best = Some ( ( scheme, ratio) ) ;
291258 }
@@ -298,12 +265,11 @@ impl BtrBlocksCompressor {
298265 fn evaluate_scheme (
299266 & self ,
300267 scheme : & ' static dyn Scheme ,
301- array : & ArrayRef ,
268+ data : & mut ArrayAndStats ,
302269 ctx : CompressorContext ,
303- cache : & mut StatsCache ,
304270 excludes : & [ SchemeId ] ,
305271 ) -> VortexResult < f64 > {
306- let ratio = scheme. expected_compression_ratio ( self , array , ctx, cache , excludes) ?;
272+ let ratio = scheme. expected_compression_ratio ( self , data , ctx, excludes) ?;
307273
308274 tracing:: debug!(
309275 scheme = %scheme. id( ) ,
0 commit comments