@@ -42,13 +42,10 @@ pub struct BloomFilter {
4242 pub ( super ) seed : u64 ,
4343 /// Number of hash functions to use (k)
4444 pub ( super ) num_hashes : u16 ,
45- /// Total number of bits in the filter (m)
46- pub ( super ) capacity_bits : u64 ,
4745 /// Count of bits set to 1 (for statistics)
4846 pub ( super ) num_bits_set : u64 ,
4947 /// Bit array packed into u64 words
50- /// Length = ceil(capacity_bits / 64)
51- pub ( super ) bit_array : Vec < u64 > ,
48+ pub ( super ) bit_array : Box < [ u64 ] > ,
5249}
5350
5451impl BloomFilter {
@@ -246,26 +243,10 @@ impl BloomFilter {
246243 /// // Now "apple" probably returns false, and most other items return true
247244 /// ```
248245 pub fn invert ( & mut self ) {
249- // Invert bits and count during operation (single pass)
250- let mut num_bits_set = 0 ;
251246 for word in & mut self . bit_array {
252247 * word = !* word;
253- num_bits_set += word. count_ones ( ) as u64 ;
254- }
255-
256- // Mask off excess bits in the last word
257- let excess_bits = self . capacity_bits % 64 ;
258- if excess_bits != 0 {
259- let last_idx = self . bit_array . len ( ) - 1 ;
260- let old_count = self . bit_array [ last_idx] . count_ones ( ) as u64 ;
261- let mask = ( 1u64 << excess_bits) - 1 ;
262- self . bit_array [ last_idx] &= mask;
263- let new_count = self . bit_array [ last_idx] . count_ones ( ) as u64 ;
264- // Adjust count for masked-off bits
265- num_bits_set = num_bits_set - old_count + new_count;
266248 }
267-
268- self . num_bits_set = num_bits_set;
249+ self . num_bits_set = self . capacity ( ) as u64 - self . num_bits_set ;
269250 }
270251
271252 /// Returns whether the filter is empty (no items inserted).
@@ -281,8 +262,8 @@ impl BloomFilter {
281262 }
282263
283264 /// Returns the total number of bits in the filter (capacity).
284- pub fn capacity ( & self ) -> u64 {
285- self . capacity_bits
265+ pub fn capacity ( & self ) -> usize {
266+ self . bit_array . len ( ) * 64
286267 }
287268
288269 /// Returns the number of hash functions used.
@@ -300,7 +281,7 @@ impl BloomFilter {
300281 /// Values near 0.5 indicate the filter is approaching saturation.
301282 /// Values above 0.5 indicate degraded false positive rates.
302283 pub fn load_factor ( & self ) -> f64 {
303- self . num_bits_set as f64 / self . capacity_bits as f64
284+ self . num_bits_set as f64 / self . capacity ( ) as f64
304285 }
305286
306287 /// Estimates the current false positive probability.
@@ -327,8 +308,8 @@ impl BloomFilter {
327308 /// - Capacity (number of bits)
328309 /// - Number of hash functions
329310 /// - Seed
330- pub fn is_compatible ( & self , other : & BloomFilter ) -> bool {
331- self . capacity_bits == other. capacity_bits
311+ pub fn is_compatible ( & self , other : & Self ) -> bool {
312+ self . bit_array . len ( ) == other. bit_array . len ( )
332313 && self . num_hashes == other. num_hashes
333314 && self . seed == other. seed
334315 }
@@ -374,8 +355,8 @@ impl BloomFilter {
374355
375356 bytes. write_u64_le ( self . seed ) ;
376357
377- // Bit array capacity stored as number of 64-bit words (int32) + unused padding (uint32)
378- let num_longs = ( self . capacity_bits / 64 ) as i32 ;
358+ // Bit array capacity is stored as number of 64-bit words (int32) + unused padding (uint32).
359+ let num_longs = self . bit_array . len ( ) as i32 ;
379360 bytes. write_i32_le ( num_longs) ;
380361 bytes. write_u32_le ( 0 ) ; // unused
381362
@@ -454,6 +435,13 @@ impl BloomFilter {
454435 let num_hashes = cursor
455436 . read_u16_le ( )
456437 . map_err ( |_| Error :: insufficient_data ( "num_hashes" ) ) ?;
438+ if num_hashes == 0 || num_hashes > i16:: MAX as u16 {
439+ return Err ( Error :: deserial ( format ! (
440+ "invalid num_hashes: expected [1, {}], got {}" ,
441+ i16 :: MAX ,
442+ num_hashes
443+ ) ) ) ;
444+ }
457445 // Bytes 6-7: unused (u16)
458446 let _unused = cursor
459447 . read_u16_le ( )
@@ -462,17 +450,23 @@ impl BloomFilter {
462450 . read_u64_le ( )
463451 . map_err ( |_| Error :: insufficient_data ( "seed" ) ) ?;
464452
465- // Bit array capacity stored as number of 64-bit words (int32) + unused padding (uint32)
453+ // Bit array capacity is stored as number of 64-bit words (int32) + unused padding (uint32).
466454 let num_longs = cursor
467455 . read_i32_le ( )
468- . map_err ( |_| Error :: insufficient_data ( "num_longs" ) ) ? as u64 ;
456+ . map_err ( |_| Error :: insufficient_data ( "num_longs" ) ) ?;
469457 let _unused = cursor
470458 . read_u32_le ( )
471459 . map_err ( |_| Error :: insufficient_data ( "unused" ) ) ?;
472460
473- let capacity_bits = num_longs * 64 ; // Convert longs to bits
461+ if num_longs <= 0 {
462+ return Err ( Error :: deserial ( format ! (
463+ "invalid num_longs: expected at least 1, got {}" ,
464+ num_longs
465+ ) ) ) ;
466+ }
467+
474468 let num_words = num_longs as usize ;
475- let mut bit_array = vec ! [ 0u64 ; num_words] ;
469+ let mut bit_array = vec ! [ 0u64 ; num_words] . into_boxed_slice ( ) ;
476470 let num_bits_set;
477471
478472 if is_empty {
@@ -493,14 +487,21 @@ impl BloomFilter {
493487 if raw_num_bits_set == DIRTY_BITS_VALUE {
494488 num_bits_set = bit_array. iter ( ) . map ( |w| w. count_ones ( ) as u64 ) . sum ( ) ;
495489 } else {
490+ let raw_num_words_set = raw_num_bits_set. div_ceil ( 64 ) as usize ;
491+ if raw_num_words_set > num_words {
492+ return Err ( Error :: deserial ( format ! (
493+ "invalid num_bits_set: expected <= {}, got {}" ,
494+ num_words * 64 ,
495+ raw_num_bits_set
496+ ) ) ) ;
497+ }
496498 num_bits_set = raw_num_bits_set;
497499 }
498500 }
499501
500502 Ok ( BloomFilter {
501503 seed,
502504 num_hashes,
503- capacity_bits,
504505 num_bits_set,
505506 bit_array,
506507 } )
@@ -554,7 +555,7 @@ impl BloomFilter {
554555 /// The right shift by 1 improves bit distribution. The index `i` is 1-based.
555556 fn compute_bit_index ( & self , h0 : u64 , h1 : u64 , i : u16 ) -> u64 {
556557 let hash = h0. wrapping_add ( u64:: from ( i) . wrapping_mul ( h1) ) ;
557- ( hash >> 1 ) % self . capacity_bits
558+ ( hash >> 1 ) % self . capacity ( ) as u64
558559 }
559560
560561 /// Gets the value of a single bit.
@@ -598,6 +599,13 @@ mod tests {
598599 assert_eq ! ( filter. num_hashes( ) , 5 ) ;
599600 }
600601
602+ #[ test]
603+ fn test_builder_with_size_rounds_to_word_boundary ( ) {
604+ let filter = BloomFilterBuilder :: with_size ( 1 , 3 ) . build ( ) ;
605+ assert_eq ! ( filter. capacity( ) , 64 ) ;
606+ assert_eq ! ( filter. num_hashes( ) , 3 ) ;
607+ }
608+
601609 #[ test]
602610 fn test_insert_and_contains ( ) {
603611 let mut filter = BloomFilterBuilder :: with_accuracy ( 100 , 0.01 ) . build ( ) ;
0 commit comments