Skip to content

Commit 788369c

Browse files
committed
feat: add THETA family support and refactor pre_longs validation in CompactThetaSketch
1 parent 28ae034 commit 788369c

3 files changed

Lines changed: 21 additions & 9 deletions

File tree

datasketches/src/codec/family.rs

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,14 @@ impl Family {
8080
min_pre_longs: 3,
8181
max_pre_longs: 4,
8282
};
83+
84+
/// Theta Sketch for cardinality estimation.
85+
pub const THETA: Family = Family {
86+
id: 3,
87+
name: "THETA",
88+
min_pre_longs: 1,
89+
max_pre_longs: 3,
90+
};
8391
}
8492

8593
impl Family {

datasketches/src/theta/serialization.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
2020
pub(crate) const UNCOMPRESSED_SERIAL_VERSION: u8 = 3;
2121
pub(crate) const COMPRESSED_SERIAL_VERSION: u8 = 4;
22-
pub(crate) const THETA_FAMILY_ID: u8 = 3;
2322

2423
pub(crate) const FLAGS_IS_READ_ONLY: u8 = 1 << 1;
2524
pub(crate) const FLAGS_IS_EMPTY: u8 = 1 << 2;

datasketches/src/theta/sketch.rs

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use std::hash::Hash;
2424

2525
use crate::codec::SketchBytes;
2626
use crate::codec::SketchSlice;
27+
use crate::codec::family::Family;
2728
use crate::common::NumStdDev;
2829
use crate::common::ResizeFactor;
2930
use crate::common::binomial_bounds;
@@ -431,7 +432,7 @@ impl CompactThetaSketch {
431432
let pre_longs = self.preamble_longs(false);
432433
bytes.write_u8(pre_longs);
433434
bytes.write_u8(serialization::UNCOMPRESSED_SERIAL_VERSION);
434-
bytes.write_u8(serialization::THETA_FAMILY_ID);
435+
bytes.write_u8(Family::THETA.id);
435436
bytes.write_u16_be(0); // unused for compact
436437

437438
let mut flags = 0u8;
@@ -473,7 +474,7 @@ impl CompactThetaSketch {
473474

474475
bytes.write_u8(pre_longs);
475476
bytes.write_u8(serialization::COMPRESSED_SERIAL_VERSION);
476-
bytes.write_u8(serialization::THETA_FAMILY_ID);
477+
bytes.write_u8(Family::THETA.id);
477478
bytes.write_u8(entry_bits);
478479
bytes.write_u8(num_entries_bytes);
479480

@@ -564,12 +565,16 @@ impl CompactThetaSketch {
564565
let ser_ver = cursor.read_u8().map_err(make_error("serial_version"))?;
565566
let family_id = cursor.read_u8().map_err(make_error("family_id"))?;
566567

567-
if family_id != serialization::THETA_FAMILY_ID {
568-
return Err(Error::invalid_family(
569-
serialization::THETA_FAMILY_ID,
570-
family_id,
571-
"CompactThetaSketch",
572-
));
568+
Family::THETA.validate_id(family_id)?;
569+
570+
// Validate pre_longs is within valid range for Theta sketch
571+
if !(Family::THETA.min_pre_longs..=Family::THETA.max_pre_longs).contains(&pre_longs) {
572+
return Err(Error::deserial(format!(
573+
"invalid preamble longs: expected [{}, {}], got {}",
574+
Family::THETA.min_pre_longs,
575+
Family::THETA.max_pre_longs,
576+
pre_longs,
577+
)));
573578
}
574579

575580
match ser_ver {

0 commit comments

Comments
 (0)