Skip to content

Commit 9ade42d

Browse files
authored
docs: add rustdoc examples across sketches (#58)
* docs: add rustdoc examples across sketches

  Signed-off-by: Chojan Shang <psiace@apache.org>

* docs: make docs clean and simple

  Signed-off-by: Chojan Shang <psiace@apache.org>

* docs: align examples with Rust conventions

  Signed-off-by: Chojan Shang <psiace@apache.org>

---------

Signed-off-by: Chojan Shang <psiace@apache.org>
1 parent 9a76561 commit 9ade42d

13 files changed

Lines changed: 691 additions & 0 deletions

File tree

datasketches/src/countmin/mod.rs

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,25 @@
1919
//!
2020
//! The Count-Min sketch provides approximate frequency counts for streaming data
2121
//! with configurable relative error and confidence bounds.
22+
//!
23+
//! # Usage
24+
//!
25+
//! ```rust
26+
//! # use datasketches::countmin::CountMinSketch;
27+
//! let mut sketch = CountMinSketch::new(5, 256);
28+
//! sketch.update("apple");
29+
//! sketch.update_with_weight("banana", 3);
30+
//! assert!(sketch.estimate("banana") >= 3);
31+
//! ```
32+
//!
33+
//! # Configuration Helpers
34+
//!
35+
//! ```rust
36+
//! # use datasketches::countmin::CountMinSketch;
37+
//! let buckets = CountMinSketch::suggest_num_buckets(0.01);
38+
//! let hashes = CountMinSketch::suggest_num_hashes(0.99);
39+
//! let _sketch = CountMinSketch::new(hashes, buckets);
40+
//! ```
2241
2342
mod serialization;
2443

datasketches/src/countmin/sketch.rs

Lines changed: 90 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,14 @@ impl CountMinSketch {
5454
///
5555
/// Panics if `num_hashes` is 0, `num_buckets` is less than 3, or the
5656
/// total table size exceeds the supported limit.
57+
///
58+
/// # Examples
59+
///
60+
/// ```rust
61+
/// # use datasketches::countmin::CountMinSketch;
62+
/// let sketch = CountMinSketch::new(4, 128);
63+
/// assert_eq!(sketch.num_buckets(), 128);
64+
/// ```
5765
pub fn new(num_hashes: u8, num_buckets: u32) -> Self {
5866
Self::with_seed(num_hashes, num_buckets, DEFAULT_UPDATE_SEED)
5967
}
@@ -64,6 +72,14 @@ impl CountMinSketch {
6472
///
6573
/// Panics if `num_hashes` is 0, `num_buckets` is less than 3, or the
6674
/// total table size exceeds the supported limit.
75+
///
76+
/// # Examples
77+
///
78+
/// ```rust
79+
/// # use datasketches::countmin::CountMinSketch;
80+
/// let sketch = CountMinSketch::with_seed(4, 64, 42);
81+
/// assert_eq!(sketch.seed(), 42);
82+
/// ```
6783
pub fn with_seed(num_hashes: u8, num_buckets: u32, seed: u64) -> Self {
6884
let entries = entries_for_config(num_hashes, num_buckets);
6985
Self::make(num_hashes, num_buckets, seed, entries)
@@ -127,11 +143,29 @@ impl CountMinSketch {
127143
}
128144

129145
/// Updates the sketch with a single occurrence of the item.
146+
///
147+
/// # Examples
148+
///
149+
/// ```rust
150+
/// # use datasketches::countmin::CountMinSketch;
151+
/// let mut sketch = CountMinSketch::new(4, 128);
152+
/// sketch.update("apple");
153+
/// assert!(sketch.estimate("apple") >= 1);
154+
/// ```
130155
pub fn update<T: Hash>(&mut self, item: T) {
131156
self.update_with_weight(item, 1);
132157
}
133158

134159
/// Updates the sketch with the given item and weight.
160+
///
161+
/// # Examples
162+
///
163+
/// ```rust
164+
/// # use datasketches::countmin::CountMinSketch;
165+
/// let mut sketch = CountMinSketch::new(4, 128);
166+
/// sketch.update_with_weight("banana", 3);
167+
/// assert!(sketch.estimate("banana") >= 3);
168+
/// ```
135169
pub fn update_with_weight<T: Hash>(&mut self, item: T, weight: i64) {
136170
if weight == 0 {
137171
return;
@@ -147,6 +181,15 @@ impl CountMinSketch {
147181
}
148182

149183
/// Returns the estimated frequency of the given item.
184+
///
185+
/// # Examples
186+
///
187+
/// ```rust
188+
/// # use datasketches::countmin::CountMinSketch;
189+
/// let mut sketch = CountMinSketch::new(4, 128);
190+
/// sketch.update_with_weight("pear", 2);
191+
/// assert!(sketch.estimate("pear") >= 2);
192+
/// ```
150193
pub fn estimate<T: Hash>(&self, item: T) -> i64 {
151194
let num_buckets = self.num_buckets as usize;
152195
let mut min = i64::MAX;
@@ -178,6 +221,20 @@ impl CountMinSketch {
178221
/// # Panics
179222
///
180223
/// Panics if the sketches have incompatible configurations.
224+
///
225+
/// # Examples
226+
///
227+
/// ```rust
228+
/// # use datasketches::countmin::CountMinSketch;
229+
/// let mut left = CountMinSketch::new(4, 128);
230+
/// let mut right = CountMinSketch::new(4, 128);
231+
///
232+
/// left.update("apple");
233+
/// right.update_with_weight("banana", 2);
234+
///
235+
/// left.merge(&right);
236+
/// assert!(left.estimate("banana") >= 2);
237+
/// ```
181238
pub fn merge(&mut self, other: &CountMinSketch) {
182239
if std::ptr::eq(self, other) {
183240
panic!("Cannot merge a sketch with itself.");
@@ -195,6 +252,17 @@ impl CountMinSketch {
195252
}
196253

197254
/// Serializes this sketch into the DataSketches Count-Min format.
255+
///
256+
/// # Examples
257+
///
258+
/// ```rust
259+
/// # use datasketches::countmin::CountMinSketch;
260+
/// # let mut sketch = CountMinSketch::new(4, 128);
261+
/// # sketch.update("apple");
262+
/// let bytes = sketch.serialize();
263+
/// let decoded = CountMinSketch::deserialize(&bytes).unwrap();
264+
/// assert!(decoded.estimate("apple") >= 1);
265+
/// ```
198266
pub fn serialize(&self) -> Vec<u8> {
199267
let header_size = PREAMBLE_LONGS_SHORT as usize * LONG_SIZE_BYTES;
200268
let payload_size = if self.is_empty() {
@@ -227,11 +295,33 @@ impl CountMinSketch {
227295
}
228296

229297
/// Deserializes a sketch from bytes using the default seed.
298+
///
299+
/// # Examples
300+
///
301+
/// ```rust
302+
/// # use datasketches::countmin::CountMinSketch;
303+
/// # let mut sketch = CountMinSketch::new(4, 64);
304+
/// # sketch.update("apple");
305+
/// # let bytes = sketch.serialize();
306+
/// let decoded = CountMinSketch::deserialize(&bytes).unwrap();
307+
/// assert!(decoded.estimate("apple") >= 1);
308+
/// ```
230309
pub fn deserialize(bytes: &[u8]) -> Result<Self, Error> {
231310
Self::deserialize_with_seed(bytes, DEFAULT_UPDATE_SEED)
232311
}
233312

234313
/// Deserializes a sketch from bytes using the provided seed.
314+
///
315+
/// # Examples
316+
///
317+
/// ```rust
318+
/// # use datasketches::countmin::CountMinSketch;
319+
/// # let mut sketch = CountMinSketch::with_seed(4, 64, 7);
320+
/// # sketch.update("apple");
321+
/// # let bytes = sketch.serialize();
322+
/// let decoded = CountMinSketch::deserialize_with_seed(&bytes, 7).unwrap();
323+
/// assert!(decoded.estimate("apple") >= 1);
324+
/// ```
235325
pub fn deserialize_with_seed(bytes: &[u8], seed: u64) -> Result<Self, Error> {
236326
fn make_error(tag: &'static str) -> impl FnOnce(std::io::Error) -> Error {
237327
move |_| Error::insufficient_data(tag)

datasketches/src/error.rs

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,16 @@ impl fmt::Display for ErrorKind {
4646
}
4747

4848
/// Error is the error struct returned by all datasketches functions.
49+
///
50+
/// # Examples
51+
///
52+
/// ```
53+
/// # use datasketches::error::Error;
54+
/// # use datasketches::error::ErrorKind;
55+
/// let err = Error::new(ErrorKind::InvalidArgument, "bad input");
56+
/// assert_eq!(err.kind(), ErrorKind::InvalidArgument);
57+
/// assert_eq!(err.message(), "bad input");
58+
/// ```
4959
pub struct Error {
5060
kind: ErrorKind,
5161
message: String,

datasketches/src/frequencies/mod.rs

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,30 @@
2323
//!
2424
//! For background, see the Java documentation:
2525
//! <https://apache.github.io/datasketches-java/9.0.0/org/apache/datasketches/frequencies/FrequentItemsSketch.html>
26+
//!
27+
//! # Usage
28+
//!
29+
//! ```rust
30+
//! # use datasketches::frequencies::ErrorType;
31+
//! # use datasketches::frequencies::FrequentItemsSketch;
32+
//! let mut sketch = FrequentItemsSketch::<i64>::new(64);
33+
//! sketch.update_with_count(1, 3);
34+
//! sketch.update(2);
35+
//! let rows = sketch.frequent_items(ErrorType::NoFalseNegatives);
36+
//! assert!(rows.iter().any(|row| *row.item() == 1));
37+
//! ```
38+
//!
39+
//! # Serialization
40+
//!
41+
//! ```rust
42+
//! # use datasketches::frequencies::FrequentItemsSketch;
43+
//! let mut sketch = FrequentItemsSketch::<i64>::new(64);
44+
//! sketch.update_with_count(42, 2);
45+
//!
46+
//! let bytes = sketch.serialize();
47+
//! let decoded = FrequentItemsSketch::<i64>::deserialize(&bytes).unwrap();
48+
//! assert!(decoded.estimate(&42) >= 2);
49+
//! ```
2650
2751
mod reverse_purge_item_hash_map;
2852
mod serialization;

0 commit comments

Comments
 (0)