
Commit 8f8f23f

committed
Added cumulative sum tracking in ContiguousStorage
1 parent e8fdfa4 commit 8f8f23f

5 files changed

Lines changed: 150 additions & 41 deletions


QuantileFlow/ddsketch/core.py

Lines changed: 15 additions & 9 deletions
@@ -1,6 +1,6 @@
 """Core DDSketch implementation.
 
-Optimized for high throughput to match or exceed Datadog's implementation.
+Optimized for high throughput with efficient bucket indexing and quantile queries.
 """
 
 from typing import Literal, Union
@@ -80,7 +80,7 @@ def __init__(
         self.count = 0.0
         self.zero_count = 0.0
 
-        # Summary stats (like Datadog)
+        # Summary stats
         self._min = float('+inf')
         self._max = float('-inf')
         self._sum = 0.0
@@ -96,27 +96,33 @@ def insert(self, value: Union[int, float], weight: float = 1.0) -> None:
         Raises:
             ValueError: If value is negative and cont_neg is False.
         """
+        # Cache method lookups for hot-path optimization
         if value > 0:
-            self.positive_store.add(self.mapping.compute_bucket_index(value), weight)
+            # Most common case: positive values
+            # Inline the hot path with cached local references
+            compute_idx = self.mapping.compute_bucket_index
+            self.positive_store.add(compute_idx(value), weight)
         elif value < 0:
             if self.cont_neg:
-                self.negative_store.add(self.mapping.compute_bucket_index(-value), weight)
+                compute_idx = self.mapping.compute_bucket_index
+                self.negative_store.add(compute_idx(-value), weight)
             else:
                 raise ValueError("Negative values not supported when cont_neg is False")
         else:
             self.zero_count += weight
 
-        # Track summary stats
+        # Track summary stats in one combined update
         self.count += weight
         self._sum += value * weight
+        # Update min/max
         if value < self._min:
             self._min = value
         if value > self._max:
             self._max = value
 
-    # Alias for compatibility with Datadog's API
+    # Alias for API compatibility
     def add(self, value: Union[int, float], weight: float = 1.0) -> None:
-        """Alias for insert() to match Datadog's API."""
+        """Alias for insert()."""
         self.insert(value, weight)
 
     def delete(self, value: Union[int, float]) -> None:
@@ -186,9 +192,9 @@ def quantile(self, q: float) -> float:
         key = self.positive_store.key_at_rank(rank)
         return self.mapping.compute_value_from_index(key)
 
-    # Alias for Datadog compatibility
+    # Alias for API compatibility
     def get_quantile_value(self, quantile: float) -> float:
-        """Alias for quantile() to match Datadog's API."""
+        """Alias for quantile()."""
         try:
             return self.quantile(quantile)
         except ValueError:
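
The insert() change binds self.mapping.compute_bucket_index to a local name before calling it, which skips a repeated attribute lookup on the hot path. A minimal standalone sketch of that pattern, not taken from the repository (the Mapping class and numbers here are illustrative); the win is largest when the bound method is reused many times inside a loop:

import timeit

class Mapping:
    def compute_bucket_index(self, value: float) -> int:
        return int(value)  # stand-in for the real index math

mapping = Mapping()
values = [float(i % 1000 + 1) for i in range(10_000)]

def without_local():
    for v in values:
        mapping.compute_bucket_index(v)  # attribute lookup on every call

def with_local():
    compute_idx = mapping.compute_bucket_index  # bind once, reuse in the loop
    for v in values:
        compute_idx(v)

print(timeit.timeit(without_local, number=100))
print(timeit.timeit(with_local, number=100))
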

QuantileFlow/ddsketch/mapping/cubic_interpolation.py

Lines changed: 3 additions & 9 deletions
@@ -1,10 +1,5 @@
 """
-This file contains a Python implementation of the cubic interpolation mapping algorithm
-described in Datadog's Java DDSketch implementation (https://github.com/DataDog/sketches-java).
-
-Original work Copyright 2021 Datadog, Inc.
-Licensed under Apache License 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
-
+Cubic interpolation mapping scheme for DDSketch.
 
 This implementation approximates the memory-optimal logarithmic mapping by:
 1. Extracting the floor value of log2 from binary representation
@@ -34,8 +29,7 @@ def __init__(self, relative_accuracy: float):
 
         # Multiplier m = 7/(10*log(2)) ≈ 1.01
         # This gives us the minimum multiplier that maintains relative accuracy guarantee
-        # Divide by C as per Datadog's implementation
-        self.m = 1/ (self.C * math.log(2))
+        self.m = 1 / (self.C * math.log(2))
 
     def _extract_exponent_and_significand(self, value: float) -> tuple[int, float]:
         """
@@ -55,7 +49,7 @@ def _cubic_interpolation(self, s: float) -> float:
         Compute the cubic interpolation P(s) = As³ + Bs² + Cs
         where s is the normalized significand in [0, 1).
         """
-        # Use Datadog's order of operations for better numerical stability
+        # Use Horner's method for better numerical stability
         return s * (self.C + s * (self.B + s * self.A))
 
     def compute_bucket_index(self, value: float) -> int:
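
The Horner form named in the new comment evaluates the same cubic as the naive expansion A*s³ + B*s² + C*s, just factored to use fewer multiplications. A small self-contained check of that equivalence (the coefficient values below are illustrative, not read from the class):

A, B, C = 6/35, -3/5, 10/7  # example cubic coefficients

def naive(s: float) -> float:
    return A * s**3 + B * s**2 + C * s

def horner(s: float) -> float:
    # Same polynomial, factored as s*(C + s*(B + s*A))
    return s * (C + s * (B + s * A))

for s in (0.0, 0.25, 0.5, 0.999):
    assert abs(naive(s) - horner(s)) < 1e-12
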

QuantileFlow/ddsketch/mapping/logarithmic.py

Lines changed: 2 additions & 2 deletions
@@ -19,11 +19,11 @@ def __init__(self, relative_accuracy: float):
         self.multiplier = 1 / math.log(self.gamma)
 
     def key(self, value: float) -> int:
-        """Alias for compute_bucket_index for Datadog API compatibility."""
+        """Alias for compute_bucket_index for API compatibility."""
         return self.compute_bucket_index(value)
 
     def value(self, key: int) -> float:
-        """Alias for compute_value_from_index for Datadog API compatibility."""
+        """Alias for compute_value_from_index for API compatibility."""
         return self.compute_value_from_index(key)
 
     def compute_bucket_index(self, value: float) -> int:
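
The multiplier set in __init__ is 1/ln(gamma), so a bucket index is essentially log(value) scaled by that multiplier. A rough standalone sketch of the logarithmic mapping these aliases sit on top of; the rounding convention and the bucket-value reconstruction below are assumptions for illustration, not necessarily what the class does:

import math

alpha = 0.01                           # target relative accuracy
gamma = (1 + alpha) / (1 - alpha)
multiplier = 1 / math.log(gamma)

def key(value: float) -> int:
    return math.ceil(math.log(value) * multiplier)

def value(key_: int) -> float:
    # representative value for the bucket, chosen so the relative error stays within alpha
    return 2 * gamma ** key_ / (1 + gamma)

x = 123.456
rebuilt = value(key(x))
assert abs(rebuilt - x) / x <= alpha
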

QuantileFlow/ddsketch/storage/contiguous.py

Lines changed: 73 additions & 14 deletions
@@ -1,15 +1,15 @@
 """Contiguous array storage implementation for DDSketch using offset-based indexing.
 
 Optimized for high throughput by using Python lists instead of numpy arrays
-and adopting Datadog's chunk-based dynamic growth pattern.
+and a chunk-based dynamic growth pattern.
 """
 
 import math
 import warnings
 from .base import Storage
 
 
-# Chunk size for dynamic growth (matches Datadog's default)
+# Chunk size for dynamic growth
 CHUNK_SIZE = 128
 
 
@@ -32,7 +32,8 @@ class ContiguousStorage(Storage):
 
     __slots__ = ('count', 'bins', 'min_key', 'max_key',
                  'offset', 'collapse_count', 'bin_limit',
-                 'chunk_size', 'is_collapsed')
+                 'chunk_size', 'is_collapsed',
+                 '_cumulative_sums', '_cumulative_valid')
 
     def __init__(self, bin_limit: int = 2048, chunk_size: int = CHUNK_SIZE, max_buckets: int = None):
         """
@@ -51,7 +52,7 @@ def __init__(self, bin_limit: int = 2048, chunk_size: int = CHUNK_SIZE, max_buck
             raise ValueError("bin_limit must be positive for ContiguousStorage")
 
         # Don't call super().__init__ to avoid overhead - inline what we need
-        self.count = 0.0  # Use float like Datadog for weighted values
+        self.count = 0.0  # Use float for weighted values
         self.bins = []  # Start empty, grow dynamically
         self.bin_limit = bin_limit
         self.chunk_size = chunk_size
@@ -60,6 +61,9 @@ def __init__(self, bin_limit: int = 2048, chunk_size: int = CHUNK_SIZE, max_buck
         self.offset = 0
         self.collapse_count = 0
         self.is_collapsed = False
+        # Lazy cumulative sums for O(log n) quantile queries
+        self._cumulative_sums = []
+        self._cumulative_valid = False
 
     @property
     def total_count(self):
@@ -118,19 +122,29 @@ def add(self, key, weight=1.0):
         idx = self._get_index(key)
         self.bins[idx] += weight
         self.count += weight
+        self._cumulative_valid = False
 
     def _get_index(self, key):
-        """Calculate the bin index for the key, extending the range if necessary."""
-        if self.min_key is None:
+        """Calculate the bin index for the key, extending the range if necessary.
+
+        Optimized for the common case where the key is within the existing range.
+        """
+        # Fast path: key is within the existing range (most common case)
+        min_key = self.min_key
+        if min_key is not None and min_key <= key <= self.max_key:
+            return key - self.offset
+
+        # Slow path: extend the range or handle edge cases
+        if min_key is None:
             # First insertion
             self._extend_range(key)
-        elif key < self.min_key:
+        elif key < min_key:
             if self.is_collapsed:
                 return 0
             self._extend_range(key)
             if self.is_collapsed:
                 return 0
-        elif key > self.max_key:
+        else:  # key > self.max_key
             self._extend_range(key)
 
         return key - self.offset
@@ -241,6 +255,7 @@ def remove(self, bucket_index: int, count: int = 1) -> bool:
 
         self.bins[pos] = max(0, old_count - count)
         self.count = max(0, self.count - count)
+        self._cumulative_valid = False
 
         # Update min/max keys if we emptied a boundary bucket
         if old_count > 0 and self.bins[pos] == 0:
@@ -282,11 +297,27 @@ def get_count(self, bucket_index: int) -> int:
             return 0
         return int(self.bins[pos])
 
+    def _rebuild_cumulative_sums(self):
+        """Rebuild the cumulative sums array for O(log n) rank queries."""
+        bins = self.bins
+        n = len(bins)
+        if n == 0:
+            self._cumulative_sums = []
+        else:
+            # Build cumulative sums
+            cumsum = [0.0] * n
+            running = 0.0
+            for i in range(n):
+                running += bins[i]
+                cumsum[i] = running
+            self._cumulative_sums = cumsum
+        self._cumulative_valid = True
+
     def key_at_rank(self, rank, lower=True):
         """
         Return the key for the value at given rank.
 
-        This method is compatible with Datadog's interface.
+        Uses lazy cumulative sums and binary search for O(log n) performance.
 
         Args:
             rank: The rank to find.
@@ -296,11 +327,37 @@ def key_at_rank(self, rank, lower=True):
         Returns:
             The key at the specified rank.
         """
-        running_ct = 0.0
-        for i, bin_ct in enumerate(self.bins):
-            running_ct += bin_ct
-            if (lower and running_ct > rank) or (not lower and running_ct >= rank + 1):
-                return i + self.offset
+        if not self._cumulative_valid:
+            self._rebuild_cumulative_sums()
+
+        cumsum = self._cumulative_sums
+        n = len(cumsum)
+        if n == 0:
+            return self.max_key if self.max_key is not None else 0
+
+        # Use binary search for O(log n) lookup:
+        # find the first index where the rank condition holds
+        lo, hi = 0, n
+        if lower:
+            # Find the first index where cumsum[i] > rank
+            while lo < hi:
+                mid = (lo + hi) >> 1
+                if cumsum[mid] > rank:
+                    hi = mid
+                else:
+                    lo = mid + 1
+        else:
+            # Find the first index where cumsum[i] >= rank + 1
+            target = rank + 1
+            while lo < hi:
+                mid = (lo + hi) >> 1
+                if cumsum[mid] >= target:
+                    hi = mid
+                else:
+                    lo = mid + 1
+
+        if lo < n:
+            return lo + self.offset
 
         return self.max_key if self.max_key is not None else 0
@@ -329,6 +386,7 @@ def merge(self, other: 'ContiguousStorage'):
             self.bins[self_idx] += other.bins[other_idx]
 
         self.count += other.count
+        self._cumulative_valid = False
 
     def copy(self, store: 'ContiguousStorage'):
         """Copy another storage into this one."""
@@ -339,3 +397,4 @@ def copy(self, store: 'ContiguousStorage'):
         self.offset = store.offset
         self.is_collapsed = store.is_collapsed
         self.collapse_count = store.collapse_count
+        self._cumulative_valid = False
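
The hand-rolled loops above are "find the first index satisfying a predicate over a sorted prefix-sum array" searches, so they behave like bisect over the cumulative sums. A small sketch of that equivalence with made-up bucket counts, purely for illustration (the commit keeps its own loop rather than calling bisect):

from bisect import bisect_left, bisect_right

bins = [3.0, 0.0, 5.0, 2.0]          # per-bucket counts
cumsum = []
running = 0.0
for c in bins:
    running += c
    cumsum.append(running)           # [3.0, 3.0, 8.0, 10.0]

def index_at_rank(rank: float, lower: bool = True) -> int:
    if lower:
        # first i with cumsum[i] > rank
        return bisect_right(cumsum, rank)
    # first i with cumsum[i] >= rank + 1
    return bisect_left(cumsum, rank + 1)

assert index_at_rank(0) == 0      # rank 0 falls in the first bucket
assert index_at_rank(3) == 2      # ranks 3..7 fall in the third bucket
assert index_at_rank(9) == 3
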

QuantileFlow/ddsketch/storage/sparse.py

Lines changed: 57 additions & 7 deletions
@@ -1,6 +1,6 @@
 """Sparse storage implementation for DDSketch using dictionary."""
 
-from typing import Dict
+from typing import Dict, List
 from .base import Storage, BucketManagementStrategy
 
 class SparseStorage(Storage):
@@ -26,6 +26,10 @@ def __init__(self, max_buckets: int = 2048,
         self.counts: Dict[int, int] = {}
         self.min_index = None  # Minimum bucket index seen
         self.max_index = None  # Maximum bucket index seen
+        # Cached sorted keys and cumulative sums for O(log n) quantile queries
+        self._sorted_keys: List[int] = []
+        self._cumulative_sums: List[float] = []
+        self._cache_valid: bool = False
 
     @property
     def count(self):
@@ -55,6 +59,7 @@ def add(self, bucket_index: int, count: int = 1):
 
         self.counts[bucket_index] = self.counts.get(bucket_index, 0) + count
         self.total_count += count
+        self._cache_valid = False
 
         # Update min and max indices
         if self.min_index is None or bucket_index < self.min_index:
@@ -85,6 +90,7 @@ def remove(self, bucket_index: int, count: int = 1) -> bool:
 
         self.counts[bucket_index] = max(0, self.counts[bucket_index] - count)
         self.total_count = max(0, self.total_count - count)
+        self._cache_valid = False
 
         if self.counts[bucket_index] == 0:
             del self.counts[bucket_index]
@@ -139,11 +145,30 @@ def collapse_smallest_buckets(self):
         # Merge buckets
         self.counts[i1] += self.counts[i0]
         del self.counts[i0]
+        self._cache_valid = False
+
+    def _rebuild_cache(self):
+        """Rebuild sorted keys and cumulative sums for O(log n) rank queries."""
+        if not self.counts:
+            self._sorted_keys = []
+            self._cumulative_sums = []
+        else:
+            self._sorted_keys = sorted(self.counts.keys())
+            # Build cumulative sums
+            cumsum = []
+            running = 0.0
+            for key in self._sorted_keys:
+                running += self.counts[key]
+                cumsum.append(running)
+            self._cumulative_sums = cumsum
+        self._cache_valid = True
 
     def key_at_rank(self, rank, lower=True):
         """
         Return the key for the value at given rank.
 
+        Uses cached sorted keys and binary search for O(log n) performance.
+
         Args:
             rank: The rank to find.
             lower: If True, return key where running_count > rank.
@@ -155,10 +180,35 @@ def key_at_rank(self, rank, lower=True):
         if not self.counts:
             return 0
 
-        running_ct = 0.0
-        for key in sorted(self.counts.keys()):
-            running_ct += self.counts[key]
-            if (lower and running_ct > rank) or (not lower and running_ct >= rank + 1):
-                return key
+        if not self._cache_valid:
+            self._rebuild_cache()
+
+        cumsum = self._cumulative_sums
+        n = len(cumsum)
+        if n == 0:
+            return self.max_index if self.max_index is not None else 0
+
+        # Use binary search for O(log n) lookup
+        lo, hi = 0, n
+        if lower:
+            # Find the first index where cumsum[i] > rank
+            while lo < hi:
+                mid = (lo + hi) >> 1
+                if cumsum[mid] > rank:
+                    hi = mid
+                else:
+                    lo = mid + 1
+        else:
+            # Find the first index where cumsum[i] >= rank + 1
+            target = rank + 1
+            while lo < hi:
+                mid = (lo + hi) >> 1
+                if cumsum[mid] >= target:
+                    hi = mid
+                else:
+                    lo = mid + 1
+
+        if lo < len(self._sorted_keys):
+            return self._sorted_keys[lo]
 
-        return self.max_index if self.max_index is not None else 0
+        return self.max_index if self.max_index is not None else 0
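
The sparse store uses the same lazy pattern as the contiguous one: mutate the dict freely, flip the cache flag, and rebuild the sorted keys and cumulative sums only when a rank query arrives. A standalone cross-check of the cached lookup against the old linear scan, using toy data rather than the SparseStorage class itself:

from bisect import bisect_right

counts = {5: 2, -3: 1, 10: 4}        # bucket index -> count

# Old approach: linear scan in key order
def key_at_rank_scan(rank: float) -> int:
    running = 0.0
    for key in sorted(counts):
        running += counts[key]
        if running > rank:
            return key
    return max(counts)

# New approach: cached sorted keys + cumulative sums + binary search
sorted_keys = sorted(counts)                       # [-3, 5, 10]
cumsum = []
running = 0.0
for key in sorted_keys:
    running += counts[key]
    cumsum.append(running)                         # [1.0, 3.0, 7.0]

def key_at_rank_cached(rank: float) -> int:
    i = bisect_right(cumsum, rank)                 # first index with cumsum[i] > rank
    return sorted_keys[i] if i < len(sorted_keys) else max(counts)

for r in range(7):
    assert key_at_rank_scan(r) == key_at_rank_cached(r)
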
