From c6ade7e7a89edac8cbe44266a1779e521bd52856 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 7 Feb 2026 03:21:12 +0000
Subject: [PATCH] Fix numerically unstable variance calculation in
 CapacityEnvelope.from_values()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The computational variance formula E[X²] - (E[X])² suffers from catastrophic
floating-point cancellation when capacity values are large or nearly identical.
This produced silently wrong stdev values (e.g., 41 million instead of 0 for
identical values) or complex numbers when the computed variance went negative.

Replace with the numerically stable two-pass formula sum((x - mean)²) / n,
iterating over the frequency map for efficiency with duplicate values.

https://claude.ai/code/session_01BH7FXdY35eRtf98jo8kQiG
---
 ngraph/results/artifacts.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/ngraph/results/artifacts.py b/ngraph/results/artifacts.py
index 0309cc5..47cda78 100644
--- a/ngraph/results/artifacts.py
+++ b/ngraph/results/artifacts.py
@@ -74,10 +74,9 @@ def from_values(
         if not values:
             raise ValueError("Cannot create envelope from empty values list")
 
-        # Single pass to calculate everything efficiently
+        # First pass: build frequency map and compute mean
         frequencies = {}
         total_sum = 0.0
-        sum_squares = 0.0
         min_capacity = float("inf")
         max_capacity = float("-inf")
 
@@ -87,7 +86,6 @@ def from_values(
 
             # Update statistics
             total_sum += value
-            sum_squares += value * value
             min_capacity = min(min_capacity, value)
             max_capacity = max(max_capacity, value)
 
@@ -95,9 +93,15 @@ def from_values(
         n = len(values)
         mean_capacity = total_sum / n
 
-        # Use computational formula for variance: Var(X) = E[X²] - (E[X])²
-        variance = (sum_squares / n) - (mean_capacity * mean_capacity)
-        stdev_capacity = variance**0.5
+        # Second pass over unique values: compute variance using the
+        # numerically stable formula sum((x - mean)^2) / n.
+        # Iterating over the frequency map is efficient when there are
+        # many duplicate values (common in Monte Carlo results).
+        variance_sum = 0.0
+        for value, count in frequencies.items():
+            diff = value - mean_capacity
+            variance_sum += count * diff * diff
+        stdev_capacity = (variance_sum / n) ** 0.5
 
         # Process flow summaries if provided
         flow_summary_stats = {}