feat: use shared judge_passed helper for all threshold pass/fail checks

andrewklatzke · andrewklatzke · commit a8f14de3b760 · 2026-05-05T15:12:13.000-08:00
diff --git a/packages/optimization/src/ldai_optimizer/client.py b/packages/optimization/src/ldai_optimizer/client.py
@@ -60,6 +60,7 @@
     extract_json_from_response,
     generate_slug,
     interpolate_variables,
+    judge_passed,
     restore_variable_placeholders,
     validate_variation_response,
 )
@@ -142,16 +143,6 @@ def _compute_validation_count(pool_size: int) -> int:
 }
 
 
-def _judge_passed(score: float, threshold: float, is_inverted: bool) -> bool:
-    """Return True when a judge score meets its threshold.
-
-    For standard judges (higher is better) the score must reach the threshold
-    from below: ``score >= threshold``.  For inverted judges (lower is better,
-    e.g. toxicity) the score must stay at or below the threshold:
-    ``score <= threshold``.
-    """
-    return score <= threshold if is_inverted else score >= threshold
-
 
 class OptimizationClient:
     _options: OptimizationOptions
@@ -481,7 +472,7 @@ async def _call_judges(
                     if optimization_judge.threshold is not None
                     else 1.0
                 )
-                passed = _judge_passed(result.score, threshold, optimization_judge.is_inverted)
+                passed = judge_passed(result.score, threshold, optimization_judge.is_inverted)
                 logger.debug(
                     "[Iteration %d] -> Judge '%s' scored %.3f (threshold=%.3f, inverted=%s) -> %s%s",
                     iteration,
@@ -1868,7 +1859,7 @@ def _evaluate_response(self, optimize_context: OptimizationContext) -> bool:
                 if optimization_judge.threshold is not None
                 else 1.0
             )
-            if not _judge_passed(result.score, threshold, optimization_judge.is_inverted):
+            if not judge_passed(result.score, threshold, optimization_judge.is_inverted):
                 return False
 
         return True
diff --git a/packages/optimization/src/ldai_optimizer/prompts.py b/packages/optimization/src/ldai_optimizer/prompts.py
@@ -7,6 +7,7 @@
     OptimizationContext,
     OptimizationJudge,
 )
+from ldai_optimizer.util import judge_passed
 
 _DURATION_KEYWORDS = re.compile(
     r"\b(fast|faster|quickly|quick|latency|low-latency|duration|response\s+time|"
@@ -285,10 +286,7 @@ def variation_prompt_feedback(
             if optimization_judge:
                 score = result.score
                 if optimization_judge.threshold is not None:
-                    if optimization_judge.is_inverted:
-                        passed = score <= optimization_judge.threshold
-                    else:
-                        passed = score >= optimization_judge.threshold
+                    passed = judge_passed(score, optimization_judge.threshold, optimization_judge.is_inverted)
                     status = "PASSED" if passed else "FAILED"
                     feedback_line = (
                         f"- {judge_key}: Score {score:.3f}"
diff --git a/packages/optimization/src/ldai_optimizer/util.py b/packages/optimization/src/ldai_optimizer/util.py
@@ -303,3 +303,13 @@ def extract_json_from_response(response_str: str) -> Dict[str, Any]:
         )
 
     return response_data
+
+
+def judge_passed(score: float, threshold: float, is_inverted: bool) -> bool:
+    """Return True when a judge score meets its threshold.
+
+    For standard judges (higher is better) the score must reach the threshold:
+    ``score >= threshold``.  For inverted judges (lower is better, e.g. toxicity)
+    the score must stay at or below the threshold: ``score <= threshold``.
+    """
+    return score <= threshold if is_inverted else score >= threshold
diff --git a/packages/optimization/tests/test_client.py b/packages/optimization/tests/test_client.py
@@ -10,7 +10,8 @@
 from ldai.tracker import TokenUsage
 from ldclient import Context
 
-from ldai_optimizer.client import OptimizationClient, _compute_validation_count, _find_model_config, _judge_passed
+from ldai_optimizer.client import OptimizationClient, _compute_validation_count, _find_model_config
+from ldai_optimizer.util import judge_passed
 from ldai_optimizer.dataclasses import (
     AIJudgeCallConfig,
     GroundTruthOptimizationOptions,
@@ -4410,24 +4411,24 @@ async def test_optimization_key_in_post_url_uses_string_key_not_uuid(self):
 
 
 # ---------------------------------------------------------------------------
-# _judge_passed helper
+# judge_passed helper
 # ---------------------------------------------------------------------------
 
 
 class TestJudgePassed:
     def test_standard_judge_passes_at_or_above_threshold(self):
-        assert _judge_passed(0.8, 0.8, is_inverted=False) is True
-        assert _judge_passed(1.0, 0.8, is_inverted=False) is True
+        assert judge_passed(0.8, 0.8, is_inverted=False) is True
+        assert judge_passed(1.0, 0.8, is_inverted=False) is True
 
     def test_standard_judge_fails_below_threshold(self):
-        assert _judge_passed(0.5, 0.8, is_inverted=False) is False
+        assert judge_passed(0.5, 0.8, is_inverted=False) is False
 
     def test_inverted_judge_passes_at_or_below_threshold(self):
-        assert _judge_passed(0.1, 0.3, is_inverted=True) is True
-        assert _judge_passed(0.3, 0.3, is_inverted=True) is True
+        assert judge_passed(0.1, 0.3, is_inverted=True) is True
+        assert judge_passed(0.3, 0.3, is_inverted=True) is True
 
     def test_inverted_judge_fails_above_threshold(self):
-        assert _judge_passed(0.8, 0.3, is_inverted=True) is False
+        assert judge_passed(0.8, 0.3, is_inverted=True) is False
 
 
 # ---------------------------------------------------------------------------