SemiAnalysisAI · arygupt · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026 · claude
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
@@ -3474,3 +3474,16 @@
     - "Use scheduler-recv-interval values 2/60/30/1200/600/1920 for conc 1-4/8/16/32/64/128-256"
     - "Set max-running-requests=256, chunked-prefill-size=16384, mem-fraction-static=0.8, cuda-graph-max-bs=CONC, and enable symm-mem"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1544
+
+- config-keys:
+    - minimaxm2.5-fp8-h100-vllm
+    - minimaxm2.5-fp8-h200-vllm
+    - minimaxm2.5-fp4-b200-vllm
+    - minimaxm2.5-fp4-b300-vllm
+    - minimaxm2.5-fp4-mi355x-vllm
+  description:
+    - "Re-run MiniMax-M2.5 single-node vLLM sweeps (H100/H200 FP8, B200/B300/MI355X FP4) with no recipe change, to capture per-GPU power telemetry (avg_power_w) added in #1558 for the power/energy canvas"
+    - "Source rows for the canvas predate the 2026-05-27 power-capture merge, so they carry throughput/latency but no measured power; this re-run replaces the modeled power layer with measured power"
+    - "benchmarks-only: power comes from the benchmark runs; evals add nothing here"
+  benchmarks-only: true
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1666
diff --git a/utils/matrix_logic/test_validation.py b/utils/matrix_logic/test_validation.py
@@ -1,6 +1,7 @@
 """Comprehensive tests for validation.py"""
 import pytest
 from validation import (
+    ChangelogEntry,
     Fields,
     SingleNodeMatrixEntry,
     SingleNodeAgenticMatrixEntry,
@@ -932,3 +933,44 @@ def test_validation_runs_by_default(self, tmp_path):
         with pytest.raises(ValueError) as exc_info:
             load_runner_file(str(runner_file))
         assert "must be a list" in str(exc_info.value)
+
+
+class TestChangelogEntry:
+    """Tests for ChangelogEntry, incl. the benchmarks-only / evals-only options."""
+
+    def _base(self, **extra):
+        entry = {
+            "config-keys": ["minimaxm2.5-fp4-b200-vllm"],
+            "description": ["re-run for power capture"],
+            "pr-link": "https://github.com/SemiAnalysisAI/InferenceX/pull/1666",
+        }
+        entry.update(extra)
+        return entry
+
+    def test_defaults(self):
+        """Both opt-out flags default to False."""
+        entry = ChangelogEntry.model_validate(self._base())
+        assert entry.evals_only is False
+        assert entry.benchmarks_only is False
+
+    def test_benchmarks_only_alias(self):
+        """benchmarks-only YAML key maps to benchmarks_only."""
+        entry = ChangelogEntry.model_validate(self._base(**{"benchmarks-only": True}))
+        assert entry.benchmarks_only is True
+
+    def test_evals_only_alias(self):
+        entry = ChangelogEntry.model_validate(self._base(**{"evals-only": True}))
+        assert entry.evals_only is True
+
+    def test_evals_and_benchmarks_only_mutually_exclusive(self):
+        """Setting both opt-out flags is rejected."""
+        with pytest.raises(ValueError) as exc_info:
+            ChangelogEntry.model_validate(
+                self._base(**{"evals-only": True, "benchmarks-only": True})
+            )
+        assert "mutually exclusive" in str(exc_info.value)
+
+    def test_unknown_field_forbidden(self):
+        """extra='forbid' rejects typos like a singular 'benchmark-only'."""
+        with pytest.raises(ValueError):
+            ChangelogEntry.model_validate(self._base(**{"benchmark-only": True}))
diff --git a/utils/matrix_logic/validation.py b/utils/matrix_logic/validation.py
@@ -486,11 +486,23 @@ class ChangelogEntry(BaseModel):
     description: list[str] = Field(min_length=1)
     pr_link: str = Field(alias="pr-link")
     evals_only: bool = Field(alias="evals-only", default=False)
+    benchmarks_only: bool = Field(
+        alias="benchmarks-only", default=False,
+        description="Skip the eval pass; generate benchmarks only (e.g. power-only re-runs)."
+    )
     scenario_type: Optional[List[str]] = Field(
         alias="scenario-type", default=None,
         description="Restrict to specific scenario types (e.g., ['fixed-seq-len', 'agentic-coding'])"
     )
 
+    @model_validator(mode='after')
+    def check_evals_benchmarks_exclusive(self) -> "ChangelogEntry":
+        if self.evals_only and self.benchmarks_only:
+            raise ValueError(
+                "'evals-only' and 'benchmarks-only' are mutually exclusive; set at most one."
+            )
+        return self
+
 
 class ChangelogMetadata(BaseModel):
     """Pydantic model for validating changelog metadata structure."""

diff --git a/utils/process_changelog.py b/utils/process_changelog.py
@@ -175,8 +175,11 @@ def main():
                     raise
                 all_benchmark_results.extend(json.loads(result.stdout))
 
-        # Generate eval entries separately
-        eval_configs = [c for c in all_configs if c not in eval_configs_seen]
+        # Generate eval entries separately (skipped when the entry opts out via
+        # benchmarks-only, e.g. power-only re-runs that don't need eval scoring).
+        eval_configs = [] if entry.benchmarks_only else [
+            c for c in all_configs if c not in eval_configs_seen
+        ]
         if eval_configs:
             eval_configs_seen.update(eval_configs)
             base_cmd = [