diff --git a/apps/studio/src/components/EvalDetail.tsx b/apps/studio/src/components/EvalDetail.tsx index 89acf0306..654bdff4b 100644 --- a/apps/studio/src/components/EvalDetail.tsx +++ b/apps/studio/src/components/EvalDetail.tsx @@ -59,20 +59,6 @@ export function EvalDetail({ eval: result, runId, benchmarkId }: EvalDetailProps return (
- {/* Compact header: test ID + metadata (no scores — scores live in Checks tab) */} -
-
-

{result.testId}

-

- {result.target && Target: {result.target}} - {result.durationMs != null && ( - {(result.durationMs / 1000).toFixed(1)}s - )} - {result.costUsd != null && ${result.costUsd.toFixed(4)}} -

-
-
- {/* Tab navigation — at the top so Files tab editor fills maximum height */}
diff --git a/apps/studio/src/routes/benchmarks/$benchmarkId_/evals/$runId.$evalId.tsx b/apps/studio/src/routes/benchmarks/$benchmarkId_/evals/$runId.$evalId.tsx index c3c916341..169d6d643 100644 --- a/apps/studio/src/routes/benchmarks/$benchmarkId_/evals/$runId.$evalId.tsx +++ b/apps/studio/src/routes/benchmarks/$benchmarkId_/evals/$runId.$evalId.tsx @@ -7,7 +7,7 @@ import { useState } from 'react'; import { EvalDetail } from '~/components/EvalDetail'; import { RunEvalModal } from '~/components/RunEvalModal'; -import { useBenchmarkRunDetail, useStudioConfig } from '~/lib/api'; +import { isPassing, useBenchmarkRunDetail, useStudioConfig } from '~/lib/api'; export const Route = createFileRoute('/benchmarks/$benchmarkId_/evals/$runId/$evalId')({ component: BenchmarkEvalDetailPage, @@ -50,6 +50,12 @@ function BenchmarkEvalDetailPage() { ); } + const passThreshold = config?.threshold ?? config?.pass_threshold ?? 0.8; + const passed = + isPassing(result.score, passThreshold) && + result.executionStatus !== 'error' && + result.executionStatus !== 'failed'; + return (
@@ -57,7 +63,12 @@ function BenchmarkEvalDetailPage() {

Run: {runId} / Eval: {evalId}

-

{evalId}

+

+ + {passed ? '✓' : '✗'} + + {evalId} +

{!isReadOnly && (