diff --git a/apps/cli/src/commands/results/eval-runner.ts b/apps/cli/src/commands/results/eval-runner.ts index 6559a3da..e40c01c7 100644 --- a/apps/cli/src/commands/results/eval-runner.ts +++ b/apps/cli/src/commands/results/eval-runner.ts @@ -24,6 +24,7 @@ import type { Hono } from 'hono'; import { TARGET_FILE_CANDIDATES, discoverTargetsFile } from '../../utils/targets.js'; import { discoverEvalFiles } from '../eval/discover.js'; +import { buildDefaultRunDir } from '../eval/result-layout.js'; import { findRepoRoot } from '../eval/shared.js'; // ── In-memory run tracker ──────────────────────────────────────────────── @@ -32,6 +33,10 @@ interface StudioRun { id: string; status: 'starting' | 'running' | 'finished' | 'failed'; command: string; + /** Target name passed via --target (if any). Stored so the run list can show it before the first result is written. */ + target?: string; + /** Absolute path to the run directory (e.g. .agentv/results/runs/default/). Used to correlate this in-memory run with the filesystem run when the JSONL has 0 records yet. */ + outputDir?: string; startedAt: string; finishedAt?: string; exitCode?: number | null; @@ -62,6 +67,19 @@ function pruneFinishedRuns() { } } +/** + * Look up the target for a Studio-launched run by its index.jsonl path (both paths are resolved before comparing). + * Called by handleRuns in serve.ts when the JSONL has 0 records (run just started); undefined when no tracked run matches. + */ +export function getActiveRunTarget(indexJsonlPath: string): string | undefined { + for (const run of activeRuns.values()) { + if (run.outputDir && path.resolve(run.outputDir, 'index.jsonl') === path.resolve(indexJsonlPath)) { + return run.target; + } + } + return undefined; +} + // ── Discover targets file from project root ────────────────────────────── async function discoverTargetsInProject(cwd: string): Promise { @@ -310,6 +328,17 @@ export function registerEvalRoutes( } const args = buildCliArgs(body); + // Determine the output directory for this run. 
When the caller provides + // an explicit --output (resume/rerun), use that path. Otherwise generate + // the default path now so we can pass it via --output and later correlate + // the filesystem run with this in-memory StudioRun (needed to show the + // target in the sidebar before any results have been written). + const outputDir = body.output?.trim() + ? path.resolve(cwd, body.output.trim()) + : buildDefaultRunDir(cwd); + if (!body.output?.trim()) { + args.push('--output', outputDir); + } const command = buildCliPreview(args); const runId = generateRunId(); @@ -317,6 +346,8 @@ export function registerEvalRoutes( id: runId, status: 'starting', command, + target: body.target?.trim() || undefined, + outputDir, startedAt: new Date().toISOString(), stdout: '', stderr: '', @@ -405,6 +436,7 @@ export function registerEvalRoutes( id: r.id, status: r.status, command: r.command, + target: r.target, started_at: r.startedAt, finished_at: r.finishedAt ?? null, exit_code: r.exitCode ?? null, @@ -481,6 +513,12 @@ export function registerEvalRoutes( } const args = buildCliArgs(body); + const outputDir = body.output?.trim() + ? path.resolve(cwd, body.output.trim()) + : buildDefaultRunDir(cwd); + if (!body.output?.trim()) { + args.push('--output', outputDir); + } const command = buildCliPreview(args); const runId = generateRunId(); @@ -488,6 +526,8 @@ export function registerEvalRoutes( id: runId, status: 'starting', command, + target: body.target?.trim() || undefined, + outputDir, startedAt: new Date().toISOString(), stdout: '', stderr: '', @@ -557,6 +597,7 @@ export function registerEvalRoutes( id: r.id, status: r.status, command: r.command, + target: r.target, started_at: r.startedAt, finished_at: r.finishedAt ?? null, exit_code: r.exitCode ?? 
null, diff --git a/apps/cli/src/commands/results/serve.ts b/apps/cli/src/commands/results/serve.ts index 00ebaf3d..f88bf212 100644 --- a/apps/cli/src/commands/results/serve.ts +++ b/apps/cli/src/commands/results/serve.ts @@ -54,7 +54,7 @@ import { resolveRunManifestPath } from '../eval/result-layout.js'; import { loadRunCache, resolveRunCacheFile } from '../eval/run-cache.js'; import { findRepoRoot } from '../eval/shared.js'; import { listResultFiles } from '../inspect/utils.js'; -import { registerEvalRoutes } from './eval-runner.js'; +import { getActiveRunTarget, registerEvalRoutes } from './eval-runner.js'; import { loadLightweightResults, loadManifestResults, @@ -290,6 +290,10 @@ async function handleRuns(c: C, { searchDir, agentvDir }: DataContext) { target = records[0].target; experiment = records[0].experiment ?? experiment; passRate = records.filter((r) => r.score >= passThreshold).length / records.length; + } else { + // Run is in-progress with 0 results written yet — fall back to the + // in-memory target stored when the Studio launched this run. 
+ target = getActiveRunTarget(m.path) ?? target; } } catch { // ignore enrichment errors diff --git a/apps/studio/src/lib/run-label.test.ts b/apps/studio/src/lib/run-label.test.ts index d78060f6..40866331 100644 --- a/apps/studio/src/lib/run-label.test.ts +++ b/apps/studio/src/lib/run-label.test.ts @@ -33,4 +33,14 @@ describe('formatRunLabel', () => { }), ).toBe('29/04 09:17 · 0%'); }); + + it('shows target even when pass rate is 0 (active/in-progress run)', () => { + expect( + formatRunLabel({ + target: 'wtalms-stg', + timestamp: '2026-05-07T10:56:00.000Z', + pass_rate: 0, + }), + ).toBe('07/05 10:56 · wtalms-stg · 0%'); + }); }); diff --git a/apps/studio/src/lib/types.ts b/apps/studio/src/lib/types.ts index 6e2c1a6f..46f321d3 100644 --- a/apps/studio/src/lib/types.ts +++ b/apps/studio/src/lib/types.ts @@ -337,6 +337,7 @@ export interface EvalRunListResponse { id: string; status: string; command: string; + target?: string; started_at: string; finished_at: string | null; exit_code: number | null;