From 3f02dc638941865af9c05006e9ec0a5d3ceebb19 Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Thu, 4 Jun 2026 13:50:54 +0200 Subject: [PATCH] feat(agents-server-ui): show per-response token usage in the meta row MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sums input/output tokens across every step of the run and renders them next to the elapsed-time ticker (e.g. `Thinking · 12s · 1.2k ↑ 412 ↓`). Counter updates at step boundaries — the LLM SDK only reports `usage` at end-of-step, so within a single text stream the value stays flat; tool-using runs see jumps as each step settles. Token plumbing (additive, no migration): - `StepValue` Zod + TS gains optional `input_tokens` / `output_tokens` - `outbound-bridge.ts:onStepEnd` now persists the `tokenInput` / `tokenOutput` values it was already receiving but dropping - `IncludesStep` / `EntityTimelineStepItem` and the three step `.select()` blocks surface the new fields - The cached `agent_response` section gets a summed `tokens?: { input?, output? 
}`, and the section-cache fingerprint includes per-step token deltas so a late `onStepEnd` invalidates a stale section --- .changeset/agent-token-usage.md | 27 ++++++++ packages/agents-runtime/src/entity-schema.ts | 8 +++ .../agents-runtime/src/entity-timeline.ts | 17 +++++ .../agents-runtime/src/outbound-bridge.ts | 6 ++ packages/agents-runtime/src/use-chat.ts | 36 ++++++++++ .../src/components/AgentResponse.tsx | 69 ++++++++++++++++++- .../src/components/TokenUsage.module.css | 8 +++ .../src/components/TokenUsage.tsx | 57 +++++++++++++++ 8 files changed, 226 insertions(+), 2 deletions(-) create mode 100644 .changeset/agent-token-usage.md create mode 100644 packages/agents-server-ui/src/components/TokenUsage.module.css create mode 100644 packages/agents-server-ui/src/components/TokenUsage.tsx diff --git a/.changeset/agent-token-usage.md b/.changeset/agent-token-usage.md new file mode 100644 index 0000000000..86b40ab4fc --- /dev/null +++ b/.changeset/agent-token-usage.md @@ -0,0 +1,27 @@ +--- +'@electric-ax/agents-server-ui': patch +'@electric-ax/agents-runtime': patch +'@electric-ax/agents-desktop': patch +--- + +Show per-response token usage in the agent meta row, e.g. `1.2k ↑ 412 +↓`. Updates as each step settles — for a single-turn call this lands +once at done; for tool-using runs the counter jumps at each step +boundary (the LLM SDK only emits `usage` at end-of-step, so we can't +tick smoothly between tokens). + +Plumbing: + +- `StepValue` gains optional `input_tokens` / `output_tokens` columns + (Zod + TS). Strictly additive: events recorded before this change + stay valid since both fields are optional, so no migration. +- `outbound-bridge.ts:onStepEnd` now persists the `tokenInput` / + `tokenOutput` it already received from `pi-adapter.ts` — previously + those values were accepted and silently dropped. +- `EntityTimelineStepItem` / `IncludesStep` surface the new fields, + and the three `.select()` blocks that materialize steps include + them. 
+- The cached `agent_response` section gets a `tokens?: { input?, + output? }` summed across the run's steps at section-build time, and + the section-cache fingerprint factors in step token deltas so a + late-arriving `onStepEnd` invalidates a stale section. diff --git a/packages/agents-runtime/src/entity-schema.ts b/packages/agents-runtime/src/entity-schema.ts index 2610cdb4ff..e457f93f36 100644 --- a/packages/agents-runtime/src/entity-schema.ts +++ b/packages/agents-runtime/src/entity-schema.ts @@ -123,6 +123,12 @@ type StepValue = { model_provider?: string model_id?: string duration_ms?: number + // Token usage for this step as reported by the provider's + // end-of-message `usage` payload. Populated on `onStepEnd` when the + // adapter has the data — older events without these fields stay + // valid (both optional), so this is a strictly additive change. + input_tokens?: number + output_tokens?: number } type TextValue = { key?: string @@ -436,6 +442,8 @@ function createStepSchema(): Schema { model_provider: z.string().optional(), model_id: z.string().optional(), duration_ms: z.number().int().optional(), + input_tokens: z.number().int().nonnegative().optional(), + output_tokens: z.number().int().nonnegative().optional(), }) } diff --git a/packages/agents-runtime/src/entity-timeline.ts b/packages/agents-runtime/src/entity-timeline.ts index 5acc65286b..7c13142b3f 100644 --- a/packages/agents-runtime/src/entity-timeline.ts +++ b/packages/agents-runtime/src/entity-timeline.ts @@ -57,6 +57,13 @@ export type EntityTimelineSection = items: Array done?: true error?: string + // Summed across all steps of the run that produced this section. + // Either side may be missing if the provider didn't report it + // (e.g. older events recorded before tokens were persisted). 
+ tokens?: { + input?: number + output?: number + } } | { kind: `wake` @@ -103,6 +110,8 @@ export interface IncludesStep { status: `started` | `completed` model_id?: string duration_ms?: number + input_tokens?: number + output_tokens?: number } export interface IncludesError { @@ -228,6 +237,8 @@ export interface EntityTimelineStepItem { status: `started` | `completed` model_id?: string duration_ms?: number + input_tokens?: number + output_tokens?: number } export interface EntityTimelineErrorItem { @@ -778,6 +789,8 @@ function buildIncludesRuns(input: { status: step.status, model_id: step.model_id, duration_ms: step.duration_ms, + input_tokens: step.input_tokens, + output_tokens: step.output_tokens, }) stepsByRun.set(step.run_id, entries) } @@ -1361,6 +1374,8 @@ function buildEntityTimelineQuery( status: step.status, model_id: step.model_id, duration_ms: step.duration_ms, + input_tokens: step.input_tokens, + output_tokens: step.output_tokens, })), errors: q .from({ error: db.collections.errors }) @@ -1490,6 +1505,8 @@ export function createEntityIncludesQuery( status: step.status, model_id: step.model_id, duration_ms: step.duration_ms, + input_tokens: step.input_tokens, + output_tokens: step.output_tokens, })) ), errors: toArray( diff --git a/packages/agents-runtime/src/outbound-bridge.ts b/packages/agents-runtime/src/outbound-bridge.ts index 2c81851df1..75eb1400f4 100644 --- a/packages/agents-runtime/src/outbound-bridge.ts +++ b/packages/agents-runtime/src/outbound-bridge.ts @@ -231,6 +231,12 @@ export function createOutboundBridge( ...(opts?.durationMs !== undefined && { duration_ms: opts.durationMs, }), + ...(opts?.tokenInput !== undefined && { + input_tokens: opts.tokenInput, + }), + ...(opts?.tokenOutput !== undefined && { + output_tokens: opts.tokenOutput, + }), } as never, }) as ChangeEvent ) diff --git a/packages/agents-runtime/src/use-chat.ts b/packages/agents-runtime/src/use-chat.ts index d2681cdd19..a5b0f61c55 100644 --- 
a/packages/agents-runtime/src/use-chat.ts +++ b/packages/agents-runtime/src/use-chat.ts @@ -148,6 +148,14 @@ function fingerprintRun(run: IncludesRun): string { for (const tc of run.toolCalls) { fp += `:${tc.key}.${tc.status}${payloadSniff(`a`, tc.args)}${payloadSniff(`r`, tc.result)}` } + // Steps participate in the fingerprint because the section now + // surfaces summed token counts from them — without this, a step + // landing its `input_tokens` / `output_tokens` after the run + // already settled would not invalidate the cached section. + fp += `|s:${run.steps.length}` + for (const s of run.steps) { + fp += `:${s.key}.${s.status}.${s.input_tokens ?? `-`}.${s.output_tokens ?? `-`}` + } return fp } @@ -327,11 +335,39 @@ function buildAgentSection(run: IncludesRun): AgentResponseSection { failedToolText ?? finishReason ?? `Run failed (no error details recorded)` } + // Token totals across this run's steps. We accumulate per side and + // only attach `tokens` to the section if at least one step reported + // a number — that way a run whose provider never emitted usage data + // (older events, test fixtures, future providers without `usage`) + // continues to render with no token row instead of "0 / 0". + let tokenInputSum = 0 + let tokenOutputSum = 0 + let sawTokenInput = false + let sawTokenOutput = false + for (const step of run.steps) { + if (typeof step.input_tokens === `number`) { + tokenInputSum += step.input_tokens + sawTokenInput = true + } + if (typeof step.output_tokens === `number`) { + tokenOutputSum += step.output_tokens + sawTokenOutput = true + } + } + const tokens = + sawTokenInput || sawTokenOutput + ? 
{ + ...(sawTokenInput && { input: tokenInputSum }), + ...(sawTokenOutput && { output: tokenOutputSum }), + } + : undefined + const section: AgentResponseSection = { kind: `agent_response`, items: contentItems, ...(run.status === `completed` && { done: true as const }), ...(errorText && { error: errorText }), + ...(tokens && { tokens }), } // Always cache (terminal or in-flight). Fingerprint check above // guarantees we never serve a stale streaming section — text growth diff --git a/packages/agents-server-ui/src/components/AgentResponse.tsx b/packages/agents-server-ui/src/components/AgentResponse.tsx index 722f56fef8..9c6b078675 100644 --- a/packages/agents-server-ui/src/components/AgentResponse.tsx +++ b/packages/agents-server-ui/src/components/AgentResponse.tsx @@ -26,6 +26,7 @@ import { ToolCallView } from './ToolCallView' import { TimeText } from './TimeText' import { ThinkingIndicator } from './ThinkingIndicator' import { ElapsedTime } from './ElapsedTime' +import { TokenUsage } from './TokenUsage' import { formatElapsedDuration, toMillis } from '../lib/formatTime' import styles from './AgentResponse.module.css' import type { @@ -400,6 +401,39 @@ export const AgentResponseLive = memo(function AgentResponseLive({ (q) => (run.errors ? q.from({ error: run.errors }) : undefined), [run.errors] ) + // Live token aggregation: subscribe to this run's step rows and + // sum `input_tokens` / `output_tokens` across them. Steps land + // their token counts on `onStepEnd`, so for a single-turn LLM call + // this updates once; for a tool-using run with N model calls it + // jumps N times as each step settles. + const { data: stepRows = [] } = useLiveQuery( + (q) => (run.steps ? 
q.from({ step: run.steps }) : undefined), + [run.steps] + ) + const liveTokens = useMemo(() => { + let inSum = 0 + let outSum = 0 + let sawIn = false + let sawOut = false + for (const s of stepRows as Array<{ + input_tokens?: number + output_tokens?: number + }>) { + if (typeof s.input_tokens === `number`) { + inSum += s.input_tokens + sawIn = true + } + if (typeof s.output_tokens === `number`) { + outSum += s.output_tokens + sawOut = true + } + } + if (!sawIn && !sawOut) return null + return { + input: sawIn ? inSum : undefined, + output: sawOut ? outSum : undefined, + } + }, [stepRows]) const sortedItems = useMemo( () => [...items].sort(compareLiveRunItems), [items] @@ -532,9 +566,24 @@ export const AgentResponseLive = memo(function AgentResponseLive({ )} + {/* Token usage — sums every step's `input_tokens` / + `output_tokens` as they land. Updates at step boundaries + (the LLM SDK only emits `usage` at end-of-step), so for a + single-turn call it appears once at done; for tool-using + runs it jumps as each step completes. */} + {liveTokens && ( + <> + {(hasLeadingMeta || (isStreaming && timestamp != null)) && ( + + · + + )} + + + )} {showTimestamp && ( <> - {hasLeadingMeta && ( + {(hasLeadingMeta || liveTokens) && ( · @@ -682,13 +731,29 @@ export const AgentResponse = memo(function AgentResponse({ )} + {/* Token usage — `section.tokens` is the sum across the + run's steps, materialized at section-build time. Mirrors + the live render above so cached + live look identical. */} + {section.tokens && ( + <> + {(hasLeadingMeta || (isStreaming && timestamp != null)) && ( + + · + + )} + + + )} {/* Timestamp only on a settled response — while the agent is still streaming we let `ThinkingIndicator` + `ElapsedTime` own the meta row so it doesn't sit inline with a timestamp that hasn't really happened yet. 
*/} {showTimestamp && ( <> - {hasLeadingMeta && ( + {(hasLeadingMeta || section.tokens) && ( · diff --git a/packages/agents-server-ui/src/components/TokenUsage.module.css b/packages/agents-server-ui/src/components/TokenUsage.module.css new file mode 100644 index 0000000000..ceb3308435 --- /dev/null +++ b/packages/agents-server-ui/src/components/TokenUsage.module.css @@ -0,0 +1,8 @@ +/* Match the dimmed tone of the other meta-row siblings (done text, + * elapsed time, timestamp). `tabular-nums` keeps the digit column + * from jittering as the counts tick up on each step boundary. */ +.usage { + color: var(--ds-text-4); + opacity: 0.7; + font-variant-numeric: tabular-nums; +} diff --git a/packages/agents-server-ui/src/components/TokenUsage.tsx b/packages/agents-server-ui/src/components/TokenUsage.tsx new file mode 100644 index 0000000000..7e515693b6 --- /dev/null +++ b/packages/agents-server-ui/src/components/TokenUsage.tsx @@ -0,0 +1,57 @@ +import { Text } from '../ui' +import styles from './TokenUsage.module.css' + +/** + * Compact token-usage label, e.g. `1.2k ↑ 412 ↓`. + * + * Rendered next to the elapsed-time ticker in the agent response + * meta row, with `tabular-nums` to keep the digit column from + * jittering as numbers tick up (input grows when a tool result is + * fed back; output grows when the model streams a new step). + * + * Either side may be `undefined` (the provider didn't emit it, or + * the section is historical and was recorded before tokens were + * persisted) — we skip the missing half rather than print `0`. 
+ */ +export function TokenUsage({ + input, + output, +}: { + input: number | undefined + output: number | undefined +}): React.ReactElement | null { + if (input == null && output == null) return null + const parts: Array<string> = [] + if (input != null) parts.push(`${formatTokenCount(input)} ↑`) + if (output != null) parts.push(`${formatTokenCount(output)} ↓`) + const text = parts.join(` `) + const ariaParts: Array<string> = [] + if (input != null) ariaParts.push(`${input} input tokens`) + if (output != null) ariaParts.push(`${output} output tokens`) + return ( + + {text} + + ) +} + +/** + * `Intl.NumberFormat` with `notation: 'compact'` gives us "1.2K", + * "12K", "1.2M" etc., locale-aware and bounded in width — better + * than a hand-rolled rounder. We force lowercase `k`/`m` afterward + * so the suffix tone matches the muted meta row. + */ +const compactFormatter = new Intl.NumberFormat(undefined, { + notation: `compact`, + maximumFractionDigits: 1, +}) + +function formatTokenCount(n: number): string { + if (n < 1000) return String(n) + return compactFormatter.format(n).toLowerCase() +}