From a81e435e5c116030bb71f53d8d05655902b7c37f Mon Sep 17 00:00:00 2001 From: Kyle Mathews Date: Fri, 5 Jun 2026 12:16:41 -0600 Subject: [PATCH 1/2] Handle model provider timeouts and errors --- .../agents-runtime/src/context-factory.ts | 3 + packages/agents-runtime/src/index.ts | 7 + .../src/model-provider-error.ts | 145 ++++++++++++++++++ .../agents-runtime/src/outbound-bridge.ts | 16 ++ packages/agents-runtime/src/pi-adapter.ts | 90 ++++++++++- packages/agents-runtime/src/process-wake.ts | 10 +- packages/agents-runtime/src/types.ts | 2 + .../test/model-provider-error.test.ts | 42 +++++ .../src/components/AgentResponse.tsx | 28 +--- 9 files changed, 313 insertions(+), 30 deletions(-) create mode 100644 packages/agents-runtime/src/model-provider-error.ts create mode 100644 packages/agents-runtime/test/model-provider-error.test.ts diff --git a/packages/agents-runtime/src/context-factory.ts b/packages/agents-runtime/src/context-factory.ts index d0c8354fd0..9a48b040d4 100644 --- a/packages/agents-runtime/src/context-factory.ts +++ b/packages/agents-runtime/src/context-factory.ts @@ -546,6 +546,9 @@ export function createHandlerContext( getApiKey: activeAgentConfig.getApiKey, onPayload: activeAgentConfig.onPayload, + + modelTimeoutMs: activeAgentConfig.modelTimeoutMs, + modelMaxRetries: activeAgentConfig.modelMaxRetries, }) const handle = adapterFactory({ entityUrl: config.entityUrl, diff --git a/packages/agents-runtime/src/index.ts b/packages/agents-runtime/src/index.ts index c593d6ac84..172be74b9e 100644 --- a/packages/agents-runtime/src/index.ts +++ b/packages/agents-runtime/src/index.ts @@ -163,6 +163,13 @@ export type { export { buildSections, buildTimelineEntries } from './use-chat' export type { EntityTimelineEntry } from './use-chat' export { appendPathToUrl } from './url' +export { + ModelProviderError, + classifyModelProviderError, + modelProviderErrorMessage, + toModelProviderError, +} from './model-provider-error' +export type { ModelProviderErrorCode } from './model-provider-error' export { defaultProjection, diff --git a/packages/agents-runtime/src/model-provider-error.ts b/packages/agents-runtime/src/model-provider-error.ts new file mode 100644 index 0000000000..810891b263 --- /dev/null +++ b/packages/agents-runtime/src/model-provider-error.ts @@ -0,0 +1,145 @@ +export type ModelProviderErrorCode = + | `MODEL_PROVIDER_TIMEOUT` + | `MODEL_PROVIDER_UNREACHABLE` + | `MODEL_PROVIDER_AUTH_FAILED` + | `MODEL_PROVIDER_RATE_LIMITED` + | `MODEL_PROVIDER_UNAVAILABLE` + | `MODEL_PROVIDER_ERROR` + +export class ModelProviderError extends Error { + readonly code: ModelProviderErrorCode + readonly provider?: string + readonly model?: string + + constructor(opts: { + code: ModelProviderErrorCode + message: string + provider?: string + model?: string + cause?: unknown + }) { + super( + opts.message, + opts.cause === undefined ? undefined : { cause: opts.cause } + ) + this.name = `ModelProviderError` + this.code = opts.code + this.provider = opts.provider + this.model = opts.model + } +} + +function stringifyError(error: unknown): string { + if (error instanceof Error) { + const cause = (error as { cause?: unknown }).cause + return [ + error.name, + error.message, + cause === undefined ? `` : stringifyError(cause), + ] + .filter(Boolean) + .join(` `) + } + return String(error) +} + +export function classifyModelProviderError( + error: unknown +): ModelProviderErrorCode { + const text = stringifyError(error).toLowerCase() + + if ( + /\b(aborterror|timeouterror)\b/.test(text) || + text.includes(`timeout`) || + text.includes(`timed out`) + ) { + return `MODEL_PROVIDER_TIMEOUT` + } + + if ( + text.includes(`401`) || + text.includes(`invalid api key`) || + text.includes(`authentication`) || + text.includes(`unauthorized`) + ) { + return `MODEL_PROVIDER_AUTH_FAILED` + } + + if (text.includes(`429`) || text.includes(`rate limit`)) { + return `MODEL_PROVIDER_RATE_LIMITED` + } + + if ( + text.includes(`502`) || + text.includes(`503`) || + text.includes(`504`) || + text.includes(`overloaded`) || + text.includes(`unavailable`) + ) { + return `MODEL_PROVIDER_UNAVAILABLE` + } + + if ( + text.includes(`enotfound`) || + text.includes(`econnrefused`) || + text.includes(`econnreset`) || + text.includes(`eai_again`) || + text.includes(`fetch failed`) || + text.includes(`failed to fetch`) || + text.includes(`network`) + ) { + return `MODEL_PROVIDER_UNREACHABLE` + } + + return `MODEL_PROVIDER_ERROR` +} + +export function modelProviderErrorMessage(opts: { + code: ModelProviderErrorCode + provider?: string +}): string { + const provider = opts.provider + ? displayProvider(opts.provider) + : `the model provider` + switch (opts.code) { + case `MODEL_PROVIDER_TIMEOUT`: + return `${provider} did not respond before the timeout. Check your Internet connection or provider status.` + case `MODEL_PROVIDER_UNREACHABLE`: + return `Could not reach ${provider}. Check your Internet connection or ${provider} status.` + case `MODEL_PROVIDER_AUTH_FAILED`: + return `${provider} rejected the API key. Check your model provider credentials.` + case `MODEL_PROVIDER_RATE_LIMITED`: + return `${provider} rate limited the request. Please wait and try again.` + case `MODEL_PROVIDER_UNAVAILABLE`: + return `${provider} is currently unavailable. Check provider status and try again.` + case `MODEL_PROVIDER_ERROR`: + return `${provider} returned an error. Check the runtime logs for provider details.` + } +} + +export function toModelProviderError( + error: unknown, + opts: { provider?: string; model?: string } +): ModelProviderError { + if (error instanceof ModelProviderError) return error + const code = classifyModelProviderError(error) + const detail = error instanceof Error ? error.message : String(error) + return new ModelProviderError({ + code, + provider: opts.provider, + model: opts.model, + message: `${modelProviderErrorMessage({ code, provider: opts.provider })} (${detail})`, + cause: error, + }) +} + +function displayProvider(provider: string): string { + switch (provider.toLowerCase()) { + case `anthropic`: + return `Anthropic` + case `openai`: + return `OpenAI` + default: + return provider + } +} diff --git a/packages/agents-runtime/src/outbound-bridge.ts b/packages/agents-runtime/src/outbound-bridge.ts index 2c81851df1..22229083ee 100644 --- a/packages/agents-runtime/src/outbound-bridge.ts +++ b/packages/agents-runtime/src/outbound-bridge.ts @@ -100,6 +100,7 @@ export async function loadOutboundIdSeed( export interface OutboundBridge { onRunStart: () => void onRunEnd: (opts?: { finishReason?: string }) => void + onError: (opts: { errorCode: string; message: string }) => void onStepStart: (opts?: { modelProvider?: string; modelId?: string }) => void onStepEnd: (opts?: { finishReason?: string @@ -193,6 +194,21 @@ export function createOutboundBridge( currentRunKey = null }, + onError(opts: { errorCode: string; message: string }) { + if (!currentRunKey) return + writeEvent( + entityStateSchema.errors.insert({ + key: `${currentRunKey}:error-${crypto.randomUUID()}`, + value: { + error_code: opts.errorCode, + message: opts.message, + run_id: currentRunKey, + ...(currentStepKey ? { step_id: currentStepKey } : {}), + } as never, + }) as ChangeEvent + ) + }, + onStepStart(opts?: { modelProvider?: string; modelId?: string }) { const runKey = requireActiveRun(`onStepStart`) currentStepKey = `step-${counters.step++}` diff --git a/packages/agents-runtime/src/pi-adapter.ts b/packages/agents-runtime/src/pi-adapter.ts index 71c4d0f99d..45f4d4a6c9 100644 --- a/packages/agents-runtime/src/pi-adapter.ts +++ b/packages/agents-runtime/src/pi-adapter.ts @@ -12,6 +12,10 @@ import { getModel } from '@mariozechner/pi-ai' import { createOutboundBridge } from './outbound-bridge' import { MOONSHOT_PROVIDER, getMoonshotModel } from './moonshot-models' import { runtimeLog } from './log' +import { + ModelProviderError, + toModelProviderError, +} from './model-provider-error' import type { OutboundIdSeed } from './outbound-bridge' import type { ChangeEvent } from '@durable-streams/state' import type { @@ -42,6 +46,25 @@ export interface PiAdapterOptions { provider: string ) => Promise | string | undefined onPayload?: SimpleStreamOptions[`onPayload`] + modelTimeoutMs?: number + modelMaxRetries?: number +} + +const DEFAULT_MODEL_TIMEOUT_MS = 30_000 +const DEFAULT_MODEL_MAX_RETRIES = 0 + +function readPositiveIntEnv(name: string): number | undefined { + const raw = process.env[name] + if (!raw) return undefined + const parsed = Number(raw) + return Number.isFinite(parsed) && parsed > 0 ? Math.floor(parsed) : undefined +} + +function readNonNegativeIntEnv(name: string): number | undefined { + const raw = process.env[name] + if (!raw) return undefined + const parsed = Number(raw) + return Number.isFinite(parsed) && parsed >= 0 ? Math.floor(parsed) : undefined } interface PiAgentAdapterConfig { @@ -227,8 +250,16 @@ export function createPiAgentAdapter( model: opts.model, ...(opts.provider && { provider: opts.provider }), }) - - const agent = new Agent({ + const modelTimeoutMs = + opts.modelTimeoutMs ?? + readPositiveIntEnv(`ELECTRIC_AGENTS_MODEL_TIMEOUT_MS`) ?? + DEFAULT_MODEL_TIMEOUT_MS + const modelMaxRetries = + opts.modelMaxRetries ?? + readNonNegativeIntEnv(`ELECTRIC_AGENTS_MODEL_MAX_RETRIES`) ?? + DEFAULT_MODEL_MAX_RETRIES + + const agentOptions = { initialState: { systemPrompt: opts.systemPrompt, tools: opts.tools as Array, @@ -238,7 +269,18 @@ export function createPiAgentAdapter( ...(opts.streamFn && { streamFn: opts.streamFn }), ...(opts.getApiKey && { getApiKey: opts.getApiKey }), ...(opts.onPayload && { onPayload: opts.onPayload }), - }) + // Pi forwards these options to provider stream calls in current releases. + // Keep them as a top-level passthrough so unreachable providers settle + // even when the caller did not provide a custom stream function. Older + // type definitions don't expose them yet, so keep our timeout fallback + // below as the hard guarantee. + timeoutMs: modelTimeoutMs, + maxRetries: modelMaxRetries, + } + + const agent = new Agent( + agentOptions as ConstructorParameters[0] + ) function processAgentEvents( resolveWhenDone: () => void, @@ -361,8 +403,11 @@ export function createPiAgentAdapter( }) if (isError) { - throw new Error( - `pi-agent message_end error: ${msg.errorMessage ?? `unknown error`} (stopReason=${msg.stopReason ?? `none`})` + throw toModelProviderError( + new Error( + `pi-agent message_end error: ${msg.errorMessage ?? `unknown error`} (stopReason=${msg.stopReason ?? `none`})` + ), + { provider: model.provider, model: model.id } ) } break @@ -437,20 +482,38 @@ export function createPiAgentAdapter( let settled = false let unsubscribe = (): void => {} let abortFallback: ReturnType | null = null + let modelTimeout: ReturnType | null = null const clearAbortFallback = (): void => { if (!abortFallback) return clearTimeout(abortFallback) abortFallback = null } + const clearModelTimeout = (): void => { + if (!modelTimeout) return + clearTimeout(modelTimeout) + modelTimeout = null + } const finish = (finishReason: `stop` | `aborted` | `error`): void => { if (settled) return settled = true clearAbortFallback() + clearModelTimeout() running = false abortSignal?.removeEventListener(`abort`, abortRun) unsubscribe() bridge.onRunEnd({ finishReason }) } + const failWithProviderError = (err: unknown): ModelProviderError => { + const providerError = toModelProviderError(err, { + provider: model.provider, + model: model.id, + }) + bridge.onError({ + errorCode: providerError.code, + message: providerError.message, + }) + return providerError + } const abortRun = (): void => { if (settled) return abortedRun = true @@ -476,12 +539,24 @@ export function createPiAgentAdapter( }, (err) => { if (settled) return + const providerError = failWithProviderError(err) finish(`error`) - reject(err) + reject(providerError) } ) abortSignal?.addEventListener(`abort`, abortRun, { once: true }) + modelTimeout = setTimeout(() => { + if (settled) return + const providerError = failWithProviderError( + new Error( + `model provider request timed out after ${modelTimeoutMs}ms` + ) + ) + agent.abort() + finish(`error`) + reject(providerError) + }, modelTimeoutMs) const runPromise = input !== undefined ? agent.prompt(input) : agent.continue() if (abortSignal?.aborted) { @@ -491,8 +566,9 @@ export function createPiAgentAdapter( Promise.resolve(runPromise).catch((err: Error) => { if (settled) return if (abortedRun) return + const providerError = failWithProviderError(err) finish(`error`) - reject(err) + reject(providerError) }) }) }, diff --git a/packages/agents-runtime/src/process-wake.ts b/packages/agents-runtime/src/process-wake.ts index 4b1e3d7df4..bbb6a02b18 100644 --- a/packages/agents-runtime/src/process-wake.ts +++ b/packages/agents-runtime/src/process-wake.ts @@ -13,6 +13,7 @@ import { unrestrictedSandbox } from './sandbox/unrestricted' import { resolveSandboxIdentity } from './sandbox/identity' import { appendPathToUrl } from './url' import { manifestChildKey } from './manifest-helpers' +import { ModelProviderError } from './model-provider-error' import { buildHydratedEventSourceWake, eventSourceWakeInfoFromManifests, @@ -2078,13 +2079,18 @@ export async function processWake( await waitForSignalHandlers() activeSignalHandler = null wakeSession.rollbackManifestEntries() - const errMsg = toError(setupErr).message + const err = toError(setupErr) + const errMsg = err.message + const errCode = + setupErr instanceof ModelProviderError + ? setupErr.code + : `HANDLER_FAILED` log.error(`handler failed for ${entityUrl}:`, errMsg) writeEvent( entityStateSchema.errors.insert({ key: `error-${epoch}-${crypto.randomUUID()}`, value: { - error_code: `HANDLER_FAILED`, + error_code: errCode, message: errMsg, } as never, }) as ChangeEvent diff --git a/packages/agents-runtime/src/types.ts b/packages/agents-runtime/src/types.ts index c10d9063c2..9f0509071f 100644 --- a/packages/agents-runtime/src/types.ts +++ b/packages/agents-runtime/src/types.ts @@ -858,6 +858,8 @@ export interface AgentConfig { provider: string ) => Promise | string | undefined onPayload?: SimpleStreamOptions[`onPayload`] + modelTimeoutMs?: number + modelMaxRetries?: number testResponses?: TestResponses } diff --git a/packages/agents-runtime/test/model-provider-error.test.ts b/packages/agents-runtime/test/model-provider-error.test.ts new file mode 100644 index 0000000000..cfb31349da --- /dev/null +++ b/packages/agents-runtime/test/model-provider-error.test.ts @@ -0,0 +1,42 @@ +import { describe, expect, it } from 'vitest' +import { + classifyModelProviderError, + modelProviderErrorMessage, + toModelProviderError, +} from '../src/model-provider-error' + +describe(`model provider error classification`, () => { + it.each([ + [new Error(`fetch failed`), `MODEL_PROVIDER_UNREACHABLE`], + [new Error(`ENOTFOUND api.anthropic.com`), `MODEL_PROVIDER_UNREACHABLE`], + [new Error(`timeout`), `MODEL_PROVIDER_TIMEOUT`], + [new Error(`request timed out`), `MODEL_PROVIDER_TIMEOUT`], + [new Error(`401 invalid api key`), `MODEL_PROVIDER_AUTH_FAILED`], + [new Error(`authentication failed`), `MODEL_PROVIDER_AUTH_FAILED`], + [new Error(`429 rate limit`), `MODEL_PROVIDER_RATE_LIMITED`], + [new Error(`503 overloaded`), `MODEL_PROVIDER_UNAVAILABLE`], + [new Error(`something unexpected`), `MODEL_PROVIDER_ERROR`], + ] as const)(`classifies %s as %s`, (error, code) => { + expect(classifyModelProviderError(error)).toBe(code) + }) + + it(`creates friendly provider-specific messages with original detail`, () => { + const error = toModelProviderError(new Error(`fetch failed`), { + provider: `anthropic`, + model: `claude-sonnet-4-5`, + }) + + expect(error.code).toBe(`MODEL_PROVIDER_UNREACHABLE`) + expect(error.message).toContain(`Could not reach Anthropic`) + expect(error.message).toContain(`fetch failed`) + }) + + it(`has a timeout message`, () => { + expect( + modelProviderErrorMessage({ + code: `MODEL_PROVIDER_TIMEOUT`, + provider: `openai`, + }) + ).toContain(`OpenAI did not respond`) + }) +}) diff --git a/packages/agents-server-ui/src/components/AgentResponse.tsx b/packages/agents-server-ui/src/components/AgentResponse.tsx index 722f56fef8..2fadacfb84 100644 --- a/packages/agents-server-ui/src/components/AgentResponse.tsx +++ b/packages/agents-server-ui/src/components/AgentResponse.tsx @@ -322,16 +322,14 @@ function liveRunItemsToContentItems( return contentItems } +function formatError(error: EntityTimelineErrorItem): string { + return error.error_code + ? `${error.error_code}: ${error.message}` + : error.message +} + function errorText(errors: Array): string | undefined { - return errors.length > 0 - ? errors - .map((error) => - error.error_code - ? `${error.error_code}: ${error.message}` - : error.message - ) - .join(`; `) - : undefined + return errors.length > 0 ? errors.map(formatError).join(`; `) : undefined } function failedRunText( @@ -714,18 +712,6 @@ export const AgentResponse = memo(function AgentResponse({ )} - {section.done && copyText && ( - - )} ) From 9cec07d58092dbc7da4cdc9f7e60b7374198ffca Mon Sep 17 00:00:00 2001 From: Kyle Mathews Date: Fri, 5 Jun 2026 12:21:55 -0600 Subject: [PATCH 2/2] Add changeset for model provider errors --- .changeset/model-provider-errors.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changeset/model-provider-errors.md diff --git a/.changeset/model-provider-errors.md b/.changeset/model-provider-errors.md new file mode 100644 index 0000000000..4e329edc21 --- /dev/null +++ b/.changeset/model-provider-errors.md @@ -0,0 +1,6 @@ +--- +"@electric-ax/agents-runtime": patch +"@electric-ax/agents-server-ui": patch +--- + +Add default model-provider timeout/error handling for agent runs and render durable run errors in the UI.