diff --git a/apps/code/src/renderer/desktop-services.ts b/apps/code/src/renderer/desktop-services.ts index a5dd914288..02fb9ddaa3 100644 --- a/apps/code/src/renderer/desktop-services.ts +++ b/apps/code/src/renderer/desktop-services.ts @@ -108,13 +108,14 @@ container.bind(REPORT_MODEL_RESOLVER).toConstantValue({ async resolveDefaultModel( apiHost: string, adapter: "claude" | "codex", + preferredModel?: string | null, ): Promise { try { const options = await hostTrpcClient.agent.getPreviewConfigOptions.query({ apiHost, adapter, }); - return selectModelFromOptions(options); + return selectModelFromOptions(options, preferredModel); } catch (error) { reportModelResolverLog.warn("Failed to resolve default model", { error, diff --git a/packages/core/src/inbox/identifiers.ts b/packages/core/src/inbox/identifiers.ts index 3d515f0be0..31aba20b2d 100644 --- a/packages/core/src/inbox/identifiers.ts +++ b/packages/core/src/inbox/identifiers.ts @@ -18,9 +18,15 @@ export const LINEAR_OAUTH_FLOW = Symbol.for( ); export interface ReportModelResolver { + /** + * Resolve the model id to use for a cloud task. `preferredModel` (e.g. the + * persisted last-used model) is honoured only if the gateway still offers it; + * otherwise the adapter's server default is returned. + */ resolveDefaultModel( apiHost: string, adapter: "claude" | "codex", + preferredModel?: string | null, ): Promise; } diff --git a/packages/core/src/inbox/reportTaskCreation.test.ts b/packages/core/src/inbox/reportTaskCreation.test.ts new file mode 100644 index 0000000000..0f74dcdf5d --- /dev/null +++ b/packages/core/src/inbox/reportTaskCreation.test.ts @@ -0,0 +1,89 @@ +import { describe, expect, it } from "vitest"; +import { + type PreviewConfigOption, + selectModelFromOptions, +} from "./reportTaskCreation"; + +function modelOption( + currentValue: string, + available: string[], +): PreviewConfigOption { + return { + id: "model", + category: "model", + type: "select", + currentValue, + options: available.map((value) => ({ value })), + }; +} + +describe("selectModelFromOptions", () => { + it.each([ + { + name: "returns the server default when no preferred model is given", + options: [modelOption("claude-opus-4-8", ["claude-opus-4-8"])], + preferredModel: undefined, + expected: "claude-opus-4-8", + }, + { + name: "honours the preferred model when the gateway still offers it", + options: [ + modelOption("claude-opus-4-8", [ + "claude-opus-4-8", + "claude-sonnet-4-6", + ]), + ], + preferredModel: "claude-sonnet-4-6", + expected: "claude-sonnet-4-6", + }, + { + // The persisted model (e.g. a de-listed fable) is not in the available + // options, so it must not be returned — otherwise the run 403s. + name: "falls back to the server default when the preferred model is no longer offered", + options: [modelOption("claude-opus-4-8", ["claude-opus-4-8"])], + preferredModel: "claude-fable-5", + expected: "claude-opus-4-8", + }, + { + name: "ignores an empty string preferred model", + options: [modelOption("claude-opus-4-8", ["claude-opus-4-8"])], + preferredModel: "", + expected: "claude-opus-4-8", + }, + { + name: "ignores a null preferred model", + options: [modelOption("claude-opus-4-8", ["claude-opus-4-8"])], + preferredModel: null, + expected: "claude-opus-4-8", + }, + { + // The gateway can return models wrapped in labelled groups; a preferred + // model nested inside a group must still count as available. + name: "honours a preferred model nested in a labelled group", + options: [ + { + id: "model", + category: "model", + type: "select", + currentValue: "claude-opus-4-8", + options: [ + { options: [{ value: "claude-opus-4-8" }] }, + { options: [{ value: "claude-sonnet-4-6" }] }, + ], + } satisfies PreviewConfigOption, + ], + preferredModel: "claude-sonnet-4-6", + expected: "claude-sonnet-4-6", + }, + { + name: "returns undefined when there is no model option", + options: [ + { id: "mode", category: "mode", type: "select", currentValue: "plan" }, + ] satisfies PreviewConfigOption[], + preferredModel: "claude-opus-4-8", + expected: undefined, + }, + ])("$name", ({ options, preferredModel, expected }) => { + expect(selectModelFromOptions(options, preferredModel)).toBe(expected); + }); +}); diff --git a/packages/core/src/inbox/reportTaskCreation.ts b/packages/core/src/inbox/reportTaskCreation.ts index cdce6d812c..453196710b 100644 --- a/packages/core/src/inbox/reportTaskCreation.ts +++ b/packages/core/src/inbox/reportTaskCreation.ts @@ -1,22 +1,64 @@ import type { TaskCreationInput } from "@posthog/shared"; +/** A selectable choice, either flat or wrapped in a labelled group. */ +export interface PreviewConfigChoice { + value?: string; + options?: PreviewConfigChoice[]; +} + /** Minimal shape of a preview-config option we scan for the default model. */ export interface PreviewConfigOption { id?: string; category?: string; type?: string; currentValue?: string | boolean | null; + options?: PreviewConfigChoice[]; +} + +/** + * Flatten the (possibly nested) choices into the set of selectable values. + * The gateway may return models either flat or wrapped in labelled groups, so + * this mirrors `flattenConfigValues` in the TaskInput picker — a model nested in + * a group must still count as available. + */ +function flattenChoiceValues(choices: PreviewConfigChoice[]): string[] { + return choices.flatMap((choice) => + choice.options + ? flattenChoiceValues(choice.options) + : choice.value + ? [choice.value] + : [], + ); } -/** Pick the default model id out of the agent's preview-config options, if present. */ +/** + * Pick the model id out of the agent's preview-config options. + * + * When `preferredModel` is supplied (e.g. the user's persisted last-used model) + * it is honoured only if it is still one of the gateway's available models; + * otherwise we fall back to the server default (`currentValue`). One-click cloud + * flows pass their persisted model here so a stale id the gateway no longer + * offers can't slip through — without the check the run fails with a gateway 403 + * (e.g. a previously-selected model that was later de-listed for the org). + */ export function selectModelFromOptions( options: PreviewConfigOption[], + preferredModel?: string | null, ): string | undefined { const modelOption = options.find( (o) => o.id === "model" || o.category === "model", ); + if (modelOption?.type !== "select") { + return undefined; + } + if ( + preferredModel && + modelOption.options && + flattenChoiceValues(modelOption.options).includes(preferredModel) + ) { + return preferredModel; + } if ( - modelOption?.type === "select" && typeof modelOption.currentValue === "string" && modelOption.currentValue ) { diff --git a/packages/core/src/inbox/signalReportTaskService.test.ts b/packages/core/src/inbox/signalReportTaskService.test.ts index 6e1cf43e6a..dcc3784b3a 100644 --- a/packages/core/src/inbox/signalReportTaskService.test.ts +++ b/packages/core/src/inbox/signalReportTaskService.test.ts @@ -76,6 +76,21 @@ describe("SignalReportTaskService", () => { expect(result.status).toBe("created"); }); + it("forwards the override to the resolver as a preference, not a hard selection", async () => { + // The resolver validates the override against the gateway's available + // models, so it must receive it rather than the service short-circuiting. + const { service, modelResolver } = makeService(); + await service.createSignalReportTask( + makeInput({ modelOverride: "claude-sonnet" }), + vi.fn(), + ); + expect(modelResolver.resolveDefaultModel).toHaveBeenCalledWith( + expect.any(String), + "claude", + "claude-sonnet", + ); + }); + it("aborts with missing-model when no model can be resolved", async () => { const { service, createTask } = makeService( {}, @@ -89,6 +104,22 @@ describe("SignalReportTaskService", () => { expect(createTask).not.toHaveBeenCalled(); }); + it("falls back to the explicit override when the resolver fails transiently", async () => { + // A transient resolver failure returns undefined; a caller-supplied override + // is already a concrete model, so the run should use it rather than block. + const { service, createTask } = makeService( + {}, + { resolveDefaultModel: vi.fn().mockResolvedValue(undefined) }, + ); + const result = await service.createSignalReportTask( + makeInput({ modelOverride: "claude-sonnet" }), + vi.fn(), + ); + expect(result.status).toBe("created"); + expect(createTask).toHaveBeenCalledTimes(1); + expect(createTask.mock.calls[0][0].model).toBe("claude-sonnet"); + }); + it("returns create-failed when the saga fails", async () => { const { service } = makeService({ createTask: vi diff --git a/packages/core/src/inbox/signalReportTaskService.ts b/packages/core/src/inbox/signalReportTaskService.ts index 71fe958f57..53881d5641 100644 --- a/packages/core/src/inbox/signalReportTaskService.ts +++ b/packages/core/src/inbox/signalReportTaskService.ts @@ -66,9 +66,16 @@ export class SignalReportTaskService { } const apiHost = getCloudUrlFromRegion(input.cloudRegion); - const model = - input.modelOverride ?? - (await this.modelResolver.resolveDefaultModel(apiHost, input.adapter)); + // The override is a preference: the resolver keeps it only if the gateway + // still offers it, otherwise it falls back to the server default. On a + // transient resolver failure (undefined) we fall back to the explicit + // override so a valid override-driven run isn't blocked by a gateway outage. + const resolvedModel = await this.modelResolver.resolveDefaultModel( + apiHost, + input.adapter, + input.modelOverride, + ); + const model = resolvedModel ?? input.modelOverride; if (!model) { return { status: "missing-model" }; } diff --git a/packages/ui/src/features/home/hooks/useRunWorkstreamAction.ts b/packages/ui/src/features/home/hooks/useRunWorkstreamAction.ts index 8eacc466fb..a4c9b5ccc9 100644 --- a/packages/ui/src/features/home/hooks/useRunWorkstreamAction.ts +++ b/packages/ui/src/features/home/hooks/useRunWorkstreamAction.ts @@ -99,14 +99,22 @@ export function useRunWorkstreamAction() { void (async () => { try { // The cloud runtime requires a model: action-pinned, then last-used, - // then the adapter's server default. + // then the adapter's server default. The preferred candidate is only + // honoured if the gateway still offers it (the resolver validates it), + // so a stale persisted/pinned id can't reach the run and 403. const adapter = action.adapter ?? lastUsedAdapter; - let model = action.model ?? lastUsedModel ?? undefined; - if (!model && cloudRegion) { - model = await modelResolver.resolveDefaultModel( + const preferredModel = action.model ?? lastUsedModel ?? undefined; + let model = preferredModel; + if (cloudRegion) { + // The resolver swallows transient failures and returns undefined; fall + // back to the preferred id so a gateway outage degrades like the old + // code (a stale id may still 403) instead of hard-blocking valid runs. + const resolvedModel = await modelResolver.resolveDefaultModel( getCloudUrlFromRegion(cloudRegion), adapter, + preferredModel, ); + model = resolvedModel ?? preferredModel; } if (!model) { pendingTaskPromptStoreApi.clear(pendingTaskKey); diff --git a/packages/ui/src/features/inbox/components/ConfigureAgentsSection.tsx b/packages/ui/src/features/inbox/components/ConfigureAgentsSection.tsx index 406213fbf3..de8e932a53 100644 --- a/packages/ui/src/features/inbox/components/ConfigureAgentsSection.tsx +++ b/packages/ui/src/features/inbox/components/ConfigureAgentsSection.tsx @@ -368,14 +368,17 @@ function SetupTaskSection() { const settings = useSettingsStore.getState(); const adapter = settings.lastUsedAdapter ?? "claude"; const apiHost = getCloudUrlFromRegion(cloudRegion); - const model = - settings.lastUsedModel ?? - (await resolveDefaultModel( - queryClient, - apiHost, - adapter, - modelResolver, - )); + const resolvedModel = await resolveDefaultModel( + queryClient, + apiHost, + adapter, + modelResolver, + settings.lastUsedModel, + ); + // The resolver returns undefined on a transient failure; fall back to the + // persisted id so a gateway outage degrades gracefully rather than blocking + // setup for a user whose persisted model was valid. + const model = resolvedModel ?? settings.lastUsedModel; if (!model) { sonnerToast.dismiss(toastId); @@ -387,6 +390,15 @@ function SetupTaskSection() { return; } + // The persisted effort belongs to `lastUsedModel`; if the resolver swapped + // in a fallback default, that tier may be unsupported for the new model and + // the cloud runtime rejects the pair (see agent `bin.ts`). Only carry the + // effort when the model is unchanged; otherwise let the runtime default it. + const reasoningLevel = + model === settings.lastUsedModel + ? (settings.lastUsedReasoningEffort ?? undefined) + : undefined; + const input: TaskCreationInput = { content: AUTONOMY_SETUP_PROMPT, taskDescription: AUTONOMY_SETUP_PROMPT, @@ -396,7 +408,7 @@ function SetupTaskSection() { executionMode: "auto", adapter, model, - reasoningLevel: settings.lastUsedReasoningEffort ?? undefined, + reasoningLevel, }; const result = await taskService.createTask(input, (output) => { diff --git a/packages/ui/src/features/inbox/hooks/resolveDefaultModel.ts b/packages/ui/src/features/inbox/hooks/resolveDefaultModel.ts index ae91a12d03..63d3993b6b 100644 --- a/packages/ui/src/features/inbox/hooks/resolveDefaultModel.ts +++ b/packages/ui/src/features/inbox/hooks/resolveDefaultModel.ts @@ -5,23 +5,31 @@ import type { QueryClient } from "@tanstack/react-query"; const log = logger.scope("resolve-default-model"); /** - * Resolve the default model for the given adapter via the preview-config - * tRPC query. Returns the server's `currentValue` for the `model` option, or - * undefined if the call fails or the option is missing. + * Resolve the model for the given adapter via the preview-config tRPC query. * - * Used by inbox flows that create cloud tasks directly (Discuss, Create PR) - * without going through TaskInput – they need a model to pass to the saga - * and the user hasn't necessarily picked one yet. + * `preferredModel` (e.g. the persisted last-used model) is honoured only if the + * gateway still offers it; otherwise the server default (`currentValue`) is + * returned. Returns undefined if the call fails or the option is missing. + * + * Used by one-click flows that create cloud tasks directly (Discuss, Create PR, + * scout chat) without going through TaskInput – they need a model to pass to the + * saga. Validating the preferred model here is what keeps a stale persisted id + * the gateway no longer offers from reaching the run and 403-ing. */ export async function resolveDefaultModel( queryClient: QueryClient, apiHost: string, adapter: "claude" | "codex", modelResolver: ReportModelResolver, + preferredModel?: string | null, ): Promise { void queryClient; try { - return await modelResolver.resolveDefaultModel(apiHost, adapter); + return await modelResolver.resolveDefaultModel( + apiHost, + adapter, + preferredModel, + ); } catch (error) { log.warn("Failed to resolve default model", { error, adapter }); } diff --git a/packages/ui/src/features/inbox/hooks/useDiscussReport.test.tsx b/packages/ui/src/features/inbox/hooks/useDiscussReport.test.tsx index 4277736d0d..b0a9b2ae26 100644 --- a/packages/ui/src/features/inbox/hooks/useDiscussReport.test.tsx +++ b/packages/ui/src/features/inbox/hooks/useDiscussReport.test.tsx @@ -12,6 +12,9 @@ const createTask = vi.hoisted(() => const getUserIntegrationIdForRepo = vi.hoisted(() => vi.fn(() => "ghu_1")); const openTask = vi.hoisted(() => vi.fn().mockResolvedValue(undefined)); const toastError = vi.hoisted(() => vi.fn()); +const resolveDefaultModel = vi.hoisted(() => + vi.fn().mockResolvedValue("claude-sonnet"), +); vi.mock("@posthog/ui/features/auth/store", () => ({ useAuthStateValue: (sel: (s: { cloudRegion: string }) => unknown) => @@ -29,8 +32,14 @@ vi.mock("@posthog/ui/features/settings/settingsStore", () => ({ }), }, })); +// The runner resolves two distinct services off the container (the task service +// and the report model resolver); return the right shape per token so the model +// resolver isn't silently `{ createTask }` (which would make every run blocked). vi.mock("@posthog/di/react", () => ({ - useService: () => ({ createTask }), + useService: (token: symbol) => + token.description === "posthog.core.inbox.reportModelResolver" + ? { resolveDefaultModel } + : { createTask }, })); vi.mock("@posthog/ui/features/tasks/useTaskCrudMutations", () => ({ useCreateTask: () => ({ invalidateTasks: vi.fn() }), diff --git a/packages/ui/src/features/inbox/hooks/useInboxCloudTaskRunner.ts b/packages/ui/src/features/inbox/hooks/useInboxCloudTaskRunner.ts index eff1720415..4e4a766fe0 100644 --- a/packages/ui/src/features/inbox/hooks/useInboxCloudTaskRunner.ts +++ b/packages/ui/src/features/inbox/hooks/useInboxCloudTaskRunner.ts @@ -135,9 +135,20 @@ export function useInboxCloudTaskRunner({ const adapter = settings.lastUsedAdapter ?? "claude"; const apiHost = getCloudUrlFromRegion(cloudRegion); - const model = - settings.lastUsedModel ?? - (await resolveDefaultModel(queryClient, apiHost, adapter, modelResolver)); + // Pass the persisted model as a *preference*, not a hard selection: the + // resolver keeps it only if the gateway still offers it, otherwise it falls + // back to the server default. A stale id (e.g. one later de-listed for the + // org) would otherwise be sent here and fail the run with a gateway 403. + const resolvedModel = await resolveDefaultModel( + queryClient, + apiHost, + adapter, + modelResolver, + settings.lastUsedModel, + ); + // The resolver returns undefined on a transient failure; fall back to the + // persisted id so a gateway outage degrades gracefully rather than blocking. + const model = resolvedModel ?? settings.lastUsedModel; if (!model) { sonnerToast.dismiss(toastId); @@ -146,6 +157,15 @@ export function useInboxCloudTaskRunner({ return; } + // The persisted effort belongs to `lastUsedModel`; if the resolver swapped in + // a fallback default, that tier may be unsupported for the new model and the + // cloud runtime rejects the pair (see agent `bin.ts`). Only carry the effort + // when the model is unchanged; otherwise let the runtime pick its default. + const reasoningLevel = + model === settings.lastUsedModel + ? (settings.lastUsedReasoningEffort ?? undefined) + : undefined; + const input = buildInput({ reportId, reportTitle, @@ -153,7 +173,7 @@ export function useInboxCloudTaskRunner({ githubUserIntegrationId: String(githubUserIntegrationId), adapter, model, - reasoningLevel: settings.lastUsedReasoningEffort ?? undefined, + reasoningLevel, }); try {