diff --git a/.changeset/gpt-5-3-chat-latest.md b/.changeset/gpt-5-3-chat-latest.md new file mode 100644 index 00000000000..7b5ab730707 --- /dev/null +++ b/.changeset/gpt-5-3-chat-latest.md @@ -0,0 +1,5 @@ +--- +"roo-cline": patch +--- + +Add OpenAI's GPT-5.3-Chat-Latest model support diff --git a/.changeset/gpt-5-4.md b/.changeset/gpt-5-4.md new file mode 100644 index 00000000000..4a28f24f52c --- /dev/null +++ b/.changeset/gpt-5-4.md @@ -0,0 +1,5 @@ +--- +"roo-cline": patch +--- + +Add OpenAI's GPT-5.4 model support diff --git a/packages/types/src/model.ts b/packages/types/src/model.ts index 95e9095a89e..926f29a0a1a 100644 --- a/packages/types/src/model.ts +++ b/packages/types/src/model.ts @@ -98,6 +98,16 @@ export const modelInfoSchema = z.object({ outputPrice: z.number().optional(), cacheWritesPrice: z.number().optional(), cacheReadsPrice: z.number().optional(), + longContextPricing: z + .object({ + thresholdTokens: z.number(), + inputPriceMultiplier: z.number().optional(), + outputPriceMultiplier: z.number().optional(), + cacheWritesPriceMultiplier: z.number().optional(), + cacheReadsPriceMultiplier: z.number().optional(), + appliesToServiceTiers: z.array(serviceTierSchema).optional(), + }) + .optional(), description: z.string().optional(), // Default effort value for models that support reasoning effort reasoningEffort: reasoningEffortExtendedSchema.optional(), diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts index 4ccdab79c71..cdbc82fd520 100644 --- a/packages/types/src/providers/openai.ts +++ b/packages/types/src/providers/openai.ts @@ -24,6 +24,32 @@ export const openAiNativeModels = { description: "GPT-5.1 Codex Max: Our most intelligent coding model optimized for long-horizon, agentic coding tasks", }, + "gpt-5.4": { + maxTokens: 128000, + contextWindow: 1_050_000, + includedTools: ["apply_patch"], + excludedTools: ["apply_diff", "write_to_file"], + supportsImages: true, + supportsPromptCache: true, + supportsReasoningEffort: ["none", "low", "medium", "high", "xhigh"], + reasoningEffort: "none", + inputPrice: 2.5, + outputPrice: 15.0, + cacheReadsPrice: 0.25, + longContextPricing: { + thresholdTokens: 272_000, + inputPriceMultiplier: 2, + outputPriceMultiplier: 1.5, + appliesToServiceTiers: ["default", "flex"], + }, + supportsVerbosity: true, + supportsTemperature: false, + tiers: [ + { name: "flex", contextWindow: 1_050_000, inputPrice: 1.25, outputPrice: 7.5, cacheReadsPrice: 0.125 }, + { name: "priority", contextWindow: 1_050_000, inputPrice: 5.0, outputPrice: 30.0, cacheReadsPrice: 0.5 }, + ], + description: "GPT-5.4: Our most capable model for professional work", + }, "gpt-5.2": { maxTokens: 128000, contextWindow: 400000, @@ -93,6 +119,18 @@ export const openAiNativeModels = { cacheReadsPrice: 0.175, description: "GPT-5.2 Chat: Optimized for conversational AI and chat use cases", }, + "gpt-5.3-chat-latest": { + maxTokens: 16_384, + contextWindow: 128_000, + includedTools: ["apply_patch"], + excludedTools: ["apply_diff", "write_to_file"], + supportsImages: true, + supportsPromptCache: true, + inputPrice: 1.75, + outputPrice: 14.0, + cacheReadsPrice: 0.175, + description: "GPT-5.3 Chat: Optimized for conversational AI and chat use cases", + }, "gpt-5.1": { maxTokens: 128000, contextWindow: 400000, diff --git a/src/api/providers/__tests__/openai-native-usage.spec.ts b/src/api/providers/__tests__/openai-native-usage.spec.ts index 48e1c26877b..a266642e7a7 100644 --- a/src/api/providers/__tests__/openai-native-usage.spec.ts +++ b/src/api/providers/__tests__/openai-native-usage.spec.ts @@ -8,6 +8,10 @@ describe("OpenAiNativeHandler - normalizeUsage", () => { id: "gpt-4o", info: openAiNativeModels["gpt-4o"], } + const gpt54Model = { + id: "gpt-5.4", + info: openAiNativeModels["gpt-5.4"], + } beforeEach(() => { handler = new OpenAiNativeHandler({ @@ -378,6 +382,12 @@ describe("OpenAiNativeHandler - normalizeUsage", () => { const fourOBody = buildRequestBodyForModel("gpt-4o") expect(fourOBody.prompt_cache_retention).toBeUndefined() + + const gpt54Body = buildRequestBodyForModel("gpt-5.4") + expect(gpt54Body.prompt_cache_retention).toBeUndefined() + + const chatModelBody = buildRequestBodyForModel("gpt-5.3-chat-latest") + expect(chatModelBody.prompt_cache_retention).toBeUndefined() }) it("should not set prompt_cache_retention when the model does not support prompt caching", () => { @@ -418,5 +428,64 @@ describe("OpenAiNativeHandler - normalizeUsage", () => { expect(result.totalCost).toBeGreaterThan(0) // Cost should be calculated with full input tokens since no cache reads }) + + it("should use standard GPT-5.4 pricing within the base context threshold", () => { + const usage = { + input_tokens: 100_000, + output_tokens: 1_000, + cache_read_input_tokens: 20_000, + } + + const result = (handler as any).normalizeUsage(usage, gpt54Model) + + expect(result).toMatchObject({ + type: "usage", + inputTokens: 100_000, + outputTokens: 1_000, + cacheReadTokens: 20_000, + }) + expect(result.totalCost).toBeCloseTo(0.22, 6) + }) + + it("should apply GPT-5.4 long-context pricing above the threshold", () => { + const usage = { + input_tokens: 300_000, + output_tokens: 1_000, + cache_read_input_tokens: 100_000, + } + + const result = (handler as any).normalizeUsage(usage, gpt54Model) + + expect(result).toMatchObject({ + type: "usage", + inputTokens: 300_000, + outputTokens: 1_000, + cacheReadTokens: 100_000, + }) + expect(result.totalCost).toBeCloseTo(1.0475, 6) + }) + + it("should not apply GPT-5.4 long-context pricing to priority tier", () => { + handler = new OpenAiNativeHandler({ + openAiNativeApiKey: "test-key", + openAiNativeServiceTier: "priority", + }) + + const usage = { + input_tokens: 300_000, + output_tokens: 1_000, + cache_read_input_tokens: 100_000, + } + + const result = (handler as any).normalizeUsage(usage, gpt54Model) + + expect(result).toMatchObject({ + type: "usage", + inputTokens: 300_000, + outputTokens: 1_000, + cacheReadTokens: 100_000, + }) + expect(result.totalCost).toBeCloseTo(1.08, 6) + }) }) }) diff --git a/src/api/providers/__tests__/openai-native.spec.ts b/src/api/providers/__tests__/openai-native.spec.ts index fefccbe3ab1..682fece5706 100644 --- a/src/api/providers/__tests__/openai-native.spec.ts +++ b/src/api/providers/__tests__/openai-native.spec.ts @@ -249,6 +249,34 @@ describe("OpenAiNativeHandler", () => { expect(modelInfo.info.supportsReasoningEffort).toEqual(["low", "medium", "high", "xhigh"]) }) + it("should return GPT-5.4 model info when selected", () => { + const gpt54Handler = new OpenAiNativeHandler({ + ...mockOptions, + apiModelId: "gpt-5.4", + }) + + const modelInfo = gpt54Handler.getModel() + expect(modelInfo.id).toBe("gpt-5.4") + expect(modelInfo.info.maxTokens).toBe(128000) + expect(modelInfo.info.contextWindow).toBe(1_050_000) + expect(modelInfo.info.supportsVerbosity).toBe(true) + expect(modelInfo.info.supportsReasoningEffort).toEqual(["none", "low", "medium", "high", "xhigh"]) + expect(modelInfo.info.reasoningEffort).toBe("none") + }) + + it("should return GPT-5.3 Chat model info when selected", () => { + const chatHandler = new OpenAiNativeHandler({ + ...mockOptions, + apiModelId: "gpt-5.3-chat-latest", + }) + + const modelInfo = chatHandler.getModel() + expect(modelInfo.id).toBe("gpt-5.3-chat-latest") + expect(modelInfo.info.maxTokens).toBe(16_384) + expect(modelInfo.info.contextWindow).toBe(128000) + expect(modelInfo.info.supportsImages).toBe(true) + }) + it("should handle undefined model ID", () => { const handlerWithoutModel = new OpenAiNativeHandler({ openAiNativeApiKey: "test-api-key", @@ -345,6 +373,107 @@ describe("OpenAiNativeHandler", () => { expect(textChunks[1].text).toBe(" world") }) + it("should handle GPT-5.4 model with Responses API", async () => { + const mockFetch = vitest.fn().mockResolvedValue({ + ok: true, + body: new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":"GPT-5.4 reply"}}\n\n', + ), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + global.fetch = mockFetch as any + + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + + handler = new OpenAiNativeHandler({ + ...mockOptions, + apiModelId: "gpt-5.4", + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + expect(mockFetch).toHaveBeenCalledWith( + "https://api.openai.com/v1/responses", + expect.objectContaining({ + body: expect.any(String), + }), + ) + const body = (mockFetch.mock.calls[0][1] as any).body as string + const parsedBody = JSON.parse(body) + expect(parsedBody.model).toBe("gpt-5.4") + expect(parsedBody.max_output_tokens).toBe(128000) + expect(parsedBody.temperature).toBeUndefined() + expect(parsedBody.include).toEqual(["reasoning.encrypted_content"]) + expect(parsedBody.reasoning?.effort).toBe("none") + expect(parsedBody.text?.verbosity).toBe("medium") + + const textChunks = chunks.filter((chunk) => chunk.type === "text") + expect(textChunks).toHaveLength(1) + expect(textChunks[0].text).toBe("GPT-5.4 reply") + }) + + it("should handle GPT-5.3 Chat model with Responses API", async () => { + // Mock fetch for Responses API + const mockFetch = vitest.fn().mockResolvedValue({ + ok: true, + body: new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode( + 'data: {"type":"response.output_item.added","item":{"type":"text","text":"Chat reply"}}\n\n', + ), + ) + controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n")) + controller.close() + }, + }), + }) + global.fetch = mockFetch as any + + // Mock SDK to fail so it uses fetch + mockResponsesCreate.mockRejectedValue(new Error("SDK not available")) + + handler = new OpenAiNativeHandler({ + ...mockOptions, + apiModelId: "gpt-5.3-chat-latest", + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + expect(mockFetch).toHaveBeenCalledWith( + "https://api.openai.com/v1/responses", + expect.objectContaining({ + body: expect.any(String), + }), + ) + const body = (mockFetch.mock.calls[0][1] as any).body as string + const parsedBody = JSON.parse(body) + expect(parsedBody.model).toBe("gpt-5.3-chat-latest") + expect(parsedBody.max_output_tokens).toBe(16_384) + expect(parsedBody.temperature).toBe(0) + expect(parsedBody.reasoning?.effort).toBeUndefined() + expect(parsedBody.text?.verbosity).toBeUndefined() + + const textChunks = chunks.filter((chunk) => chunk.type === "text") + expect(textChunks).toHaveLength(1) + expect(textChunks[0].text).toBe("Chat reply") + }) + it("should handle GPT-5-mini model with Responses API", async () => { // Mock fetch for Responses API const mockFetch = vitest.fn().mockResolvedValue({ diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index 3dfad3ed352..6ce93827636 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -148,6 +148,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio totalOutputTokens, cacheWriteTokens, cacheReadTokens, + effectiveTier, ) const reasoningTokens = diff --git a/src/shared/cost.ts b/src/shared/cost.ts index fea686d8aed..8954904fda1 100644 --- a/src/shared/cost.ts +++ b/src/shared/cost.ts @@ -1,4 +1,5 @@ import type { ModelInfo } from "@roo-code/types" +import type { ServiceTier } from "@roo-code/types" export interface ApiCostResult { totalInputTokens: number @@ -6,6 +7,38 @@ export interface ApiCostResult { totalCost: number } +function applyLongContextPricing(modelInfo: ModelInfo, totalInputTokens: number, serviceTier?: ServiceTier): ModelInfo { + const pricing = modelInfo.longContextPricing + if (!pricing || totalInputTokens <= pricing.thresholdTokens) { + return modelInfo + } + + const effectiveServiceTier = serviceTier ?? "default" + if (pricing.appliesToServiceTiers && !pricing.appliesToServiceTiers.includes(effectiveServiceTier)) { + return modelInfo + } + + return { + ...modelInfo, + inputPrice: + modelInfo.inputPrice !== undefined && pricing.inputPriceMultiplier !== undefined + ? modelInfo.inputPrice * pricing.inputPriceMultiplier + : modelInfo.inputPrice, + outputPrice: + modelInfo.outputPrice !== undefined && pricing.outputPriceMultiplier !== undefined + ? modelInfo.outputPrice * pricing.outputPriceMultiplier + : modelInfo.outputPrice, + cacheWritesPrice: + modelInfo.cacheWritesPrice !== undefined && pricing.cacheWritesPriceMultiplier !== undefined + ? modelInfo.cacheWritesPrice * pricing.cacheWritesPriceMultiplier + : modelInfo.cacheWritesPrice, + cacheReadsPrice: + modelInfo.cacheReadsPrice !== undefined && pricing.cacheReadsPriceMultiplier !== undefined + ? modelInfo.cacheReadsPrice * pricing.cacheReadsPriceMultiplier + : modelInfo.cacheReadsPrice, + } +} + function calculateApiCostInternal( modelInfo: ModelInfo, inputTokens: number, @@ -62,15 +95,17 @@ export function calculateApiCostOpenAI( outputTokens: number, cacheCreationInputTokens?: number, cacheReadInputTokens?: number, + serviceTier?: ServiceTier, ): ApiCostResult { const cacheCreationInputTokensNum = cacheCreationInputTokens || 0 const cacheReadInputTokensNum = cacheReadInputTokens || 0 const nonCachedInputTokens = Math.max(0, inputTokens - cacheCreationInputTokensNum - cacheReadInputTokensNum) + const effectiveModelInfo = applyLongContextPricing(modelInfo, inputTokens, serviceTier) // For OpenAI: inputTokens ALREADY includes all tokens (cached + non-cached) // So we pass the original inputTokens as the total return calculateApiCostInternal( - modelInfo, + effectiveModelInfo, nonCachedInputTokens, outputTokens, cacheCreationInputTokensNum, diff --git a/src/utils/__tests__/cost.spec.ts b/src/utils/__tests__/cost.spec.ts index 83d26871369..6f0b594c8d2 100644 --- a/src/utils/__tests__/cost.spec.ts +++ b/src/utils/__tests__/cost.spec.ts @@ -221,5 +221,86 @@ describe("Cost Utility", () => { expect(result.totalInputTokens).toBe(6000) // Total already includes cache expect(result.totalOutputTokens).toBe(500) }) + + it("should not apply long-context pricing at the threshold", () => { + const modelWithLongContextPricing: ModelInfo = { + ...mockModelInfo, + longContextPricing: { + thresholdTokens: 272_000, + inputPriceMultiplier: 2, + outputPriceMultiplier: 1.5, + cacheWritesPriceMultiplier: 2, + cacheReadsPriceMultiplier: 2, + }, + } + + const result = calculateApiCostOpenAI(modelWithLongContextPricing, 272_000, 1_000, undefined, 100_000) + + // Input cost: (3.0 / 1_000_000) * (272000 - 100000) = 0.516 + // Output cost: (15.0 / 1_000_000) * 1000 = 0.015 + // Cache reads: (0.3 / 1_000_000) * 100000 = 0.03 + // Total: 0.516 + 0.015 + 0.03 = 0.561 + expect(result.totalCost).toBeCloseTo(0.561, 6) + }) + + it("should apply long-context pricing above the threshold", () => { + const modelWithLongContextPricing: ModelInfo = { + maxTokens: 128_000, + contextWindow: 1_050_000, + supportsPromptCache: true, + inputPrice: 2.5, + outputPrice: 15.0, + cacheWritesPrice: 5.0, + cacheReadsPrice: 0.25, + longContextPricing: { + thresholdTokens: 272_000, + inputPriceMultiplier: 2, + outputPriceMultiplier: 1.5, + cacheWritesPriceMultiplier: 2, + cacheReadsPriceMultiplier: 2, + }, + } + + const result = calculateApiCostOpenAI(modelWithLongContextPricing, 300_000, 1_000, 20_000, 100_000) + + // Input cost: (5.0 / 1_000_000) * (300000 - 20000 - 100000) = 0.9 + // Output cost: (22.5 / 1_000_000) * 1000 = 0.0225 + // Cache writes: (10.0 / 1_000_000) * 20000 = 0.2 + // Cache reads: (0.5 / 1_000_000) * 100000 = 0.05 + // Total: 0.9 + 0.0225 + 0.2 + 0.05 = 1.1725 + expect(result.totalCost).toBeCloseTo(1.1725, 6) + }) + + it("should skip long-context pricing for service tiers outside the allowed list", () => { + const modelWithLongContextPricing: ModelInfo = { + maxTokens: 128_000, + contextWindow: 1_050_000, + supportsPromptCache: true, + inputPrice: 5.0, + outputPrice: 30.0, + cacheReadsPrice: 0.5, + longContextPricing: { + thresholdTokens: 272_000, + inputPriceMultiplier: 2, + outputPriceMultiplier: 1.5, + appliesToServiceTiers: ["default", "flex"], + }, + } + + const result = calculateApiCostOpenAI( + modelWithLongContextPricing, + 300_000, + 1_000, + undefined, + 100_000, + "priority", + ) + + // Input cost: (5.0 / 1_000_000) * (300000 - 100000) = 1.0 + // Output cost: (30.0 / 1_000_000) * 1000 = 0.03 + // Cache reads: (0.5 / 1_000_000) * 100000 = 0.05 + // Total: 1.0 + 0.03 + 0.05 = 1.08 + expect(result.totalCost).toBeCloseTo(1.08, 6) + }) }) })