Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/gpt-5-3-chat-latest.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"roo-cline": patch
---

Add OpenAI's GPT-5.3-Chat-Latest model support
5 changes: 5 additions & 0 deletions .changeset/gpt-5-4.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"roo-cline": patch
---

Add OpenAI's GPT-5.4 model support
10 changes: 10 additions & 0 deletions packages/types/src/model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,16 @@ export const modelInfoSchema = z.object({
outputPrice: z.number().optional(),
cacheWritesPrice: z.number().optional(),
cacheReadsPrice: z.number().optional(),
longContextPricing: z
.object({
thresholdTokens: z.number(),
inputPriceMultiplier: z.number().optional(),
outputPriceMultiplier: z.number().optional(),
cacheWritesPriceMultiplier: z.number().optional(),
cacheReadsPriceMultiplier: z.number().optional(),
appliesToServiceTiers: z.array(serviceTierSchema).optional(),
})
.optional(),
description: z.string().optional(),
// Default effort value for models that support reasoning effort
reasoningEffort: reasoningEffortExtendedSchema.optional(),
Expand Down
38 changes: 38 additions & 0 deletions packages/types/src/providers/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,32 @@ export const openAiNativeModels = {
description:
"GPT-5.1 Codex Max: Our most intelligent coding model optimized for long-horizon, agentic coding tasks",
},
"gpt-5.4": {
maxTokens: 128000,
contextWindow: 1_050_000,
includedTools: ["apply_patch"],
excludedTools: ["apply_diff", "write_to_file"],
supportsImages: true,
supportsPromptCache: true,
supportsReasoningEffort: ["none", "low", "medium", "high", "xhigh"],
reasoningEffort: "none",
inputPrice: 2.5,
outputPrice: 15.0,
cacheReadsPrice: 0.25,
longContextPricing: {
thresholdTokens: 272_000,
inputPriceMultiplier: 2,
outputPriceMultiplier: 1.5,
appliesToServiceTiers: ["default", "flex"],
},
supportsVerbosity: true,
supportsTemperature: false,
tiers: [
{ name: "flex", contextWindow: 1_050_000, inputPrice: 1.25, outputPrice: 7.5, cacheReadsPrice: 0.125 },
{ name: "priority", contextWindow: 1_050_000, inputPrice: 5.0, outputPrice: 30.0, cacheReadsPrice: 0.5 },
],
description: "GPT-5.4: Our most capable model for professional work",
},
"gpt-5.2": {
maxTokens: 128000,
contextWindow: 400000,
Expand Down Expand Up @@ -93,6 +119,18 @@ export const openAiNativeModels = {
cacheReadsPrice: 0.175,
description: "GPT-5.2 Chat: Optimized for conversational AI and chat use cases",
},
"gpt-5.3-chat-latest": {
maxTokens: 16_384,
contextWindow: 128_000,
includedTools: ["apply_patch"],
excludedTools: ["apply_diff", "write_to_file"],
supportsImages: true,
supportsPromptCache: true,
inputPrice: 1.75,
outputPrice: 14.0,
cacheReadsPrice: 0.175,
description: "GPT-5.3 Chat: Optimized for conversational AI and chat use cases",
},
"gpt-5.1": {
maxTokens: 128000,
contextWindow: 400000,
Expand Down
69 changes: 69 additions & 0 deletions src/api/providers/__tests__/openai-native-usage.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
id: "gpt-4o",
info: openAiNativeModels["gpt-4o"],
}
const gpt54Model = {
id: "gpt-5.4",
info: openAiNativeModels["gpt-5.4"],
}

beforeEach(() => {
handler = new OpenAiNativeHandler({
Expand Down Expand Up @@ -378,6 +382,12 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {

const fourOBody = buildRequestBodyForModel("gpt-4o")
expect(fourOBody.prompt_cache_retention).toBeUndefined()

const gpt54Body = buildRequestBodyForModel("gpt-5.4")
expect(gpt54Body.prompt_cache_retention).toBeUndefined()

const chatModelBody = buildRequestBodyForModel("gpt-5.3-chat-latest")
expect(chatModelBody.prompt_cache_retention).toBeUndefined()
})

it("should not set prompt_cache_retention when the model does not support prompt caching", () => {
Expand Down Expand Up @@ -418,5 +428,64 @@ describe("OpenAiNativeHandler - normalizeUsage", () => {
expect(result.totalCost).toBeGreaterThan(0)
// Cost should be calculated with full input tokens since no cache reads
})

it("should use standard GPT-5.4 pricing within the base context threshold", () => {
const usage = {
input_tokens: 100_000,
output_tokens: 1_000,
cache_read_input_tokens: 20_000,
}

const result = (handler as any).normalizeUsage(usage, gpt54Model)

expect(result).toMatchObject({
type: "usage",
inputTokens: 100_000,
outputTokens: 1_000,
cacheReadTokens: 20_000,
})
expect(result.totalCost).toBeCloseTo(0.22, 6)
})

it("should apply GPT-5.4 long-context pricing above the threshold", () => {
const usage = {
input_tokens: 300_000,
output_tokens: 1_000,
cache_read_input_tokens: 100_000,
}

const result = (handler as any).normalizeUsage(usage, gpt54Model)

expect(result).toMatchObject({
type: "usage",
inputTokens: 300_000,
outputTokens: 1_000,
cacheReadTokens: 100_000,
})
expect(result.totalCost).toBeCloseTo(1.0475, 6)
})

it("should not apply GPT-5.4 long-context pricing to priority tier", () => {
handler = new OpenAiNativeHandler({
openAiNativeApiKey: "test-key",
openAiNativeServiceTier: "priority",
})

const usage = {
input_tokens: 300_000,
output_tokens: 1_000,
cache_read_input_tokens: 100_000,
}

const result = (handler as any).normalizeUsage(usage, gpt54Model)

expect(result).toMatchObject({
type: "usage",
inputTokens: 300_000,
outputTokens: 1_000,
cacheReadTokens: 100_000,
})
expect(result.totalCost).toBeCloseTo(1.08, 6)
})
})
})
129 changes: 129 additions & 0 deletions src/api/providers/__tests__/openai-native.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,34 @@ describe("OpenAiNativeHandler", () => {
expect(modelInfo.info.supportsReasoningEffort).toEqual(["low", "medium", "high", "xhigh"])
})

it("should return GPT-5.4 model info when selected", () => {
const gpt54Handler = new OpenAiNativeHandler({
...mockOptions,
apiModelId: "gpt-5.4",
})

const modelInfo = gpt54Handler.getModel()
expect(modelInfo.id).toBe("gpt-5.4")
expect(modelInfo.info.maxTokens).toBe(128000)
expect(modelInfo.info.contextWindow).toBe(1_050_000)
expect(modelInfo.info.supportsVerbosity).toBe(true)
expect(modelInfo.info.supportsReasoningEffort).toEqual(["none", "low", "medium", "high", "xhigh"])
expect(modelInfo.info.reasoningEffort).toBe("none")
})

it("should return GPT-5.3 Chat model info when selected", () => {
const chatHandler = new OpenAiNativeHandler({
...mockOptions,
apiModelId: "gpt-5.3-chat-latest",
})

const modelInfo = chatHandler.getModel()
expect(modelInfo.id).toBe("gpt-5.3-chat-latest")
expect(modelInfo.info.maxTokens).toBe(16_384)
expect(modelInfo.info.contextWindow).toBe(128000)
expect(modelInfo.info.supportsImages).toBe(true)
})

it("should handle undefined model ID", () => {
const handlerWithoutModel = new OpenAiNativeHandler({
openAiNativeApiKey: "test-api-key",
Expand Down Expand Up @@ -345,6 +373,107 @@ describe("OpenAiNativeHandler", () => {
expect(textChunks[1].text).toBe(" world")
})

it("should handle GPT-5.4 model with Responses API", async () => {
const mockFetch = vitest.fn().mockResolvedValue({
ok: true,
body: new ReadableStream({
start(controller) {
controller.enqueue(
new TextEncoder().encode(
'data: {"type":"response.output_item.added","item":{"type":"text","text":"GPT-5.4 reply"}}\n\n',
),
)
controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
controller.close()
},
}),
})
global.fetch = mockFetch as any

mockResponsesCreate.mockRejectedValue(new Error("SDK not available"))

handler = new OpenAiNativeHandler({
...mockOptions,
apiModelId: "gpt-5.4",
})

const stream = handler.createMessage(systemPrompt, messages)
const chunks: any[] = []
for await (const chunk of stream) {
chunks.push(chunk)
}

expect(mockFetch).toHaveBeenCalledWith(
"https://api.openai.com/v1/responses",
expect.objectContaining({
body: expect.any(String),
}),
)
const body = (mockFetch.mock.calls[0][1] as any).body as string
const parsedBody = JSON.parse(body)
expect(parsedBody.model).toBe("gpt-5.4")
expect(parsedBody.max_output_tokens).toBe(128000)
expect(parsedBody.temperature).toBeUndefined()
expect(parsedBody.include).toEqual(["reasoning.encrypted_content"])
expect(parsedBody.reasoning?.effort).toBe("none")
expect(parsedBody.text?.verbosity).toBe("medium")

const textChunks = chunks.filter((chunk) => chunk.type === "text")
expect(textChunks).toHaveLength(1)
expect(textChunks[0].text).toBe("GPT-5.4 reply")
})

it("should handle GPT-5.3 Chat model with Responses API", async () => {
// Mock fetch for Responses API
const mockFetch = vitest.fn().mockResolvedValue({
ok: true,
body: new ReadableStream({
start(controller) {
controller.enqueue(
new TextEncoder().encode(
'data: {"type":"response.output_item.added","item":{"type":"text","text":"Chat reply"}}\n\n',
),
)
controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
controller.close()
},
}),
})
global.fetch = mockFetch as any

// Mock SDK to fail so it uses fetch
mockResponsesCreate.mockRejectedValue(new Error("SDK not available"))

handler = new OpenAiNativeHandler({
...mockOptions,
apiModelId: "gpt-5.3-chat-latest",
})

const stream = handler.createMessage(systemPrompt, messages)
const chunks: any[] = []
for await (const chunk of stream) {
chunks.push(chunk)
}

expect(mockFetch).toHaveBeenCalledWith(
"https://api.openai.com/v1/responses",
expect.objectContaining({
body: expect.any(String),
}),
)
const body = (mockFetch.mock.calls[0][1] as any).body as string
const parsedBody = JSON.parse(body)
expect(parsedBody.model).toBe("gpt-5.3-chat-latest")
expect(parsedBody.max_output_tokens).toBe(16_384)
expect(parsedBody.temperature).toBe(0)
expect(parsedBody.reasoning?.effort).toBeUndefined()
expect(parsedBody.text?.verbosity).toBeUndefined()

const textChunks = chunks.filter((chunk) => chunk.type === "text")
expect(textChunks).toHaveLength(1)
expect(textChunks[0].text).toBe("Chat reply")
})

it("should handle GPT-5-mini model with Responses API", async () => {
// Mock fetch for Responses API
const mockFetch = vitest.fn().mockResolvedValue({
Expand Down
1 change: 1 addition & 0 deletions src/api/providers/openai-native.ts
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
totalOutputTokens,
cacheWriteTokens,
cacheReadTokens,
effectiveTier,
)

const reasoningTokens =
Expand Down
37 changes: 36 additions & 1 deletion src/shared/cost.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,44 @@
import type { ModelInfo } from "@roo-code/types"
import type { ServiceTier } from "@roo-code/types"

/**
 * Result of an API cost calculation.
 *
 * Token counts are the totals used for billing (for OpenAI-style usage the
 * input total already includes cached tokens); `totalCost` is in USD.
 */
export interface ApiCostResult {
	// Total billable input tokens (cached + non-cached).
	totalInputTokens: number
	// Total output tokens produced by the model.
	totalOutputTokens: number
	// Computed cost in US dollars.
	totalCost: number
}

/**
 * Returns a copy of `modelInfo` with long-context price multipliers applied.
 *
 * Multipliers take effect only when the request's total input tokens exceed
 * the configured `thresholdTokens` and, if the schedule is restricted to
 * particular service tiers, only when the effective tier is listed. In every
 * other case the original `modelInfo` is returned unmodified.
 *
 * @param modelInfo - Base model pricing/metadata.
 * @param totalInputTokens - Total input tokens for the request (cached + non-cached).
 * @param serviceTier - Service tier in effect; absent means "default".
 */
function applyLongContextPricing(modelInfo: ModelInfo, totalInputTokens: number, serviceTier?: ServiceTier): ModelInfo {
	const pricing = modelInfo.longContextPricing

	// No long-context schedule, or still within the base window: keep base pricing.
	if (!pricing || totalInputTokens <= pricing.thresholdTokens) {
		return modelInfo
	}

	// A tier-restricted schedule leaves unlisted tiers at base pricing.
	const tier = serviceTier ?? "default"
	if (pricing.appliesToServiceTiers && !pricing.appliesToServiceTiers.includes(tier)) {
		return modelInfo
	}

	// Scale a price only when both the base price and its multiplier are
	// defined; otherwise pass the base price through untouched.
	const scale = (price: number | undefined, multiplier: number | undefined): number | undefined =>
		price !== undefined && multiplier !== undefined ? price * multiplier : price

	return {
		...modelInfo,
		inputPrice: scale(modelInfo.inputPrice, pricing.inputPriceMultiplier),
		outputPrice: scale(modelInfo.outputPrice, pricing.outputPriceMultiplier),
		cacheWritesPrice: scale(modelInfo.cacheWritesPrice, pricing.cacheWritesPriceMultiplier),
		cacheReadsPrice: scale(modelInfo.cacheReadsPrice, pricing.cacheReadsPriceMultiplier),
	}
}

function calculateApiCostInternal(
modelInfo: ModelInfo,
inputTokens: number,
Expand Down Expand Up @@ -62,15 +95,17 @@ export function calculateApiCostOpenAI(
outputTokens: number,
cacheCreationInputTokens?: number,
cacheReadInputTokens?: number,
serviceTier?: ServiceTier,
): ApiCostResult {
const cacheCreationInputTokensNum = cacheCreationInputTokens || 0
const cacheReadInputTokensNum = cacheReadInputTokens || 0
const nonCachedInputTokens = Math.max(0, inputTokens - cacheCreationInputTokensNum - cacheReadInputTokensNum)
const effectiveModelInfo = applyLongContextPricing(modelInfo, inputTokens, serviceTier)

// For OpenAI: inputTokens ALREADY includes all tokens (cached + non-cached)
// So we pass the original inputTokens as the total
return calculateApiCostInternal(
modelInfo,
effectiveModelInfo,
nonCachedInputTokens,
outputTokens,
cacheCreationInputTokensNum,
Expand Down
Loading
Loading