Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions packages/types/src/providers/bedrock.ts
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,30 @@ export const bedrockModels = {
},
],
},
"anthropic.claude-opus-4-7": {
maxTokens: 8192,
contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
supportsImages: true,
supportsPromptCache: true,
supportsReasoningBudget: true,
inputPrice: 5.0, // $5 per million input tokens (≤200K context)
outputPrice: 25.0, // $25 per million output tokens (≤200K context)
cacheWritesPrice: 6.25, // $6.25 per million tokens
cacheReadsPrice: 0.5, // $0.50 per million tokens
minTokensPerCachePoint: 1024,
maxCachePoints: 4,
cachableFields: ["system", "messages", "tools"],
// Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
tiers: [
{
contextWindow: 1_000_000, // 1M tokens with beta flag
inputPrice: 10.0, // $10 per million input tokens (>200K context)
outputPrice: 37.5, // $37.50 per million output tokens (>200K context)
cacheWritesPrice: 12.5, // $12.50 per million tokens (>200K context)
cacheReadsPrice: 1.0, // $1.00 per million tokens (>200K context)
},
],
},
"anthropic.claude-opus-4-5-20251101-v1:0": {
maxTokens: 8192,
contextWindow: 200_000,
Expand Down Expand Up @@ -525,6 +549,7 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [
"anthropic.claude-sonnet-4-5-20250929-v1:0",
"anthropic.claude-sonnet-4-6",
"anthropic.claude-opus-4-6-v1",
"anthropic.claude-opus-4-7",
] as const

// Amazon Bedrock models that support Global Inference profiles
Expand All @@ -535,13 +560,15 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [
// - Claude Haiku 4.5
// - Claude Opus 4.5
// - Claude Opus 4.6
// - Claude Opus 4.7
// Bedrock model IDs eligible for Global Inference profiles (cross-region routing).
// Keep in sync with the model-family comment list above.
// NOTE(review): "anthropic.claude-opus-4-7" has no version suffix while most
// siblings end in "-vN:0" or "-vN" — confirm this matches the actual Bedrock ID.
export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [
"anthropic.claude-sonnet-4-20250514-v1:0",
"anthropic.claude-sonnet-4-5-20250929-v1:0",
"anthropic.claude-sonnet-4-6",
"anthropic.claude-haiku-4-5-20251001-v1:0",
"anthropic.claude-opus-4-5-20251101-v1:0",
"anthropic.claude-opus-4-6-v1",
"anthropic.claude-opus-4-7",
] as const

// Amazon Bedrock Service Tier types
Expand Down
60 changes: 60 additions & 0 deletions src/api/providers/__tests__/bedrock-reasoning.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,66 @@ describe("AwsBedrockHandler - Extended Thinking", () => {
expect(reasoningChunks[1].text).toBe(" about this problem.")
})

it("should use adaptive thinking for Opus 4.7 instead of enabled with budget_tokens", async () => {
handler = new AwsBedrockHandler({
apiProvider: "bedrock",
apiModelId: "anthropic.claude-opus-4-7",
awsRegion: "us-east-1",
enableReasoningEffort: true,
modelMaxTokens: 8192,
modelMaxThinkingTokens: 4096,
})

mockSend.mockResolvedValue({
stream: (async function* () {
yield { messageStart: { role: "assistant" } }
yield {
contentBlockStart: {
content_block: { type: "thinking", thinking: "Adaptive thinking..." },
contentBlockIndex: 0,
},
}
yield {
contentBlockDelta: {
delta: { type: "thinking_delta", thinking: " reasoning complete." },
},
}
yield {
contentBlockStart: {
start: { text: "Here is the answer." },
contentBlockIndex: 1,
},
}
yield { metadata: { usage: { inputTokens: 100, outputTokens: 50 } } }
})(),
})

const messages = [{ role: "user" as const, content: "Test message" }]
const stream = handler.createMessage("System prompt", messages)

const chunks = []
for await (const chunk of stream) {
chunks.push(chunk)
}

// Verify Opus 4.7 uses adaptive thinking (not enabled with budget_tokens)
expect(mockSend).toHaveBeenCalledTimes(1)
expect(capturedPayload).toBeDefined()
expect(capturedPayload.additionalModelRequestFields).toBeDefined()
expect(capturedPayload.additionalModelRequestFields.thinking).toEqual({
type: "adaptive",
})
expect(capturedPayload.additionalModelRequestFields.output_config).toEqual({
effort: "high",
})

// Verify reasoning chunks were yielded
const reasoningChunks = chunks.filter((c) => c.type === "reasoning")
expect(reasoningChunks).toHaveLength(2)
expect((reasoningChunks[0] as any).text).toBe("Adaptive thinking...")
expect((reasoningChunks[1] as any).text).toBe(" reasoning complete.")
})

it("should support API key authentication", async () => {
handler = new AwsBedrockHandler({
apiProvider: "bedrock",
Expand Down
44 changes: 36 additions & 8 deletions src/api/providers/bedrock.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,26 @@ interface BedrockInferenceConfig {
// Define interface for Bedrock additional model request fields
// This includes thinking configuration, 1M context beta, and other model-specific parameters
interface BedrockAdditionalModelFields {
thinking?: {
type: "enabled"
budget_tokens: number
thinking?:
| {
type: "enabled"
budget_tokens: number
}
| {
type: "adaptive"
}
output_config?: {
effort?: "low" | "medium" | "high"
}
anthropic_beta?: string[]
[key: string]: any // Add index signature to be compatible with DocumentType
}

// Models that only support thinking.type: "adaptive" (not "enabled" with budget_tokens).
// Typed as `readonly string[]` rather than a literal tuple so membership checks like
// `.includes(baseId)` accept an arbitrary string without an `as any` cast at the call site.
const BEDROCK_ADAPTIVE_THINKING_ONLY_MODEL_IDS: readonly string[] = ["anthropic.claude-opus-4-7"]

// Define interface for Bedrock payload
interface BedrockPayload {
modelId: BedrockModelId | string
Expand Down Expand Up @@ -392,12 +404,28 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH

if ((isThinkingExplicitlyEnabled || isThinkingEnabledBySettings) && modelConfig.info.supportsReasoningBudget) {
thinkingEnabled = true
additionalModelRequestFields = {
thinking: {
type: "enabled",
budget_tokens: metadata?.thinking?.maxThinkingTokens || modelConfig.reasoningBudget || 4096,
},

// Opus 4.7 only supports thinking.type: "adaptive" with output_config.effort
// (NOT "enabled" with budget_tokens which returns a 400 error)
const baseId = this.parseBaseModelId(modelConfig.id)
if (BEDROCK_ADAPTIVE_THINKING_ONLY_MODEL_IDS.includes(baseId as any)) {
additionalModelRequestFields = {
thinking: {
type: "adaptive",
},
output_config: {
effort: "high",
},
}
} else {
additionalModelRequestFields = {
thinking: {
type: "enabled",
budget_tokens: metadata?.thinking?.maxThinkingTokens || modelConfig.reasoningBudget || 4096,
},
}
}

logger.info("Extended thinking enabled for Bedrock request", {
ctx: "bedrock",
modelId: modelConfig.id,
Expand Down
Loading