continuedev · syf2211 · Jun 27, 2026 · Jun 27, 2026 · Jun 27, 2026
@@ -1214,6 +1214,13 @@ export interface BaseCompletionOptions {
   toolChoice?: ToolChoice;
   reasoning?: boolean;
   reasoningBudgetTokens?: number;
+  thinking?: {
+    type?: "enabled" | "adaptive" | "disabled";
+    budget_tokens?: number;
+  };
+  output_config?: {
+    effort?: "low" | "medium" | "high";
+  };
   promptCaching?: boolean;
 }
 

@@ -55,9 +55,41 @@ class Anthropic extends BaseLLM {
   }
 
   // Public for use within VertexAI
+  private buildThinkingParams(
+    options: CompletionOptions,
+  ): Record<string, unknown> {
+    const params: Record<string, unknown> = {};
+
+    if (options.thinking?.type) {
+      const thinking: Record<string, unknown> = {
+        type: options.thinking.type,
+      };
+      if (options.thinking.type === "enabled") {
+        thinking.budget_tokens =
+          options.thinking.budget_tokens ??
+          options.reasoningBudgetTokens ??
+          DEFAULT_REASONING_TOKENS;
+      }
+      params.thinking = thinking;
+    } else if (options.reasoning) {
+      params.thinking = {
+        type: "enabled",
+        budget_tokens:
+          options.reasoningBudgetTokens ?? DEFAULT_REASONING_TOKENS,
+      };
+    }
+
+    if (options.output_config) {
+      params.output_config = options.output_config;
+    }
+
+    return params;
+  }
+
   public convertArgs(
     options: CompletionOptions,
   ): Omit<MessageCreateParams, "messages"> {
+    const thinkingParams = this.buildThinkingParams(options);
     const finalOptions = {
       top_k: options.topK,
       top_p: options.topP,
@@ -67,13 +99,7 @@ class Anthropic extends BaseLLM {
       stop_sequences: options.stop?.filter((x) => x.trim() !== ""),
       stream: options.stream ?? true,
       tools: options.tools?.map(this.convertToolToAnthropicTool),
-      thinking: options.reasoning
-        ? {
-            type: "enabled" as const,
-            budget_tokens:
-              options.reasoningBudgetTokens ?? DEFAULT_REASONING_TOKENS,
-          }
-        : undefined,
+      ...thinkingParams,
       tool_choice: options.toolChoice
         ? {
             type: "tool" as const,
@@ -82,7 +108,7 @@ class Anthropic extends BaseLLM {
         : undefined,
     };
 
-    return finalOptions;
+    return finalOptions as Omit<MessageCreateParams, "messages">;
   }
 
   private convertMessageContentToBlocks(

@@ -421,6 +421,205 @@ describe("Anthropic", () => {
       });
     });
 
+    test("should forward adaptive thinking and output_config from completion options", async () => {
+      const anthropic = new Anthropic({
+        apiKey: "test-api-key",
+        model: "claude-opus-4-8",
+        apiBase: "https://api.anthropic.com/v1/",
+      });
+
+      await runLlmTest({
+        llm: anthropic,
+        methodToTest: "streamChat",
+        params: [
+          [{ role: "user", content: "hello" }],
+          new AbortController().signal,
+          {
+            model: "claude-opus-4-8",
+            thinking: { type: "adaptive" }, // explicit type other than "enabled"
+            output_config: { effort: "high" },
+          },
+        ],
+        expectedRequest: {
+          url: "https://api.anthropic.com/v1/messages",
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            Accept: "application/json",
+            "anthropic-version": "2023-06-01",
+            "x-api-key": "test-api-key",
+          },
+          body: {
+            model: "claude-opus-4-8",
+            max_tokens: 8192,
+            stream: true,
+            messages: [
+              {
+                role: "user",
+                content: [{ type: "text", text: "hello" }],
+              },
+            ],
+            thinking: { type: "adaptive" }, // forwarded as-is, without budget_tokens
+            output_config: { effort: "high" }, // passed through unchanged
+            system: "",
+          },
+        },
+        mockStream: [
+          '{"type": "content_block_delta", "delta": {"type": "text_delta", "text": "Hello!"}}',
+          '{"type": "content_block_stop"}',
+        ],
+      });
+    });
+
+    test("should forward output_config without explicit thinking.type", async () => {
+      const anthropic = new Anthropic({
+        apiKey: "test-api-key",
+        model: "claude-opus-4-8",
+        apiBase: "https://api.anthropic.com/v1/",
+      });
+
+      await runLlmTest({
+        llm: anthropic,
+        methodToTest: "streamChat",
+        params: [
+          [{ role: "user", content: "hello" }],
+          new AbortController().signal,
+          {
+            model: "claude-opus-4-8",
+            output_config: { effort: "high" }, // neither `thinking` nor `reasoning` is passed
+          },
+        ],
+        expectedRequest: {
+          url: "https://api.anthropic.com/v1/messages",
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            Accept: "application/json",
+            "anthropic-version": "2023-06-01",
+            "x-api-key": "test-api-key",
+          },
+          body: {
+            model: "claude-opus-4-8",
+            max_tokens: 8192,
+            stream: true,
+            messages: [
+              {
+                role: "user",
+                content: [{ type: "text", text: "hello" }],
+              },
+            ],
+            output_config: { effort: "high" }, // expected body lists no `thinking` key
+            system: "",
+          },
+        },
+        mockStream: [
+          '{"type": "content_block_delta", "delta": {"type": "text_delta", "text": "Hello!"}}',
+          '{"type": "content_block_stop"}',
+        ],
+      });
+    });
+
+    test("should forward output_config on legacy reasoning path", async () => {
+      const anthropic = new Anthropic({
+        apiKey: "test-api-key",
+        model: "claude-opus-4-8",
+        apiBase: "https://api.anthropic.com/v1/",
+      });
+
+      await runLlmTest({
+        llm: anthropic,
+        methodToTest: "streamChat",
+        params: [
+          [{ role: "user", content: "hello" }],
+          new AbortController().signal,
+          {
+            model: "claude-opus-4-8",
+            reasoning: true, // legacy toggle; no `thinking` option is passed
+            output_config: { effort: "high" },
+          },
+        ],
+        expectedRequest: {
+          url: "https://api.anthropic.com/v1/messages",
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            Accept: "application/json",
+            "anthropic-version": "2023-06-01",
+            "x-api-key": "test-api-key",
+          },
+          body: {
+            model: "claude-opus-4-8",
+            max_tokens: 8192,
+            stream: true,
+            messages: [
+              {
+                role: "user",
+                content: [{ type: "text", text: "hello" }],
+              },
+            ],
+            thinking: { type: "enabled", budget_tokens: 2048 }, // NOTE(review): 2048 presumably equals DEFAULT_REASONING_TOKENS — confirm
+            output_config: { effort: "high" }, // still forwarded alongside legacy thinking
+            system: "",
+          },
+        },
+        mockStream: [
+          '{"type": "content_block_delta", "delta": {"type": "text_delta", "text": "Hello!"}}',
+          '{"type": "content_block_stop"}',
+        ],
+      });
+    });
+
+    test("should prefer configured thinking over legacy reasoning toggle", async () => {
+      const anthropic = new Anthropic({
+        apiKey: "test-api-key",
+        model: "claude-opus-4-8",
+        apiBase: "https://api.anthropic.com/v1/",
+        completionOptions: {
+          model: "claude-opus-4-8",
+          thinking: { type: "adaptive" }, // set at construction time, not per call
+          output_config: { effort: "high" },
+        },
+      });
+
+      await runLlmTest({
+        llm: anthropic,
+        methodToTest: "streamChat",
+        params: [
+          [{ role: "user", content: "hello" }],
+          new AbortController().signal,
+          { reasoning: true }, // per-call legacy toggle; must not override the configured type
+        ],
+        expectedRequest: {
+          url: "https://api.anthropic.com/v1/messages",
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            Accept: "application/json",
+            "anthropic-version": "2023-06-01",
+            "x-api-key": "test-api-key",
+          },
+          body: {
+            model: "claude-opus-4-8",
+            max_tokens: 8192,
+            stream: true,
+            messages: [
+              {
+                role: "user",
+                content: [{ type: "text", text: "hello" }],
+              },
+            ],
+            thinking: { type: "adaptive" }, // configured type wins; no "enabled" + budget_tokens
+            output_config: { effort: "high" },
+            system: "",
+          },
+        },
+        mockStream: [
+          '{"type": "content_block_delta", "delta": {"type": "text_delta", "text": "Hello!"}}',
+          '{"type": "content_block_stop"}',
+        ],
+      });
+    });
+
     test("should handle custom max tokens", async () => {
       const anthropic = new Anthropic({
         apiKey: "test-api-key",

@@ -54,6 +54,10 @@ function buildReasoningCompletionOptions(
     return baseOptions;
   }
 
+  if (model.completionOptions?.thinking?.type) {
+    return baseOptions;
+  }
+
   const reasoningOptions: LLMFullCompletionOptions = {
     ...baseOptions,
     reasoning: !!hasReasoningEnabled,

@@ -44,6 +44,17 @@ export const modelCapabilitySchema = z.union([
 // not ideal but lose type suggestions if use z.infer because of the string fallback
 export type ModelCapability = "tool_use" | "image_input" | "next_edit";
 
+export const thinkingConfigSchema = z.object({
+  type: z.enum(["enabled", "adaptive", "disabled"]).optional(), // thinking mode; all fields are optional
+  budget_tokens: z.number().optional(), // used by the Anthropic provider as the budget for enabled thinking
+});
+export type ThinkingConfig = z.infer<typeof thinkingConfigSchema>; // static type derived from the schema
+
+export const outputConfigSchema = z.object({
+  effort: z.enum(["low", "medium", "high"]).optional(), // the Anthropic provider forwards output_config as-is
+});
+export type OutputConfig = z.infer<typeof outputConfigSchema>; // static type derived from the schema
+
 export const completionOptionsSchema = z.object({
   contextLength: z.number().optional(),
   maxTokens: z.number().optional(),
@@ -57,6 +68,8 @@ export const completionOptionsSchema = z.object({
   n: z.number().optional(),
   reasoning: z.boolean().optional(),
   reasoningBudgetTokens: z.number().optional(),
+  thinking: thinkingConfigSchema.optional(),
+  output_config: outputConfigSchema.optional(),
   promptCaching: z.boolean().optional(),
   stream: z.boolean().optional(),
   keepAlive: z.number().optional(),