From 3abd4bf47a96e5a3ce03f34819410bbc6fcc7970 Mon Sep 17 00:00:00 2001
From: qer <qer@msh.team>
Date: Fri, 12 Jun 2026 18:28:35 +0800
Subject: [PATCH] fix(kimi): auto-set temperature based on thinking mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Moonshot API requires temperature=1.0 when thinking is enabled and
temperature=0.6 when it is disabled. Previously, toggling thinking off in
the TUI left the temperature unset (so the API-side default applied), which
caused:

  400 invalid temperature: only 0.6 is allowed for this model

This change sets the correct default temperature inside withThinking():
- thinking ON (low/medium/high/xhigh/max) → temperature=1.0
- thinking OFF → temperature=0.6

Users can still override it via the KIMI_MODEL_TEMPERATURE env var, which is
applied later in applyKimiEnvSamplingParams and takes precedence.

Fixes #686

Signed-off-by: qkclaw <bj456736@users.noreply.github.com>
---
 packages/kosong/src/providers/kimi.ts |  7 +++++-
 packages/kosong/test/kimi.test.ts     | 31 ++++++++++++++++++++-------
 2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/packages/kosong/src/providers/kimi.ts b/packages/kosong/src/providers/kimi.ts
index ef53eca7c..36fe8f190 100644
--- a/packages/kosong/src/providers/kimi.ts
+++ b/packages/kosong/src/providers/kimi.ts
@@ -523,7 +523,12 @@ export class KimiChatProvider implements ChatProvider {
         reasoningEffort = 'high';
         break;
     }
-    return this._withGenerationKwargs({ reasoning_effort: reasoningEffort }).withExtraBody({
+    // Moonshot API requires temperature=1.0 when thinking is enabled and
+    // temperature=0.6 when thinking is disabled. Set a default here so users
+    // don't hit 400 errors after toggling thinking off; KIMI_MODEL_TEMPERATURE
+    // env var (applied later via applyKimiEnvSamplingParams) can still override.
+    const temperature = effort === 'off' ? 0.6 : 1.0;
+    return this._withGenerationKwargs({ reasoning_effort: reasoningEffort, temperature }).withExtraBody({
       thinking,
     });
   }
diff --git a/packages/kosong/test/kimi.test.ts b/packages/kosong/test/kimi.test.ts
index f309b2030..842c5274b 100644
--- a/packages/kosong/test/kimi.test.ts
+++ b/packages/kosong/test/kimi.test.ts
@@ -667,6 +667,7 @@ describe('KimiChatProvider', () => {
           thinking: { type: 'enabled' },
         },
         max_tokens: 512,
+        temperature: 1.0,
       });
     });
 
@@ -736,15 +737,29 @@ describe('KimiChatProvider', () => {
       expect(withLow.thinkingEffort).toBe('low');
     });
 
-    it('replaces the previous thinking effort when called again', () => {
-      const provider = createProvider().withThinking('high').withThinking('off');
+    it('sets temperature=1.0 when thinking is on and temperature=0.6 when off', async () => {
+      const onProvider = createProvider().withThinking('high');
+      const offProvider = createProvider().withThinking('off');
 
-      expect(getGenerationState(provider)).toEqual({
-        reasoning_effort: undefined,
-        extra_body: {
-          thinking: { type: 'disabled' },
-        },
-      });
+      const history: Message[] = [
+        { role: 'user', content: [{ type: 'text', text: 'Hi' }], toolCalls: [] },
+      ];
+
+      const onBody = await captureRequestBody(onProvider, '', [], history);
+      expect(onBody['temperature']).toBe(1.0);
+
+      const offBody = await captureRequestBody(offProvider, '', [], history);
+      expect(offBody['temperature']).toBe(0.6);
+    });
+
+    it('preserves explicit temperature via env var override after withThinking', () => {
+      // Simulates applyKimiEnvSamplingParams overriding the default:
+      // withThinking('off') sets 0.6, then env param sets 0.8.
+      const provider = createProvider()
+        .withThinking('off')
+        .withGenerationKwargs({ temperature: 0.8 });
+
+      expect(getGenerationState(provider).temperature).toBe(0.8);
     });
   });