diff --git a/.changeset/fix-orphan-tool-calls-after-resume.md b/.changeset/fix-orphan-tool-calls-after-resume.md new file mode 100644 index 000000000..127c7addc --- /dev/null +++ b/.changeset/fix-orphan-tool-calls-after-resume.md @@ -0,0 +1,6 @@ +--- +"@moonshot-ai/agent-core": patch +"@moonshot-ai/kimi-code": patch +--- + +Prevent orphaned tool calls from causing provider errors after resume or compaction, and stop deferred messages from getting stuck when an open tool exchange is compacted away. diff --git a/packages/agent-core/src/agent/context/index.ts b/packages/agent-core/src/agent/context/index.ts index 4746eeb36..b442a8243 100644 --- a/packages/agent-core/src/agent/context/index.ts +++ b/packages/agent-core/src/agent/context/index.ts @@ -169,6 +169,10 @@ export class ContextMemory { ...this._history.slice(result.compactedCount), ]; this.openSteps.clear(); + const trimmed = trimTrailingOpenToolExchange(this._history); + if (trimmed.length === this._history.length) { + this.pendingToolResultIds.clear(); + } this.flushDeferredMessagesIfToolExchangeClosed(); this._tokenCount = result.tokensAfter; this.tokenCountCoveredMessageCount = this._history.length; @@ -198,7 +202,29 @@ export class ContextMemory { } project(messages: readonly ContextMessage[]): Message[] { - return project(this.agent.microCompaction.compact(messages)); + return trimTrailingOpenToolExchange( + project(this.agent.microCompaction.compact(messages)), + ); + } + + cleanupOrphanedToolCalls(): void { + if (this.pendingToolResultIds.size === 0) return; + const trimmed = trimTrailingOpenToolExchange(this._history); + const removed = this._history.length - trimmed.length; + if (removed > 0) { + this.agent.records.logRecord({ + type: 'context.cleanup_orphan_tool_calls', + removed, + }); + this._history.length = trimmed.length; + this.tokenCountCoveredMessageCount = Math.min( + this.tokenCountCoveredMessageCount, + this._history.length, + ); + } + this.openSteps.clear(); + this.pendingToolResultIds.clear(); + this.flushDeferredMessagesIfToolExchangeClosed(); } get messages(): Message[] { diff --git a/packages/agent-core/src/agent/context/projector.ts b/packages/agent-core/src/agent/context/projector.ts index e0ae99972..e0bf12574 100644 --- a/packages/agent-core/src/agent/context/projector.ts +++ b/packages/agent-core/src/agent/context/projector.ts @@ -78,8 +78,9 @@ export function trimTrailingOpenToolExchange(history: readonly Message[]): Messa } const assistant = history[lastNonToolIndex]; - if (assistant === undefined) return []; - if (assistant.role !== 'assistant' || assistant.toolCalls.length === 0) return [...history]; + if (assistant === undefined || assistant.role !== 'assistant' || assistant.toolCalls.length === 0) { + return [...history]; + } const trailingToolCallIds = new Set( history diff --git a/packages/agent-core/src/agent/index.ts b/packages/agent-core/src/agent/index.ts index 1b90276f9..329bd75a7 100644 --- a/packages/agent-core/src/agent/index.ts +++ b/packages/agent-core/src/agent/index.ts @@ -296,6 +296,7 @@ export class Agent { async resume(): Promise<{ warning?: string }> { const result = await this.records.replay(); + this.context.cleanupOrphanedToolCalls(); this.goal.normalizeAfterReplay(); await this.background.loadFromDisk(); await this.background.reconcile(); diff --git a/packages/agent-core/src/agent/records/index.ts b/packages/agent-core/src/agent/records/index.ts index 8bf050398..5b959b438 100644 --- a/packages/agent-core/src/agent/records/index.ts +++ b/packages/agent-core/src/agent/records/index.ts @@ -99,6 +99,9 @@ function restoreAgentRecord(agent: Agent, input: AgentRecord): void { case 'context.undo': agent.context.undo(input.count); return; + case 'context.cleanup_orphan_tool_calls': + agent.context.cleanupOrphanedToolCalls(); + return; case 'tools.register_user_tool': agent.tools.registerUserTool(input); return; diff --git a/packages/agent-core/src/agent/records/types.ts b/packages/agent-core/src/agent/records/types.ts index 835c34465..ac741d171 100644 --- a/packages/agent-core/src/agent/records/types.ts +++ b/packages/agent-core/src/agent/records/types.ts @@ -82,6 +82,7 @@ export interface AgentRecordEvents { 'context.clear': {}; 'context.apply_compaction': CompactionResult; 'context.undo': { count: number }; + 'context.cleanup_orphan_tool_calls': { removed: number }; 'tools.update_store': ToolStoreUpdate; diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts index 1090b6f6f..84ee3d7db 100644 --- a/packages/agent-core/test/agent/compaction/full.test.ts +++ b/packages/agent-core/test/agent/compaction/full.test.ts @@ -880,8 +880,37 @@ describe('FullCompaction', () => { user: text "old user one" assistant: text "old assistant one" user: text "run both tools" - assistant: [] calls call_open_one:LookupOne { "query": "one" }, call_open_two:LookupTwo { "query": "two" } - tool[call_open_one]: text "one result" + user: text + `); + expect(ctx.agent.context.history.map((message) => message.role)).toEqual([ + 'assistant', + ]); + await ctx.expectResumeMatches(); + }); + + it('keeps a fully resolved tool exchange in the compaction prompt', async () => { + const ctx = testAgent(); + ctx.configure({ + provider: CATALOGUED_PROVIDER, + modelCapabilities: CATALOGUED_MODEL_CAPABILITIES, + }); + ctx.appendExchange(1, 'old user one', 'old assistant one', 20); + ctx.appendToolExchange(); + const compacted = ctx.once('context.apply_compaction'); + + ctx.mockNextResponse({ type: 'text', text: 'Compacted with tools.' }); + await ctx.rpc.beginCompaction({ instruction: 'Keep stable facts.' }); + await compacted; + + expect(ctx.lastLlmInput()).toMatchInlineSnapshot(` + system: + tools: [] + messages: + user: text "old user one" + assistant: text "old assistant one" + user: text "lookup something" + assistant: text "I will call Lookup." calls call_lookup:Lookup { "query": "moon" } + tool[call_lookup]: text "lookup result" user: text `); expect(ctx.agent.context.history.map((message) => message.role)).toEqual([ @@ -1123,7 +1152,7 @@ describe('FullCompaction', () => { ]); }); - it('keeps a deferred system reminder behind a partially resolved tool exchange across compaction', async () => { + it('flushes a deferred system reminder when a partially resolved tool exchange is compacted away', async () => { const ctx = testAgent(); ctx.configure({ provider: CATALOGUED_PROVIDER, @@ -1150,8 +1179,14 @@ describe('FullCompaction', () => { await ctx.rpc.beginCompaction({}); await compacted; + // The open tool exchange was removed by compaction, so pending state is cleared + // and the deferred reminder is flushed immediately. expect(ctx.agent.context.history.map((m) => m.role)).toEqual([ 'assistant', + 'user', + ]); + expect(ctx.agent.context.history.at(-1)?.content).toEqual([ + { type: 'text', text: '\nhost note\n' }, ]); ctx.dispatch({ @@ -1164,13 +1199,11 @@ describe('FullCompaction', () => { }, }); + // The late tool result is recorded as an orphan tool message. expect(ctx.agent.context.history.map((m) => m.role)).toEqual([ 'assistant', - 'tool', 'user', - ]); - expect(ctx.agent.context.history.at(-1)?.content).toEqual([ - { type: 'text', text: '\nhost note\n' }, + 'tool', ]); }); diff --git a/packages/agent-core/test/agent/context.test.ts b/packages/agent-core/test/agent/context.test.ts index 2a8a33c71..cab5aaf9b 100644 --- a/packages/agent-core/test/agent/context.test.ts +++ b/packages/agent-core/test/agent/context.test.ts @@ -1,9 +1,15 @@ import type { Message } from '@moonshot-ai/kosong'; import { describe, expect, it } from 'vitest'; +import type { CompactionStrategy } from '../../src/agent/compaction'; import { renderNotificationXml } from '../../src/agent/context/notification-xml'; -import { project } from '../../src/agent/context/projector'; +import { project, trimTrailingOpenToolExchange } from '../../src/agent/context/projector'; import type { ContextMessage } from '../../src/agent/context/types'; +import { + AGENT_WIRE_PROTOCOL_VERSION, + InMemoryAgentRecordPersistence, +} from '../../src/agent/records'; +import type { AgentRecord } from '../../src/agent/records'; import { estimateTokensForMessages } from '../../src/utils/tokens'; import { testAgent } from './harness/agent'; @@ -360,8 +366,6 @@ describe('Agent context', () => { expect(ctx.agent.context.messages.map((message) => message.role)).toEqual([ 'assistant', 'user', - 'assistant', - 'tool', ]); ctx.dispatch({ @@ -507,6 +511,101 @@ describe('Agent context', () => { ); }); + it('does not expose an unclosed tool call in the messages getter', () => { + const ctx = testAgent(); + ctx.configure(); + const stepUuid = 'orphan-tool-step'; + + ctx.agent.context.appendUserMessage([{ type: 'text', text: 'read a file' }]); + ctx.dispatch({ + type: 'context.append_loop_event', + event: { type: 'step.begin', uuid: stepUuid, turnId: '0', step: 1 }, + }); + ctx.dispatch({ + type: 'context.append_loop_event', + event: { + type: 'tool.call', + uuid: 'call_orphan_read', + turnId: '0', + step: 1, + stepUuid, + toolCallId: 'Read:158', + name: 'Read', + args: { file_path: '/tmp/test' }, + }, + }); + // Simulate a crash: no tool.result or step.end is recorded. + ctx.agent.context.appendUserMessage([{ type: 'text', text: 'continue after crash' }]); + + const toolCallIds = ctx.agent.context.messages + .filter((message) => message.role === 'assistant') + .flatMap((message) => message.toolCalls.map((toolCall) => toolCall.id)); + expect(toolCallIds).toEqual([]); + }); + + it('cleans up orphaned tool calls on resume and flushes deferred messages', async () => { + const records: AgentRecord[] = [ + { + type: 'metadata', + protocol_version: AGENT_WIRE_PROTOCOL_VERSION, + created_at: 1, + }, + { + type: 'context.append_message', + message: { + role: 'user', + content: [{ type: 'text', text: 'read a file' }], + toolCalls: [], + origin: { kind: 'user' }, + }, + }, + { + type: 'context.append_loop_event', + event: { + type: 'step.begin', + uuid: 'orphan-step', + turnId: '0', + step: 1, + }, + }, + { + type: 'context.append_loop_event', + event: { + type: 'tool.call', + uuid: 'call_orphan_read', + turnId: '0', + step: 1, + stepUuid: 'orphan-step', + toolCallId: 'Read:158', + name: 'Read', + args: { file_path: '/tmp/test' }, + }, + }, + { + type: 'context.append_message', + message: { + role: 'user', + content: [{ type: 'text', text: 'continue after crash' }], + toolCalls: [], + origin: { kind: 'user' }, + }, + }, + ]; + + const ctx = testAgent({ + persistence: new InMemoryAgentRecordPersistence(records), + }); + ctx.configure(); + + await ctx.agent.resume(); + + expect(ctx.agent.context.history.map((message) => message.role)).toEqual(['user', 'user']); + const toolCallIds = ctx.agent.context.messages + .filter((message) => message.role === 'assistant') + .flatMap((message) => message.toolCalls.map((toolCall) => toolCall.id)); + expect(toolCallIds).toEqual([]); + }); + it('undo only counts real user prompts, skipping background notifications', () => { const ctx = testAgent(); ctx.configure(); @@ -652,8 +751,183 @@ describe('Agent context', () => { ]); }); + it('compacts a resumed history with orphaned tool calls without throwing', async () => { + const records: AgentRecord[] = [ + { + type: 'metadata', + protocol_version: AGENT_WIRE_PROTOCOL_VERSION, + created_at: 1, + }, + { + type: 'context.append_message', + message: { + role: 'user', + content: [{ type: 'text', text: 'old prompt' }], + toolCalls: [], + origin: { kind: 'user' }, + }, + }, + { + type: 'context.append_loop_event', + event: { + type: 'step.begin', + uuid: 'old-step', + turnId: '0', + step: 1, + }, + }, + { + type: 'context.append_loop_event', + event: { + type: 'content.part', + uuid: 'old-part', + turnId: '0', + step: 1, + stepUuid: 'old-step', + part: { type: 'text', text: 'old response' }, + }, + }, + { + type: 'context.append_loop_event', + event: { + type: 'step.end', + uuid: 'old-step', + turnId: '0', + step: 1, + finishReason: 'end_turn', + }, + }, + { + type: 'context.append_message', + message: { + role: 'user', + content: [{ type: 'text', text: 'read a file' }], + toolCalls: [], + origin: { kind: 'user' }, + }, + }, + { + type: 'context.append_loop_event', + event: { + type: 'step.begin', + uuid: 'orphan-step', + turnId: '0', + step: 2, + }, + }, + { + type: 'context.append_loop_event', + event: { + type: 'tool.call', + uuid: 'call_orphan_read', + turnId: '0', + step: 2, + stepUuid: 'orphan-step', + toolCallId: 'Read:158', + name: 'Read', + args: { file_path: '/tmp/test' }, + }, + }, + { + type: 'context.append_message', + message: { + role: 'user', + content: [{ type: 'text', text: 'continue after crash' }], + toolCalls: [], + origin: { kind: 'user' }, + }, + }, + ]; + + const ctx = testAgent({ + persistence: new InMemoryAgentRecordPersistence(records), + compactionStrategy: alwaysCompactOnce, + }); + ctx.configure(); + + await ctx.agent.resume(); + + const compacted = ctx.once('context.apply_compaction'); + ctx.mockNextResponse({ type: 'text', text: 'Compacted after crash.' }); + await ctx.agent.fullCompaction.beforeStep(new AbortController().signal); + await compacted; + + expect(ctx.agent.context.history.map((message) => message.role)).toEqual(['assistant']); + }); + + it('flushes pending tool state after compaction removes an open tool exchange', async () => { + const ctx = testAgent({ compactionStrategy: alwaysCompactOnce }); + ctx.configure(); + ctx.appendExchange(1, 'old user one', 'old assistant one', 20); + ctx.appendPartiallyResolvedParallelToolExchange(); + + const compacted = ctx.once('context.apply_compaction'); + ctx.mockNextResponse({ type: 'text', text: 'Compacted open exchange.' }); + await ctx.agent.fullCompaction.beforeStep(new AbortController().signal); + await compacted; + + expect(ctx.agent.context.history.map((message) => message.role)).toEqual(['assistant']); + + ctx.agent.context.appendUserMessage([ + { type: 'text', text: 'continue after compaction' }, + ]); + + expect(ctx.agent.context.history.map((message) => message.role)).toEqual([ + 'assistant', + 'user', + ]); + const toolCallIds = ctx.agent.context.messages + .filter((message) => message.role === 'assistant') + .flatMap((message) => message.toolCalls.map((toolCall) => toolCall.id)); + expect(toolCallIds).toEqual([]); + }); + + it('flushes deferred messages after an open tool exchange is compacted away', async () => { + const ctx = testAgent({ compactionStrategy: alwaysCompactOnce }); + ctx.configure(); + ctx.appendExchange(1, 'old user one', 'old assistant one', 20); + ctx.appendPartiallyResolvedParallelToolExchange(); + + ctx.agent.context.appendUserMessage([ + { type: 'text', text: 'deferred while tool is open' }, + ]); + + // The new message is deferred because the tool exchange is still open. + expect(ctx.agent.context.history.map((message) => message.role)).toEqual([ + 'user', + 'assistant', + 'user', + 'assistant', + 'tool', + ]); + + const compacted = ctx.once('context.apply_compaction'); + ctx.mockNextResponse({ type: 'text', text: 'Compacted open exchange.' }); + await ctx.agent.fullCompaction.beforeStep(new AbortController().signal); + await compacted; + + // The open exchange was compacted away, so pending state is cleared and the + // deferred message is flushed into history. + expect(ctx.agent.context.history.map((message) => message.role)).toEqual([ + 'assistant', + 'user', + ]); + expect(ctx.agent.context.history.at(-1)?.content).toEqual([ + { type: 'text', text: 'deferred while tool is open' }, + ]); + }); + }); +const alwaysCompactOnce: CompactionStrategy = { + shouldCompact: () => true, + shouldBlock: () => true, + computeCompactCount: (messages: readonly Message[]) => messages.length, + reduceCompactOnOverflow: (messages: readonly Message[]) => messages.length, + checkAfterStep: true, + maxCompactionPerTurn: 1, +}; + describe('Agent context notification projection', () => { it('renders task notifications with escaped attributes and a bounded output tail', () => { const tail = Array.from({ length: 25 }, (_, index) => `line ${String(index + 1)}`).join('\n'); @@ -776,6 +1050,56 @@ describe('Agent context notification projection', () => { expect(textOf(messages[1]!)).toBe('No origin prompt'); expect(textOf(messages[2]!)).toBe('Third real prompt'); }); + + it('keeps a complete parallel tool exchange in projection', () => { + const messages = project([ + userMessage('run both tools', { kind: 'user' }), + { + role: 'assistant', + content: [], + toolCalls: [ + { type: 'function', id: 'call_a', name: 'Lookup', arguments: '{}' }, + { type: 'function', id: 'call_b', name: 'Lookup', arguments: '{}' }, + ], + }, + { role: 'tool', content: [{ type: 'text', text: 'a' }], toolCalls: [], toolCallId: 'call_a' }, + { role: 'tool', content: [{ type: 'text', text: 'b' }], toolCalls: [], toolCallId: 'call_b' }, + ]); + + expect(messages.map((message) => message.role)).toEqual([ + 'user', + 'assistant', + 'tool', + 'tool', + ]); + }); + + it('trims an open tool exchange from projection', () => { + const messages = trimTrailingOpenToolExchange( + project([ + userMessage('read a file', { kind: 'user' }), + { + role: 'assistant', + content: [], + toolCalls: [{ type: 'function', id: 'Read:158', name: 'Read', arguments: '{}' }], + }, + ]), + ); + + expect(messages.map((message) => message.role)).toEqual(['user']); + }); + + it('does not trim tool messages that lack a preceding assistant', () => { + const messages = trimTrailingOpenToolExchange([ + { role: 'tool', content: [{ type: 'text', text: 'orphan result' }], toolCalls: [], toolCallId: 'call_x' }, + ]); + + expect(messages.map((message) => message.role)).toEqual(['tool']); + }); + + it('does not trim an empty history', () => { + expect(trimTrailingOpenToolExchange([])).toEqual([]); + }); }); function userMessage(text: string, origin?: ContextMessage['origin']): ContextMessage { diff --git a/packages/agent-core/test/agent/resume.test.ts b/packages/agent-core/test/agent/resume.test.ts index fbe340f80..6b3465e18 100644 --- a/packages/agent-core/test/agent/resume.test.ts +++ b/packages/agent-core/test/agent/resume.test.ts @@ -157,7 +157,7 @@ describe('Agent resume', () => { await ctx.agent.resume(); - const toolCall = ctx.agent.context.messages[0]?.toolCalls[0] as + const toolCall = ctx.agent.context.history[0]?.toolCalls[0] as | { name?: string; arguments?: string | null; function?: unknown } | undefined; expect(toolCall).toMatchObject({