Filter unfinished tool calls

jahooma · jahooma · commit c03a7286f567 · 2025-11-26T23:45:26.000-08:00
diff --git a/.agents/editor/best-of-n/editor-best-of-n.ts b/.agents/editor/best-of-n/editor-best-of-n.ts
@@ -256,20 +256,19 @@ function* handleStepsMax({
     'editor-implementor-opus',
   ] as const
 
-  // Only keep messages up to just before the last spawn agent tool call.
+  // Only keep messages up to just before the trailing run of user role messages (skips input prompt, instructions prompt).
   const { messageHistory: initialMessageHistory } = agentState
-  const lastSpawnAgentMessageIndex = initialMessageHistory.findLastIndex(
-    (message) =>
-      message.role === 'assistant' &&
-      Array.isArray(message.content) &&
-      message.content.length > 0 &&
-      message.content[0].type === 'tool-call' &&
-      message.content[0].toolName === 'spawn_agents',
-  )
-  const updatedMessageHistory = initialMessageHistory.slice(
-    0,
-    lastSpawnAgentMessageIndex,
-  )
+  let userMessageIndex = initialMessageHistory.length
+
+  while (userMessageIndex > 0) {
+    const message = initialMessageHistory[userMessageIndex - 1]
+    if (message.role === 'user') {
+      userMessageIndex--
+    } else {
+      break
+    }
+  }
+  const updatedMessageHistory = initialMessageHistory.slice(0, userMessageIndex)
   yield {
     toolName: 'set_messages',
     input: {
diff --git a/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts b/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts
@@ -4,6 +4,7 @@ import { generateCompactId } from '@codebuff/common/util/string'
 
 import { loopAgentSteps } from '../../../run-agent-step'
 import { getAgentTemplate } from '../../../templates/agent-registry'
+import { filterUnfinishedToolCalls } from '../../../util/messages'
 
 import type { AgentTemplate } from '@codebuff/common/types/agent-template'
 import type { Logger } from '@codebuff/common/types/contracts/logger'
@@ -161,8 +162,11 @@ export function createAgentState(
 ): AgentState {
   const agentId = generateCompactId()
 
+  // When including message history, filter out any tool calls that don't have
+  // corresponding tool responses. This prevents the spawned agent from seeing
+  // unfinished tool calls, which cause errors in the Anthropic API.
   const messageHistory = agentTemplate.includeMessageHistory
-    ? parentAgentState.messageHistory
+    ? filterUnfinishedToolCalls(parentAgentState.messageHistory)
     : []
 
   return {
diff --git a/packages/agent-runtime/src/util/__tests__/messages.test.ts b/packages/agent-runtime/src/util/__tests__/messages.test.ts
@@ -18,6 +18,7 @@ import {
   trimMessagesToFitTokenLimit,
   messagesWithSystem,
   getPreviouslyReadFiles,
+  filterUnfinishedToolCalls,
 } from '../../util/messages'
 import * as tokenCounter from '../token-counter'
 
@@ -406,6 +407,235 @@ describe('trimMessagesToFitTokenLimit', () => {
   })
 })
 
+describe('filterUnfinishedToolCalls', () => {
+  it('returns empty array when given empty messages', () => {
+    const result = filterUnfinishedToolCalls([])
+    expect(result).toEqual([])
+  })
+
+  it('keeps messages that are not assistant messages', () => {
+    const messages: Message[] = [
+      userMessage('Hello'),
+      systemMessage('System prompt'),
+      {
+        role: 'tool',
+        toolName: 'read_files',
+        toolCallId: 'tool-1',
+        content: jsonToolResult({ files: [] }),
+      },
+    ]
+
+    const result = filterUnfinishedToolCalls(messages)
+    expect(result).toHaveLength(3)
+    expect(result).toEqual(messages)
+  })
+
+  it('keeps assistant messages with text content only', () => {
+    const messages: Message[] = [
+      userMessage('Hello'),
+      assistantMessage('Hi there!'),
+      userMessage('How are you?'),
+      assistantMessage('I am doing well.'),
+    ]
+
+    const result = filterUnfinishedToolCalls(messages)
+    expect(result).toHaveLength(4)
+    expect(result).toEqual(messages)
+  })
+
+  it('keeps tool calls that have corresponding tool responses', () => {
+    const messages: Message[] = [
+      userMessage('Read a file'),
+      {
+        role: 'assistant',
+        content: [
+          {
+            type: 'tool-call',
+            toolCallId: 'call-1',
+            toolName: 'read_files',
+            input: { paths: ['test.ts'] },
+          },
+        ],
+      },
+      {
+        role: 'tool',
+        toolName: 'read_files',
+        toolCallId: 'call-1',
+        content: jsonToolResult({ content: 'file content' }),
+      },
+    ]
+
+    const result = filterUnfinishedToolCalls(messages)
+    expect(result).toHaveLength(3)
+    expect(result[1].role).toBe('assistant')
+    expect(result[1].content).toHaveLength(1)
+    expect(result[1].content[0].type).toBe('tool-call')
+  })
+
+  it('removes tool calls that do not have corresponding tool responses', () => {
+    const messages: Message[] = [
+      userMessage('Read a file'),
+      {
+        role: 'assistant',
+        content: [
+          {
+            type: 'tool-call',
+            toolCallId: 'call-1',
+            toolName: 'read_files',
+            input: { paths: ['test.ts'] },
+          },
+        ],
+      },
+      // No tool response for call-1
+    ]
+
+    const result = filterUnfinishedToolCalls(messages)
+    expect(result).toHaveLength(1) // Only the user message
+    expect(result[0].role).toBe('user')
+  })
+
+  it('removes only unfinished tool calls from assistant messages with mixed content', () => {
+    const messages: Message[] = [
+      userMessage('Read files'),
+      {
+        role: 'assistant',
+        content: [
+          { type: 'text', text: 'I will read these files' },
+          {
+            type: 'tool-call',
+            toolCallId: 'call-1',
+            toolName: 'read_files',
+            input: { paths: ['file1.ts'] },
+          },
+          {
+            type: 'tool-call',
+            toolCallId: 'call-2',
+            toolName: 'read_files',
+            input: { paths: ['file2.ts'] },
+          },
+        ],
+      },
+      {
+        role: 'tool',
+        toolName: 'read_files',
+        toolCallId: 'call-1',
+        content: jsonToolResult({ content: 'file1 content' }),
+      },
+      // No tool response for call-2
+    ]
+
+    const result = filterUnfinishedToolCalls(messages)
+    expect(result).toHaveLength(3) // user, assistant (filtered), tool
+
+    const assistantMsg = result[1]
+    expect(assistantMsg.role).toBe('assistant')
+    expect(assistantMsg.content).toHaveLength(2) // text + call-1 (call-2 removed)
+    expect(assistantMsg.content[0].type).toBe('text')
+    expect(assistantMsg.content[1].type).toBe('tool-call')
+    expect((assistantMsg.content[1] as any).toolCallId).toBe('call-1')
+  })
+
+  it('removes assistant message entirely if all content parts are unfinished tool calls', () => {
+    const messages: Message[] = [
+      userMessage('Do something'),
+      {
+        role: 'assistant',
+        content: [
+          {
+            type: 'tool-call',
+            toolCallId: 'call-1',
+            toolName: 'write_file',
+            input: { path: 'test.ts', content: 'test' },
+          },
+          {
+            type: 'tool-call',
+            toolCallId: 'call-2',
+            toolName: 'read_files',
+            input: { paths: ['other.ts'] },
+          },
+        ],
+      },
+      // No tool responses
+    ]
+
+    const result = filterUnfinishedToolCalls(messages)
+    expect(result).toHaveLength(1) // Only the user message
+    expect(result[0].role).toBe('user')
+  })
+
+  it('handles multiple assistant messages with different tool call states', () => {
+    const messages: Message[] = [
+      userMessage('First request'),
+      {
+        role: 'assistant',
+        content: [
+          {
+            type: 'tool-call',
+            toolCallId: 'call-1',
+            toolName: 'read_files',
+            input: { paths: ['file1.ts'] },
+          },
+        ],
+      },
+      {
+        role: 'tool',
+        toolName: 'read_files',
+        toolCallId: 'call-1',
+        content: jsonToolResult({ content: 'content1' }),
+      },
+      userMessage('Second request'),
+      {
+        role: 'assistant',
+        content: [
+          {
+            type: 'tool-call',
+            toolCallId: 'call-2',
+            toolName: 'write_file',
+            input: { path: 'test.ts', content: 'test' },
+          },
+        ],
+      },
+      // No tool response for call-2 (unfinished)
+    ]
+
+    const result = filterUnfinishedToolCalls(messages)
+    expect(result).toHaveLength(4) // user1, assistant1 (kept), tool1, user2
+    expect(result[0].role).toBe('user')
+    expect(result[1].role).toBe('assistant')
+    expect(result[2].role).toBe('tool')
+    expect(result[3].role).toBe('user')
+  })
+
+  it('preserves auxiliary message data on filtered assistant messages', () => {
+    const messages: Message[] = [
+      userMessage('Test'),
+      {
+        role: 'assistant',
+        content: [
+          { type: 'text', text: 'Response' },
+          {
+            type: 'tool-call',
+            toolCallId: 'call-1',
+            toolName: 'read_files',
+            input: { paths: ['test.ts'] },
+          },
+        ],
+        tags: ['important'],
+        keepDuringTruncation: true,
+      },
+      // No tool response
+    ]
+
+    const result = filterUnfinishedToolCalls(messages)
+    expect(result).toHaveLength(2)
+
+    const assistantMsg = result[1]
+    expect(assistantMsg.tags).toEqual(['important'])
+    expect(assistantMsg.keepDuringTruncation).toBe(true)
+    expect(assistantMsg.content).toHaveLength(1) // Only text, tool-call removed
+  })
+})
+
 describe('getPreviouslyReadFiles', () => {
   it('returns empty array when no messages provided', () => {
     const result = getPreviouslyReadFiles({ messages: [], logger })
diff --git a/packages/agent-runtime/src/util/messages.ts b/packages/agent-runtime/src/util/messages.ts
@@ -307,6 +307,53 @@ export function expireMessages(
   })
 }
 
+/**
+ * Removes tool calls from the message history that don't have corresponding tool responses.
+ * This is important when passing message history to spawned agents, as unfinished tool calls
+ * will cause issues with the LLM expecting tool responses.
+ *
+ * The function:
+ * 1. Collects all toolCallIds from tool response messages
+ * 2. Filters assistant messages to remove tool-call content parts without responses
+ * 3. Removes assistant messages that become empty after filtering
+ */
+export function filterUnfinishedToolCalls(messages: Message[]): Message[] {
+  // Collect all toolCallIds that have corresponding tool responses
+  const respondedToolCallIds = new Set<string>()
+  for (const message of messages) {
+    if (message.role === 'tool') {
+      respondedToolCallIds.add(message.toolCallId)
+    }
+  }
+
+  // Filter messages, removing unfinished tool calls from assistant messages
+  const filteredMessages: Message[] = []
+  for (const message of messages) {
+    if (message.role !== 'assistant') {
+      filteredMessages.push(message)
+      continue
+    }
+
+    // Filter out tool-call content parts that don't have responses
+    const filteredContent = message.content.filter((part) => {
+      if (part.type !== 'tool-call') {
+        return true
+      }
+      return respondedToolCallIds.has(part.toolCallId)
+    })
+
+    // Only include the assistant message if it has content after filtering
+    if (filteredContent.length > 0) {
+      filteredMessages.push({
+        ...message,
+        content: filteredContent,
+      })
+    }
+  }
+
+  return filteredMessages
+}
+
 export function getEditedFiles(params: {
   messages: Message[]
   logger: Logger