Skip to content

Commit c03a728

Browse files
committed
Filter unfinished tool calls
1 parent 70693da commit c03a728

File tree

4 files changed

+294
-14
lines changed

4 files changed

+294
-14
lines changed

.agents/editor/best-of-n/editor-best-of-n.ts

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -256,20 +256,19 @@ function* handleStepsMax({
256256
'editor-implementor-opus',
257257
] as const
258258

259-
// Only keep messages up to just before the last spawn agent tool call.
259+
// Only keep messages up to just before the trailing run of user role messages (skips the input prompt and instructions prompt).
260260
const { messageHistory: initialMessageHistory } = agentState
261-
const lastSpawnAgentMessageIndex = initialMessageHistory.findLastIndex(
262-
(message) =>
263-
message.role === 'assistant' &&
264-
Array.isArray(message.content) &&
265-
message.content.length > 0 &&
266-
message.content[0].type === 'tool-call' &&
267-
message.content[0].toolName === 'spawn_agents',
268-
)
269-
const updatedMessageHistory = initialMessageHistory.slice(
270-
0,
271-
lastSpawnAgentMessageIndex,
272-
)
261+
let userMessageIndex = initialMessageHistory.length
262+
263+
while (userMessageIndex > 0) {
264+
const message = initialMessageHistory[userMessageIndex - 1]
265+
if (message.role === 'user') {
266+
userMessageIndex--
267+
} else {
268+
break
269+
}
270+
}
271+
const updatedMessageHistory = initialMessageHistory.slice(0, userMessageIndex)
273272
yield {
274273
toolName: 'set_messages',
275274
input: {

packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { generateCompactId } from '@codebuff/common/util/string'
44

55
import { loopAgentSteps } from '../../../run-agent-step'
66
import { getAgentTemplate } from '../../../templates/agent-registry'
7+
import { filterUnfinishedToolCalls } from '../../../util/messages'
78

89
import type { AgentTemplate } from '@codebuff/common/types/agent-template'
910
import type { Logger } from '@codebuff/common/types/contracts/logger'
@@ -161,8 +162,11 @@ export function createAgentState(
161162
): AgentState {
162163
const agentId = generateCompactId()
163164

165+
// When including message history, filter out any tool calls that don't have
166+
// corresponding tool responses. This prevents the spawned agent from seeing
167+
// unfinished tool calls, which cause errors in the Anthropic API.
164168
const messageHistory = agentTemplate.includeMessageHistory
165-
? parentAgentState.messageHistory
169+
? filterUnfinishedToolCalls(parentAgentState.messageHistory)
166170
: []
167171

168172
return {

packages/agent-runtime/src/util/__tests__/messages.test.ts

Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import {
1818
trimMessagesToFitTokenLimit,
1919
messagesWithSystem,
2020
getPreviouslyReadFiles,
21+
filterUnfinishedToolCalls,
2122
} from '../../util/messages'
2223
import * as tokenCounter from '../token-counter'
2324

@@ -406,6 +407,235 @@ describe('trimMessagesToFitTokenLimit', () => {
406407
})
407408
})
408409

410+
describe('filterUnfinishedToolCalls', () => {
411+
it('returns empty array when given empty messages', () => {
412+
const result = filterUnfinishedToolCalls([])
413+
expect(result).toEqual([])
414+
})
415+
416+
it('keeps messages that are not assistant messages', () => {
417+
const messages: Message[] = [
418+
userMessage('Hello'),
419+
systemMessage('System prompt'),
420+
{
421+
role: 'tool',
422+
toolName: 'read_files',
423+
toolCallId: 'tool-1',
424+
content: jsonToolResult({ files: [] }),
425+
},
426+
]
427+
428+
const result = filterUnfinishedToolCalls(messages)
429+
expect(result).toHaveLength(3)
430+
expect(result).toEqual(messages)
431+
})
432+
433+
it('keeps assistant messages with text content only', () => {
434+
const messages: Message[] = [
435+
userMessage('Hello'),
436+
assistantMessage('Hi there!'),
437+
userMessage('How are you?'),
438+
assistantMessage('I am doing well.'),
439+
]
440+
441+
const result = filterUnfinishedToolCalls(messages)
442+
expect(result).toHaveLength(4)
443+
expect(result).toEqual(messages)
444+
})
445+
446+
it('keeps tool calls that have corresponding tool responses', () => {
447+
const messages: Message[] = [
448+
userMessage('Read a file'),
449+
{
450+
role: 'assistant',
451+
content: [
452+
{
453+
type: 'tool-call',
454+
toolCallId: 'call-1',
455+
toolName: 'read_files',
456+
input: { paths: ['test.ts'] },
457+
},
458+
],
459+
},
460+
{
461+
role: 'tool',
462+
toolName: 'read_files',
463+
toolCallId: 'call-1',
464+
content: jsonToolResult({ content: 'file content' }),
465+
},
466+
]
467+
468+
const result = filterUnfinishedToolCalls(messages)
469+
expect(result).toHaveLength(3)
470+
expect(result[1].role).toBe('assistant')
471+
expect(result[1].content).toHaveLength(1)
472+
expect(result[1].content[0].type).toBe('tool-call')
473+
})
474+
475+
it('removes tool calls that do not have corresponding tool responses', () => {
476+
const messages: Message[] = [
477+
userMessage('Read a file'),
478+
{
479+
role: 'assistant',
480+
content: [
481+
{
482+
type: 'tool-call',
483+
toolCallId: 'call-1',
484+
toolName: 'read_files',
485+
input: { paths: ['test.ts'] },
486+
},
487+
],
488+
},
489+
// No tool response for call-1
490+
]
491+
492+
const result = filterUnfinishedToolCalls(messages)
493+
expect(result).toHaveLength(1) // Only the user message
494+
expect(result[0].role).toBe('user')
495+
})
496+
497+
it('removes only unfinished tool calls from assistant messages with mixed content', () => {
498+
const messages: Message[] = [
499+
userMessage('Read files'),
500+
{
501+
role: 'assistant',
502+
content: [
503+
{ type: 'text', text: 'I will read these files' },
504+
{
505+
type: 'tool-call',
506+
toolCallId: 'call-1',
507+
toolName: 'read_files',
508+
input: { paths: ['file1.ts'] },
509+
},
510+
{
511+
type: 'tool-call',
512+
toolCallId: 'call-2',
513+
toolName: 'read_files',
514+
input: { paths: ['file2.ts'] },
515+
},
516+
],
517+
},
518+
{
519+
role: 'tool',
520+
toolName: 'read_files',
521+
toolCallId: 'call-1',
522+
content: jsonToolResult({ content: 'file1 content' }),
523+
},
524+
// No tool response for call-2
525+
]
526+
527+
const result = filterUnfinishedToolCalls(messages)
528+
expect(result).toHaveLength(3) // user, assistant (filtered), tool
529+
530+
const assistantMsg = result[1]
531+
expect(assistantMsg.role).toBe('assistant')
532+
expect(assistantMsg.content).toHaveLength(2) // text + call-1 (call-2 removed)
533+
expect(assistantMsg.content[0].type).toBe('text')
534+
expect(assistantMsg.content[1].type).toBe('tool-call')
535+
expect((assistantMsg.content[1] as any).toolCallId).toBe('call-1')
536+
})
537+
538+
it('removes assistant message entirely if all content parts are unfinished tool calls', () => {
539+
const messages: Message[] = [
540+
userMessage('Do something'),
541+
{
542+
role: 'assistant',
543+
content: [
544+
{
545+
type: 'tool-call',
546+
toolCallId: 'call-1',
547+
toolName: 'write_file',
548+
input: { path: 'test.ts', content: 'test' },
549+
},
550+
{
551+
type: 'tool-call',
552+
toolCallId: 'call-2',
553+
toolName: 'read_files',
554+
input: { paths: ['other.ts'] },
555+
},
556+
],
557+
},
558+
// No tool responses
559+
]
560+
561+
const result = filterUnfinishedToolCalls(messages)
562+
expect(result).toHaveLength(1) // Only the user message
563+
expect(result[0].role).toBe('user')
564+
})
565+
566+
it('handles multiple assistant messages with different tool call states', () => {
567+
const messages: Message[] = [
568+
userMessage('First request'),
569+
{
570+
role: 'assistant',
571+
content: [
572+
{
573+
type: 'tool-call',
574+
toolCallId: 'call-1',
575+
toolName: 'read_files',
576+
input: { paths: ['file1.ts'] },
577+
},
578+
],
579+
},
580+
{
581+
role: 'tool',
582+
toolName: 'read_files',
583+
toolCallId: 'call-1',
584+
content: jsonToolResult({ content: 'content1' }),
585+
},
586+
userMessage('Second request'),
587+
{
588+
role: 'assistant',
589+
content: [
590+
{
591+
type: 'tool-call',
592+
toolCallId: 'call-2',
593+
toolName: 'write_file',
594+
input: { path: 'test.ts', content: 'test' },
595+
},
596+
],
597+
},
598+
// No tool response for call-2 (unfinished)
599+
]
600+
601+
const result = filterUnfinishedToolCalls(messages)
602+
expect(result).toHaveLength(4) // user1, assistant1 (kept), tool1, user2
603+
expect(result[0].role).toBe('user')
604+
expect(result[1].role).toBe('assistant')
605+
expect(result[2].role).toBe('tool')
606+
expect(result[3].role).toBe('user')
607+
})
608+
609+
it('preserves auxiliary message data on filtered assistant messages', () => {
610+
const messages: Message[] = [
611+
userMessage('Test'),
612+
{
613+
role: 'assistant',
614+
content: [
615+
{ type: 'text', text: 'Response' },
616+
{
617+
type: 'tool-call',
618+
toolCallId: 'call-1',
619+
toolName: 'read_files',
620+
input: { paths: ['test.ts'] },
621+
},
622+
],
623+
tags: ['important'],
624+
keepDuringTruncation: true,
625+
},
626+
// No tool response
627+
]
628+
629+
const result = filterUnfinishedToolCalls(messages)
630+
expect(result).toHaveLength(2)
631+
632+
const assistantMsg = result[1]
633+
expect(assistantMsg.tags).toEqual(['important'])
634+
expect(assistantMsg.keepDuringTruncation).toBe(true)
635+
expect(assistantMsg.content).toHaveLength(1) // Only text, tool-call removed
636+
})
637+
})
638+
409639
describe('getPreviouslyReadFiles', () => {
410640
it('returns empty array when no messages provided', () => {
411641
const result = getPreviouslyReadFiles({ messages: [], logger })

packages/agent-runtime/src/util/messages.ts

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,53 @@ export function expireMessages(
307307
})
308308
}
309309

310+
/**
 * Removes tool calls from the message history that don't have corresponding tool responses.
 * This is important when passing message history to spawned agents, as unfinished tool calls
 * will cause issues with the LLM expecting tool responses.
 *
 * The function:
 * 1. Collects all toolCallIds from tool response messages
 * 2. Filters assistant messages to remove tool-call content parts without responses
 * 3. Removes assistant messages that have no content parts left after filtering
 *
 * Non-assistant messages, assistant messages whose content is not an array, and
 * assistant messages from which nothing was removed are passed through by
 * reference. Only assistant messages that actually lose a tool call are shallow
 * copied, so every other field on them is preserved. The input array is never mutated.
 *
 * @param messages - The message history to filter.
 * @returns A new array containing the kept messages in their original order.
 */
export function filterUnfinishedToolCalls(messages: Message[]): Message[] {
  // Collect all toolCallIds that have corresponding tool responses
  const respondedToolCallIds = new Set<string>()
  for (const message of messages) {
    if (message.role === 'tool') {
      respondedToolCallIds.add(message.toolCallId)
    }
  }

  // Filter messages, removing unfinished tool calls from assistant messages
  const filteredMessages: Message[] = []
  for (const message of messages) {
    // Content that is not an array of parts cannot hold tool-call parts, so
    // there is nothing to filter (and calling `.filter` on it would throw).
    if (message.role !== 'assistant' || !Array.isArray(message.content)) {
      filteredMessages.push(message)
      continue
    }

    // Keep every non-tool-call part, and only the tool calls that were answered
    const filteredContent = message.content.filter(
      (part) =>
        part.type !== 'tool-call' || respondedToolCallIds.has(part.toolCallId),
    )

    // Only include the assistant message if it has content after filtering
    if (filteredContent.length === 0) {
      continue
    }

    // `filter` only removes parts, so an equal length means nothing was
    // dropped and the original message can be reused as-is.
    filteredMessages.push(
      filteredContent.length === message.content.length
        ? message
        : { ...message, content: filteredContent },
    )
  }

  return filteredMessages
}
356+
310357
export function getEditedFiles(params: {
311358
messages: Message[]
312359
logger: Logger

0 commit comments

Comments
 (0)