Restore state-backed tool call IDs

jahooma · jahooma · commit 9361c1b01efe · 2026-05-07T11:53:53.000-07:00
diff --git a/common/src/types/session-state.ts b/common/src/types/session-state.ts
@@ -49,6 +49,9 @@ export type AgentState = {
    * This is updated on every agent step via the /api/v1/token-count endpoint.
    */
   contextTokenCount: number
+  toolCallState?: {
+    nextIndex: number
+  }
 }
 
 export const AgentOutputSchema = z.discriminatedUnion('type', [
@@ -137,6 +140,7 @@ export function getInitialAgentState(): AgentState {
     systemPrompt: '',
     toolDefinitions: {},
     contextTokenCount: 0,
+    toolCallState: { nextIndex: 0 },
   }
 }
 export function getInitialSessionState(
diff --git a/packages/agent-runtime/src/__tests__/run-programmatic-step.test.ts b/packages/agent-runtime/src/__tests__/run-programmatic-step.test.ts
@@ -212,6 +212,28 @@ describe('runProgrammaticStep', () => {
   })
 
   describe('tool execution', () => {
+    it('assigns deterministic global ids to handleSteps tool calls', async () => {
+      const mockGenerator = (function* () {
+        yield { toolName: 'read_files', input: { paths: ['first.txt'] } }
+        yield { toolName: 'read_files', input: { paths: ['second.txt'] } }
+        yield { toolName: 'end_turn', input: {} }
+      })() as StepGenerator
+
+      mockTemplate.handleSteps = () => mockGenerator
+
+      await runProgrammaticStep(mockParams)
+
+      expect(executeToolCallSpy.mock.calls[0][0].toolCallId).toBe(
+        'functions.read_files.0',
+      )
+      expect(executeToolCallSpy.mock.calls[1][0].toolCallId).toBe(
+        'functions.read_files.1',
+      )
+      expect(executeToolCallSpy.mock.calls[2][0].toolCallId).toBe(
+        'functions.end_turn.2',
+      )
+    })
+
     it('should not add tool call message for add_message tool', async () => {
       const mockGenerator = (function* () {
         yield {
diff --git a/packages/agent-runtime/src/__tests__/tool-validation-error.test.ts b/packages/agent-runtime/src/__tests__/tool-validation-error.test.ts
@@ -464,13 +464,16 @@ describe('tool validation error handling', () => {
     )
     expect(toolCallEvents.length).toBe(1)
     expect(toolCallEvents[0].toolName).toBe('read_files')
+    expect(toolCallEvents[0].toolCallId).toBe('functions.read_files.0')
 
     // Verify tool_result event was emitted
     const toolResultEvents = responseChunks.filter(
       (chunk): chunk is Extract<PrintModeEvent, { type: 'tool_result' }> =>
         typeof chunk !== 'string' && chunk.type === 'tool_result',
     )
     expect(toolResultEvents.length).toBe(1)
+    expect(toolResultEvents[0].toolName).toBe('read_files')
+    expect(toolResultEvents[0].toolCallId).toBe('functions.read_files.0')
 
     // Verify NO error events
     const errorEvents = responseChunks.filter(
diff --git a/packages/agent-runtime/src/run-programmatic-step.ts b/packages/agent-runtime/src/run-programmatic-step.ts
@@ -6,7 +6,7 @@ import { cloneDeep } from 'lodash'
 import { clearProposedContentForRun } from './tools/handlers/tool/proposed-content-store'
 import { executeToolCall } from './tools/tool-executor'
 import { parseTextWithToolCalls } from './util/parse-tool-calls-from-text'
-
+import { createToolCallIdGenerator } from './util/tool-call-id'
 
 import type { FileProcessingState } from './tools/handlers/tool/write-file'
 import type { ExecuteToolCallParams } from './tools/tool-executor'
@@ -213,6 +213,7 @@ export async function runProgrammaticStep(
   let toolResult: ToolResultOutput[] | undefined = undefined
   let endTurn = false
   let generateN: number | undefined = undefined
+  const getToolCallId = createToolCallIdGenerator(agentState)
 
   let startTime = new Date()
   let creditsBefore = agentState.directCreditsUsed
@@ -273,6 +274,7 @@ export async function runProgrammaticStep(
             previousToolCallFinished: Promise.resolve(),
             toolCalls,
             toolResults,
+            getToolCallId,
             onResponseChunk,
           })
         }
@@ -301,6 +303,7 @@ export async function runProgrammaticStep(
         previousToolCallFinished: Promise.resolve(),
         toolCalls,
         toolResults,
+        getToolCallId,
         onResponseChunk,
       })
 
@@ -432,6 +435,7 @@ type ExecuteToolCallsArrayParams = Omit<
   | 'toolResultsToAddToMessageHistory'
 > & {
   agentState: AgentState
+  getToolCallId: (toolName: string) => string
   onResponseChunk: (chunk: string | PrintModeEvent) => void
 }
 
@@ -445,7 +449,7 @@ async function executeSingleToolCall(
   toolCallToExecute: ToolCallToExecute,
   params: ExecuteToolCallsArrayParams,
 ): Promise<ToolResultOutput[] | undefined> {
-  const { agentState, onResponseChunk, toolResults } = params
+  const { agentState, getToolCallId, onResponseChunk, toolResults } = params
 
   // Note: We don't check if the tool is available for the agent template anymore.
   // You can run any tool from handleSteps now!
@@ -455,7 +459,7 @@ async function executeSingleToolCall(
   //   )
   // }
 
-  const toolCallId = crypto.randomUUID()
+  const toolCallId = getToolCallId(toolCallToExecute.toolName)
   const excludeToolFromMessageHistory =
     toolCallToExecute.includeToolCall === false
 
diff --git a/packages/agent-runtime/src/tool-stream-parser.ts b/packages/agent-runtime/src/tool-stream-parser.ts
@@ -50,7 +50,6 @@ export async function* processStreamWithTools(params: {
   }
   trackEvent: TrackEventFn
   executeXmlToolCall: (params: {
-    toolCallId: string
     toolName: string
     input: Record<string, unknown>
   }) => Promise<void>
@@ -150,12 +149,9 @@ export async function* processStreamWithTools(params: {
 
       // Then process and yield any XML tool calls found
       for (const toolCall of toolCalls) {
-        const toolCallId = `xml-${crypto.randomUUID().slice(0, 8)}`
-
         // Execute the tool immediately if callback provided, pausing the stream
         // The callback handles emitting tool_call and tool_result events
         await executeXmlToolCall({
-          toolCallId,
           toolName: toolCall.toolName,
           input: toolCall.input,
         })
diff --git a/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts b/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts
@@ -9,6 +9,7 @@ import { generateCompactId } from '@codebuff/common/util/string'
 import { loopAgentSteps } from '../../../run-agent-step'
 import { getAgentTemplate } from '../../../templates/agent-registry'
 import { formatValueForError } from '../../../util/format-value'
+import { ensureToolCallState } from '../../../util/tool-call-id'
 import {
   filterUnfinishedToolCalls,
   withSystemTags,
@@ -256,6 +257,7 @@ export function createAgentState(
   agentContext: Record<string, Subgoal>,
 ): AgentState {
   const agentId = generateCompactId()
+  const toolCallState = ensureToolCallState(parentAgentState)
 
   // When including message history, filter out any tool calls that don't have
   // corresponding tool responses. This prevents the spawned agent from seeing
@@ -295,6 +297,7 @@ export function createAgentState(
     systemPrompt: '',
     toolDefinitions: {},
     contextTokenCount: parentAgentState.contextTokenCount,
+    toolCallState,
   }
 }
 
diff --git a/packages/agent-runtime/src/tools/stream-parser.ts b/packages/agent-runtime/src/tools/stream-parser.ts
@@ -5,7 +5,6 @@ import {
   assistantMessage,
   userMessage,
 } from '@codebuff/common/util/messages'
-import { generateCompactId } from '@codebuff/common/util/string'
 
 import { processStreamWithTools } from '../tool-stream-parser'
 import { INCLUDE_REASONING_IN_MESSAGE_HISTORY } from '../constants'
@@ -14,6 +13,7 @@ import {
   executeToolCall,
   tryTransformAgentToolCall,
 } from './tool-executor'
+import { createToolCallIdGenerator } from '../util/tool-call-id'
 import { withSystemTags } from '../util/messages'
 
 import type { CustomToolCall, ExecuteToolCallParams } from './tool-executor'
@@ -91,6 +91,7 @@ export async function processStream(
   const toolCalls: (CodebuffToolCall | CustomToolCall)[] = []
   const toolCallsToAddToMessageHistory: (CodebuffToolCall | CustomToolCall)[] = []
   const assistantMessages: Message[] = []
+  const getToolCallId = createToolCallIdGenerator(agentState)
   let hadToolCallError = false
   const errorMessages: Message[] = []
   const { promise: streamDonePromise, resolve: resolveStreamDonePromise } =
@@ -137,7 +138,6 @@ export async function processStream(
         if (signal.aborted) {
           return
         }
-        const toolCallId = generateCompactId()
         const isNativeTool = toolNames.includes(toolName as ToolName)
 
         // Check if this is an agent tool call that should be transformed to spawn_agents
@@ -160,19 +160,20 @@ export async function processStream(
         // Determine which executor to use and with what parameters
         let toolPromise: Promise<void>
         if (isNativeTool || transformed) {
+          const effectiveToolName = transformed
+            ? transformed.toolName
+            : (toolName as ToolName)
           // Use executeToolCall for native tools or transformed agent calls
           toolPromise = executeToolCall({
             ...params,
-            toolName: transformed
-              ? transformed.toolName
-              : (toolName as ToolName),
+            toolName: effectiveToolName,
             input: transformed ? transformed.input : input,
             fromHandleSteps: false,
 
             fileProcessingState,
             fullResponse: fullResponseChunks.join(''),
             previousToolCallFinished: previousPromise,
-            toolCallId,
+            toolCallId: getToolCallId(effectiveToolName),
             toolCalls,
             toolCallsToAddToMessageHistory,
             toolResults,
@@ -191,7 +192,7 @@ export async function processStream(
             fileProcessingState,
             fullResponse: fullResponseChunks.join(''),
             previousToolCallFinished: previousPromise,
-            toolCallId,
+            toolCallId: getToolCallId(toolName),
             toolCalls,
             toolCallsToAddToMessageHistory,
             toolResults,
diff --git a/packages/agent-runtime/src/tools/tool-executor.ts b/packages/agent-runtime/src/tools/tool-executor.ts
@@ -1,12 +1,13 @@
 import { endsAgentStepParam, toolNames } from '@codebuff/common/tools/constants'
 import { toolParams } from '@codebuff/common/tools/list'
-import { generateCompactId } from '@codebuff/common/util/string'
+import { normalizeAgentIdForLookup } from '@codebuff/common/util/agent-id-parsing'
 import { cloneDeep } from 'lodash'
 
 import { getMCPToolData } from '../mcp'
 import { MCP_TOOL_SEPARATOR } from '../mcp-constants'
 import { getAgentShortName, getAgentToolName } from '../templates/prompts'
 import { formatValueForError } from '../util/format-value'
+import { createToolCallIdGenerator } from '../util/tool-call-id'
 import { codebuffToolHandlers } from './handlers/list'
 import { getMatchingSpawn } from './handlers/tool/spawn-agent-utils'
 import { getAgentTemplate } from '../templates/agent-registry'
@@ -308,7 +309,9 @@ export async function executeToolCall<T extends ToolName>(
     onResponseChunk,
     requestToolCall,
   } = params
-  const toolCallId = params.toolCallId ?? generateCompactId()
+  const toolCallId =
+    params.toolCallId ??
+    createToolCallIdGenerator(agentState, toolCalls)(toolName)
 
   const toolCall: CodebuffToolCall<T> | ToolCallError = parseRawToolCall<T>({
     rawToolCall: {
@@ -369,7 +372,9 @@ export async function executeToolCall<T extends ToolName>(
             }
           }
 
-          let agentIdToLoad = agentTypeStr
+          let agentIdToLoad = isBaseAgent
+            ? normalizeAgentIdForLookup(agentTypeStr)
+            : agentTypeStr
           if (!isBaseAgent) {
             const matchingSpawn = getMatchingSpawn(
               agentTemplate.spawnableAgents,
@@ -418,7 +423,13 @@ export async function executeToolCall<T extends ToolName>(
             }
           }
 
-          return { valid: true as const, agent }
+          return {
+            valid: true as const,
+            agent: {
+              ...(agent as Record<string, unknown>),
+              agent_type: agentIdToLoad,
+            },
+          }
         }),
       )
 
@@ -447,8 +458,8 @@ export async function executeToolCall<T extends ToolName>(
         }
         const errorMsg = `Some agents could not be spawned: ${errors.join('; ')}. Proceeding with valid agents only.`
         onResponseChunk({ type: 'error', message: errorMsg })
-        effectiveInput = { ...effectiveInput, agents: validAgents }
       }
+      effectiveInput = { ...effectiveInput, agents: validAgents }
     }
   }
 
@@ -640,7 +651,9 @@ export async function executeCustomToolCall(
     }),
     rawToolCall: {
       toolName,
-      toolCallId: toolCallId ?? generateCompactId(),
+      toolCallId:
+        toolCallId ??
+        createToolCallIdGenerator(agentState, toolCalls)(toolName),
       input,
     },
     autoInsertEndStepParam,
diff --git a/packages/agent-runtime/src/util/__tests__/tool-call-id.test.ts b/packages/agent-runtime/src/util/__tests__/tool-call-id.test.ts
diff --git a/packages/agent-runtime/src/util/tool-call-id.ts b/packages/agent-runtime/src/util/tool-call-id.ts

Original file line number	Diff line number	Diff line change
`@@ -49,6 +49,9 @@ export type AgentState = {`
`49`	`49`	`* This is updated on every agent step via the /api/v1/token-count endpoint.`
`50`	`50`	`*/`
`51`	`51`	`contextTokenCount: number`
	`52`	`+ toolCallState?: {`
	`53`	`+ nextIndex: number`
	`54`	`+ }`
`52`	`55`	`}`
`53`	`56`
`54`	`57`	`export const AgentOutputSchema = z.discriminatedUnion('type', [`
`@@ -137,6 +140,7 @@ export function getInitialAgentState(): AgentState {`
`137`	`140`	`systemPrompt: '',`
`138`	`141`	`toolDefinitions: {},`
`139`	`142`	`contextTokenCount: 0,`
	`143`	`+ toolCallState: { nextIndex: 0 },`
`140`	`144`	`}`
`141`	`145`	`}`
`142`	`146`	`export function getInitialSessionState(`