diff --git a/agents/__tests__/context-pruner.test.ts b/agents/__tests__/context-pruner.test.ts
index b691f33a9..4837740e7 100644
--- a/agents/__tests__/context-pruner.test.ts
+++ b/agents/__tests__/context-pruner.test.ts
@@ -292,9 +292,12 @@ describe('context-pruner handleSteps', () => {
expect(content).toContain('')
expect(content).toContain('')
- // Should contain the user and assistant markers
- expect(content).toContain('[USER]')
- expect(content).toContain('[ASSISTANT]')
+ // Should use a memory artifact format, not transcript role markers
+ expect(content).toContain('')
+ expect(content).toContain('User request:')
+ expect(content).toContain('Progress note:')
+ expect(content).not.toContain('[USER]')
+ expect(content).not.toContain('[ASSISTANT]')
})
test('includes tool call summaries in the output', () => {
@@ -303,7 +306,9 @@ describe('context-pruner handleSteps', () => {
createToolCallMessage('call-1', 'read_files', {
paths: ['file1.ts', 'file2.ts'],
}),
- createToolResultMessage('call-1', 'read_files', { content: 'file data' } as JSONValue),
+ createToolResultMessage('call-1', 'read_files', {
+ content: 'file data',
+ } as JSONValue),
createMessage('user', 'Now edit this file'),
createToolCallMessage('call-2', 'str_replace', {
path: 'file1.ts',
@@ -316,8 +321,8 @@ describe('context-pruner handleSteps', () => {
const content = results[0].input.messages[0].content[0].text
// Should contain tool summaries
- expect(content).toContain('Read files: file1.ts, file2.ts')
- expect(content).toContain('Edited file: file1.ts')
+ expect(content).toContain('Previously inspected files: file1.ts, file2.ts')
+ expect(content).toContain('Previously edited file: file1.ts')
})
test('summarizes various tool types correctly', () => {
@@ -345,10 +350,10 @@ describe('context-pruner handleSteps', () => {
const results = runHandleSteps(messages, 50000, 10000)
const content = results[0].input.messages[0].content[0].text
- expect(content).toContain('Wrote file: new-file.ts')
- expect(content).toContain('Ran command: npm test')
- expect(content).toContain('Code search: "function"')
- expect(content).toContain('Spawned agents:')
+ expect(content).toContain('Previously wrote file: new-file.ts')
+ expect(content).toContain('Previously ran command: npm test')
+ expect(content).toContain('Previous code search for "function"')
+ expect(content).toContain('Previously delegated agents:')
expect(content).toContain('- file-picker')
expect(content).toContain('- commander')
})
@@ -365,7 +370,7 @@ describe('context-pruner handleSteps', () => {
const results = runHandleSteps(messages, 50000, 10000)
const content = results[0].input.messages[0].content[0].text
- expect(content).toContain('[TOOL ERROR: read_files] File not found')
+ expect(content).toContain('Tool error from read_files: File not found')
})
test('notes when user messages have images', () => {
@@ -382,7 +387,7 @@ describe('context-pruner handleSteps', () => {
const results = runHandleSteps(messages, 50000, 10000)
const content = results[0].input.messages[0].content[0].text
- expect(content).toContain('[USER] [with image(s)]')
+ expect(content).toContain('User request [image(s) were attached]:')
})
test('removes only INSTRUCTIONS_PROMPT and SUBAGENT_SPAWN when under context limit', () => {
@@ -490,6 +495,90 @@ describe('context-pruner handleSteps', () => {
expect(instructionsContent).toBe('Parent agent instructions')
})
+ test('preserves tagged live user prompt as a real message after summary', () => {
+ const liveUserPrompt: Message = {
+ role: 'user',
+ content: [{ type: 'text', text: 'LATEST LIVE REQUEST' }],
+ tags: ['USER_PROMPT'],
+ }
+ const instructionsPrompt: Message = {
+ role: 'user',
+ content: [{ type: 'text', text: 'Parent instructions' }],
+ tags: ['INSTRUCTIONS_PROMPT'],
+ }
+ const prunerParamsPrompt: Message = {
+ role: 'user',
+ content: [{ type: 'text', text: '{"maxContextLength":200000}' }],
+ tags: ['USER_PROMPT'],
+ }
+ const messages: Message[] = [
+ createMessage('user', 'Older request'),
+ createMessage('assistant', 'Older answer'),
+ liveUserPrompt,
+ instructionsPrompt,
+ prunerParamsPrompt,
+ ]
+
+ const results = runHandleSteps(messages, 250000, 200000)
+ const resultMessages = results[0].input.messages
+
+ expect(resultMessages).toHaveLength(2)
+ const summaryContent = (resultMessages[0].content[0] as { text: string })
+ .text
+ expect(summaryContent).toContain('Older request')
+ expect(summaryContent).not.toContain('LATEST LIVE REQUEST')
+ expect(resultMessages[1]).toEqual(
+ expect.objectContaining({
+ role: 'user',
+ tags: ['USER_PROMPT'],
+ }),
+ )
+ expect((resultMessages[1].content[0] as { text: string }).text).toBe(
+ 'LATEST LIVE REQUEST',
+ )
+ })
+
+ test('keeps live user prompt in memory and adds continuation prompt when pruning mid-turn', () => {
+ const liveUserPrompt: Message = {
+ role: 'user',
+ content: [{ type: 'text', text: 'PLEASE FIX THE BUG' }],
+ tags: ['USER_PROMPT'],
+ }
+ const prunerParamsPrompt: Message = {
+ role: 'user',
+ content: [{ type: 'text', text: '{"maxContextLength":200000}' }],
+ tags: ['USER_PROMPT'],
+ }
+ const messages: Message[] = [
+ liveUserPrompt,
+ createMessage('assistant', 'I found the likely issue.'),
+ createToolCallMessage('call-1', 'read_files', {
+ paths: ['src/bug.ts'],
+ }),
+ createToolResultMessage('call-1', 'read_files', {
+ content: 'buggy code',
+ }),
+ prunerParamsPrompt,
+ ]
+
+ const results = runHandleSteps(messages, 250000, 200000)
+ const resultMessages = results[0].input.messages
+
+ expect(resultMessages).toHaveLength(2)
+ const summaryContent = (resultMessages[0].content[0] as { text: string })
+ .text
+ expect(summaryContent).toContain('PLEASE FIX THE BUG')
+ expect(summaryContent).toContain('I found the likely issue.')
+ expect(summaryContent).toContain('Previously inspected files: src/bug.ts')
+
+ expect(resultMessages[1].role).toBe('user')
+ expect(resultMessages[1].tags).toBeUndefined()
+ const continuationText = (resultMessages[1].content[0] as { text: string })
+ .text
+ expect(continuationText).toContain('Continue the existing assistant turn')
+ expect(continuationText).toContain('Do not restart completed work')
+ })
+
test('handles empty message history', () => {
const messages: Message[] = []
@@ -564,7 +653,7 @@ describe('context-pruner handleSteps', () => {
const results = runHandleSteps(messages, 50000, 10000)
const content = results[0].input.messages[0].content[0].text
- expect(content).toContain('Spawned agent: file-picker')
+ expect(content).toContain('Previously delegated agent file-picker')
})
test('handles long terminal commands by truncating', () => {
@@ -583,7 +672,7 @@ describe('context-pruner handleSteps', () => {
// Should truncate to 50 chars + ...
expect(content).toContain(
- 'Ran command: npm run build -- --config=production --verbose --o...',
+ 'Previously ran command: npm run build -- --config=production --verbose --o...',
)
})
@@ -597,7 +686,7 @@ describe('context-pruner handleSteps', () => {
const results = runHandleSteps(messages, 50000, 10000)
const content = results[0].input.messages[0].content[0].text
- expect(content).toContain('Used tool: unknown_tool_name')
+ expect(content).toContain('Previously used tool unknown_tool_name')
})
test('handles multiple tool calls in single assistant message', () => {
@@ -630,8 +719,8 @@ describe('context-pruner handleSteps', () => {
const content = results[0].input.messages[0].content[0].text
// Both tool calls should be in the summary
- expect(content).toContain('Read files: a.ts')
- expect(content).toContain('Read files: b.ts')
+ expect(content).toContain('Previously inspected files: a.ts')
+ expect(content).toContain('Previously inspected files: b.ts')
})
test('handles mixed text and tool calls in assistant message', () => {
@@ -659,7 +748,7 @@ describe('context-pruner handleSteps', () => {
// Should have both text and tool summary
expect(content).toContain('Let me read that file for you')
- expect(content).toContain('Read files: test.ts')
+ expect(content).toContain('Previously inspected files: test.ts')
})
})
@@ -803,7 +892,9 @@ describe('context-pruner code_search with flags', () => {
const results = runHandleSteps(messages)
const content = results[0].input.messages[0].content[0].text
- expect(content).toContain('Code search: "myFunction" (-g *.ts -i)')
+ expect(content).toContain(
+ 'Previous code search for "myFunction" (-g *.ts -i)',
+ )
})
})
@@ -877,7 +968,7 @@ describe('context-pruner ask_user with questions and answers', () => {
const results = runHandleSteps(messages)
const content = results[0].input.messages[0].content[0].text
- expect(content).toContain('[USER ANSWERED] Option B was selected')
+ expect(content).toContain('User answered: Option B was selected')
})
test('includes multi-select answers', () => {
@@ -896,7 +987,7 @@ describe('context-pruner ask_user with questions and answers', () => {
const results = runHandleSteps(messages)
const content = results[0].input.messages[0].content[0].text
- expect(content).toContain('[USER ANSWERED] Caching, Logging, Monitoring')
+ expect(content).toContain('User answered: Caching, Logging, Monitoring')
})
test('shows when user skipped question', () => {
@@ -913,7 +1004,7 @@ describe('context-pruner ask_user with questions and answers', () => {
const results = runHandleSteps(messages)
const content = results[0].input.messages[0].content[0].text
- expect(content).toContain('[USER SKIPPED QUESTION]')
+ expect(content).toContain('User skipped question')
})
})
@@ -964,7 +1055,7 @@ describe('context-pruner terminal command exit codes', () => {
const results = runHandleSteps(messages)
const content = results[0].input.messages[0].content[0].text
- expect(content).toContain('[COMMAND FAILED] Exit code: 1')
+ expect(content).toContain('Command failed with exit code: 1')
})
test('does not show failure for successful command (exit code 0)', () => {
@@ -982,7 +1073,7 @@ describe('context-pruner terminal command exit codes', () => {
const results = runHandleSteps(messages)
const content = results[0].input.messages[0].content[0].text
- expect(content).not.toContain('[COMMAND FAILED]')
+ expect(content).not.toContain('Command failed with exit code')
})
})
@@ -1257,9 +1348,7 @@ First assistant response
})
test('keeps multi-part tool entries grouped across compaction cycles', () => {
- const simulateCompaction = (
- inputMessages: Message[],
- ): Message => {
+ const simulateCompaction = (inputMessages: Message[]): Message => {
const result = runHandleSteps(inputMessages, 250000, 200000)
return result[0].input.messages[0]
}
@@ -1285,8 +1374,10 @@ First assistant response
.text
// Both parts should be present in cycle 1
- expect(summary1Text).toContain('[TOOL ERROR: run_terminal_command] Test suite failed')
- expect(summary1Text).toContain('[COMMAND FAILED] Exit code: 1')
+ expect(summary1Text).toContain(
+ 'Tool error from run_terminal_command: Test suite failed',
+ )
+ expect(summary1Text).toContain('Command failed with exit code: 1')
// Cycle 2: re-compact — the multi-part entry should stay as one entry
const cycle2Messages: Message[] = [
@@ -1299,8 +1390,10 @@ First assistant response
.text
// Both parts should still be present together after re-compaction
- expect(summary2Text).toContain('[TOOL ERROR: run_terminal_command] Test suite failed')
- expect(summary2Text).toContain('[COMMAND FAILED] Exit code: 1')
+ expect(summary2Text).toContain(
+ 'Tool error from run_terminal_command: Test suite failed',
+ )
+ expect(summary2Text).toContain('Command failed with exit code: 1')
// They should be within the same --- delimited chunk (not split apart)
const separator = '\n\n---\n\n'
@@ -1308,9 +1401,9 @@ First assistant response
.replace(/[\s\S]*?\n\n/, '')
.replace(/<\/conversation_summary>[\s\S]*/, '')
.split(separator)
- const errorChunk = chunks.find((c) => c.includes('[TOOL ERROR:'))
+ const errorChunk = chunks.find((c) => c.includes('Tool error from'))
expect(errorChunk).toBeDefined()
- expect(errorChunk).toContain('[COMMAND FAILED] Exit code: 1')
+ expect(errorChunk).toContain('Command failed with exit code: 1')
})
test('handles 3+ compaction cycles without nested PREVIOUS SUMMARY markers', () => {
@@ -1562,14 +1655,15 @@ describe('context-pruner str_replace and write_file tool results', () => {
createToolResultMessage('call-1', 'str_replace', {
file: 'src/utils.ts',
message: 'Updated file',
- unifiedDiff: '--- a/src/utils.ts\n+++ b/src/utils.ts\n@@ -1,1 +1,1 @@\n-foo\n+bar',
+ unifiedDiff:
+ '--- a/src/utils.ts\n+++ b/src/utils.ts\n@@ -1,1 +1,1 @@\n-foo\n+bar',
}),
]
const results = runHandleSteps(messages)
const content = results[0].input.messages[0].content[0].text
- expect(content).toContain('[EDIT RESULT: str_replace]')
+ expect(content).toContain('Edit result from str_replace:')
expect(content).toContain('unifiedDiff')
expect(content).toContain('-foo')
expect(content).toContain('+bar')
@@ -1585,14 +1679,15 @@ describe('context-pruner str_replace and write_file tool results', () => {
createToolResultMessage('call-1', 'write_file', {
file: 'src/new-file.ts',
message: 'Created file',
- unifiedDiff: '--- /dev/null\n+++ b/src/new-file.ts\n@@ -0,0 +1 @@\n+export const hello = "world"',
+ unifiedDiff:
+ '--- /dev/null\n+++ b/src/new-file.ts\n@@ -0,0 +1 @@\n+export const hello = "world"',
}),
]
const results = runHandleSteps(messages)
const content = results[0].input.messages[0].content[0].text
- expect(content).toContain('[EDIT RESULT: write_file]')
+ expect(content).toContain('Edit result from write_file:')
expect(content).toContain('export const hello')
})
@@ -1614,7 +1709,7 @@ describe('context-pruner str_replace and write_file tool results', () => {
const results = runHandleSteps(messages)
const content = results[0].input.messages[0].content[0].text
- expect(content).toContain('[EDIT RESULT: str_replace]')
+ expect(content).toContain('Edit result from str_replace:')
expect(content).toContain('...')
// Should not contain the full diff
expect(content).not.toContain(longDiff)
@@ -1680,8 +1775,8 @@ describe('context-pruner str_replace and write_file tool results', () => {
const content = results[0].input.messages[0].content[0].text
// Should have both the tool call summary and the full result
- expect(content).toContain('Edited file: src/file.ts')
- expect(content).toContain('[EDIT RESULT: str_replace]')
+ expect(content).toContain('Previously edited file: src/file.ts')
+ expect(content).toContain('Edit result from str_replace:')
expect(content).toContain('errorMessage')
expect(content).toContain('No match found for old string')
})
@@ -1731,7 +1826,7 @@ describe('context-pruner glob and list_directory tools', () => {
const results = runHandleSteps(messages)
const content = results[0].input.messages[0].content[0].text
- expect(content).toContain('Glob: **/*.ts')
+ expect(content).toContain('Previous glob search for **/*.ts')
})
test('summarizes list_directory tool with path', () => {
@@ -1746,7 +1841,7 @@ describe('context-pruner glob and list_directory tools', () => {
const results = runHandleSteps(messages)
const content = results[0].input.messages[0].content[0].text
- expect(content).toContain('Listed dir: src')
+ expect(content).toContain('Previously listed directory: src')
})
test('summarizes read_subtree tool with paths', () => {
@@ -1761,7 +1856,9 @@ describe('context-pruner glob and list_directory tools', () => {
const results = runHandleSteps(messages)
const content = results[0].input.messages[0].content[0].text
- expect(content).toContain('Read subtree: src/components, src/utils')
+ expect(content).toContain(
+ 'Previously inspected subtrees: src/components, src/utils',
+ )
})
})
@@ -1920,17 +2017,24 @@ describe('context-pruner dual-budget behavior', () => {
})
test('counts tool result summaries against assistant+tool budget', () => {
- // Use str_replace with a large result — this produces a summarized [EDIT RESULT] entry
+ // Use str_replace with a large result — this produces a summarized edit-result entry
const largeDiff = 'LARGE_DIFF_CONTENT_' + 'X'.repeat(900)
const messages = [
createMessage('user', 'Do something'),
- createToolCallMessage('call-1', 'str_replace', { path: 'big.ts', replacements: [] }),
- createToolResultMessage('call-1', 'str_replace', { file: 'big.ts', message: 'Updated', unifiedDiff: largeDiff }),
+ createToolCallMessage('call-1', 'str_replace', {
+ path: 'big.ts',
+ replacements: [],
+ }),
+ createToolResultMessage('call-1', 'str_replace', {
+ file: 'big.ts',
+ message: 'Updated',
+ unifiedDiff: largeDiff,
+ }),
createMessage('user', 'Recent question'),
createMessage('assistant', 'Recent answer'),
]
- // Assistant budget too small for the large [EDIT RESULT] summary entry
+ // Assistant budget too small for the large edit-result summary entry
const results = runHandleSteps(messages, 250000, 200000, {
assistantToolBudget: 100,
userBudget: 5000,
@@ -2133,11 +2237,23 @@ describe('context-pruner dual-budget behavior', () => {
// Long user message (~45k chars, exceeds USER_MESSAGE_LIMIT of 13k tokens = 39k chars)
// Middle marker placed ~85% through so it falls in the truncated gap
// (past the 80% prefix but before the 20% suffix)
- const longUserMessage = 'LONG_USER_START_' + 'Here is a detailed specification for the new feature. '.repeat(650) + '_LONG_USER_MIDDLE_MARKER_' + 'Here is a detailed specification for the new feature. '.repeat(150)
+ const longUserMessage =
+ 'LONG_USER_START_' +
+ 'Here is a detailed specification for the new feature. '.repeat(650) +
+ '_LONG_USER_MIDDLE_MARKER_' +
+ 'Here is a detailed specification for the new feature. '.repeat(150)
// Long assistant message with text (~8k chars, exceeds ASSISTANT_MESSAGE_LIMIT of 1.3k tokens = 3.9k chars)
// plus multiple tool calls. Middle marker placed ~60% through so it falls in the truncated gap.
- const longAssistantText = 'LONG_ASSISTANT_START_' + 'I will implement this step by step, starting with the data model changes. '.repeat(60) + '_LONG_ASST_MIDDLE_MARKER_' + 'I will implement this step by step, starting with the data model changes. '.repeat(40)
+ const longAssistantText =
+ 'LONG_ASSISTANT_START_' +
+ 'I will implement this step by step, starting with the data model changes. '.repeat(
+ 60,
+ ) +
+ '_LONG_ASST_MIDDLE_MARKER_' +
+ 'I will implement this step by step, starting with the data model changes. '.repeat(
+ 40,
+ )
const assistantWithToolCalls: Message = {
role: 'assistant',
content: [
@@ -2172,7 +2288,8 @@ describe('context-pruner dual-budget behavior', () => {
}
// str_replace result with a large diff (~3k chars, exceeds 2k truncation limit)
- const largeDiff = 'DIFF_START_MARKER_' + '+added line\n'.repeat(250) + '_DIFF_END_MARKER'
+ const largeDiff =
+ 'DIFF_START_MARKER_' + '+added line\n'.repeat(250) + '_DIFF_END_MARKER'
// spawn_agents result with 5 non-blacklisted agents producing large outputs
// Each ~4k chars, total ~20k, exceeds TOOL_ENTRY_LIMIT of 5k tokens = 15k chars
@@ -2180,7 +2297,10 @@ describe('context-pruner dual-budget behavior', () => {
agentType: 'editor',
value: {
type: 'string',
- value: `AGENT_${i}_OUTPUT_START_` + 'Implementation details. '.repeat(160) + `_AGENT_${i}_OUTPUT_END`,
+ value:
+ `AGENT_${i}_OUTPUT_START_` +
+ 'Implementation details. '.repeat(160) +
+ `_AGENT_${i}_OUTPUT_END`,
},
}))
@@ -2188,8 +2308,14 @@ describe('context-pruner dual-budget behavior', () => {
previousSummary,
createMessage('user', longUserMessage),
assistantWithToolCalls,
- createToolResultMessage('call-1', 'read_files', { content: 'file data' } as JSONValue),
- createToolResultMessage('call-2', 'str_replace', { file: 'src/model.ts', message: 'Updated', unifiedDiff: largeDiff }),
+ createToolResultMessage('call-1', 'read_files', {
+ content: 'file data',
+ } as JSONValue),
+ createToolResultMessage('call-2', 'str_replace', {
+ file: 'src/model.ts',
+ message: 'Updated',
+ unifiedDiff: largeDiff,
+ }),
{
role: 'tool',
toolCallId: 'call-3',
@@ -2210,7 +2336,8 @@ describe('context-pruner dual-budget behavior', () => {
// === Structure checks ===
expect(content).toContain('')
expect(content).toContain('')
- const summaryTagCount = (content.match(/<conversation_summary>/g) || []).length
+ const summaryTagCount = (content.match(/<conversation_summary>/g) || [])
+ .length
expect(summaryTagCount).toBe(1)
// === Previous summary entries preserved ===
@@ -2229,12 +2356,14 @@ describe('context-pruner dual-budget behavior', () => {
expect(content).not.toContain('_LONG_ASST_MIDDLE_MARKER_') // Middle marker falls in truncated gap
// === Tool call summaries present ===
- expect(content).toContain('Read files: src/model.ts, src/service.ts')
- expect(content).toContain('Edited file: src/model.ts')
- expect(content).toContain('Spawned agents:')
+ expect(content).toContain(
+ 'Previously inspected files: src/model.ts, src/service.ts',
+ )
+ expect(content).toContain('Previously edited file: src/model.ts')
+ expect(content).toContain('Previously delegated agents:')
// === str_replace result: present but truncated at 2k chars ===
- expect(content).toContain('[EDIT RESULT: str_replace]')
+ expect(content).toContain('Edit result from str_replace:')
expect(content).toContain('DIFF_START_MARKER_')
expect(content).not.toContain('_DIFF_END_MARKER') // Truncated by 2k result limit
@@ -2258,13 +2387,16 @@ describe('context-pruner dual-budget behavior', () => {
content: [
{
type: 'text',
- text: `\nThis is a summary of the conversation so far. The original messages have been condensed to save context space.\n\n[USER]\nOLD_DROPPED_USER: ${'X'.repeat(600)}\n\n---\n\n[ASSISTANT]\nOLD_DROPPED_ASSISTANT: ${'Y'.repeat(600)}\n\n---\n\n[USER]\nOLD_DROPPED_USER_2: Asked about deployment\n\n---\n\n[ASSISTANT]\nOLD_DROPPED_ASSISTANT_2: Explained deployment process\n`,
+ text: `\nThis is a summary of the conversation so far. The original messages have been condensed to save context space.\n\n[USER]\nOLD_DROPPED_USER: ${'X'.repeat(600)}\n\n---\n\n[ASSISTANT]\nOLD_DROPPED_ASSISTANT: ${'Y'.repeat(600)}\n\n---\n\n[USER]\nOLD_DROPPED_USER_2: Asked about deployment\n\n---\n\n[ASSISTANT]\nOLD_DROPPED_ASSISTANT_2: ${'Explained deployment process. '.repeat(80)}\n`,
},
],
}
// Long user message (~12k chars, under truncation limit but uses significant budget)
- const longUserMessage = 'SURVIVED_USER_START_' + 'Feature request details. '.repeat(400) + '_SURVIVED_USER_END'
+ const longUserMessage =
+ 'SURVIVED_USER_START_' +
+ 'Feature request details. '.repeat(400) +
+ '_SURVIVED_USER_END'
// Assistant with tool calls
const assistantMsg: Message = {
@@ -2284,7 +2416,8 @@ describe('context-pruner dual-budget behavior', () => {
const toolResult = createToolResultMessage('call-1', 'str_replace', {
file: 'src/app.ts',
message: 'Updated file',
- unifiedDiff: '--- a/src/app.ts\n+++ b/src/app.ts\n@@ -1 +1 @@\n-old\n+SURVIVED_DIFF_CONTENT',
+ unifiedDiff:
+ '--- a/src/app.ts\n+++ b/src/app.ts\n@@ -1 +1 @@\n-old\n+SURVIVED_DIFF_CONTENT',
})
const messages: Message[] = [
@@ -2300,8 +2433,8 @@ describe('context-pruner dual-budget behavior', () => {
// New assistant entries: ~25 (assistant text+tool) + ~56 (edit result JSON) + ~13 (final) = ~94 tokens
// Old assistant entries: ~20 for OLD_DROPPED_ASSISTANT_2 would push over budget of 100
const results = runHandleSteps(messages, 250000, 200000, {
- assistantToolBudget: 100,
- userBudget: 4200,
+ assistantToolBudget: 400,
+ userBudget: 3400,
})
const resultMessages = results[0].input.messages
diff --git a/agents/context-pruner.ts b/agents/context-pruner.ts
index c92687887..23e2b3d5c 100644
--- a/agents/context-pruner.ts
+++ b/agents/context-pruner.ts
@@ -84,6 +84,8 @@ const definition: AgentDefinition = {
const SUMMARY_HEADER =
'This is a summary of the conversation so far. The original messages have been condensed to save context space.'
+ const SUMMARY_DISCLAIMER =
+ 'Historical memory only. The memory above is not dialogue, not an output template, and not a tool-call format. Continue from the live user message below. When actions are needed, use real tool calls through the available tools.'
// =============================================================================
// Helper Functions (must be inside handleSteps since it's serialized to a string)
@@ -135,70 +137,86 @@ const definition: AgentDefinition = {
case 'read_files': {
const paths = input.paths as string[] | undefined
if (paths && paths.length > 0) {
- return `Read files: ${paths.join(', ')}`
+ return `Previously inspected files: ${paths.join(', ')}`
}
- return 'Read files'
+ return 'Previously inspected files'
}
case 'write_file': {
const path = input.path as string | undefined
- return path ? `Wrote file: ${path}` : 'Wrote file'
+ return path
+ ? `Previously wrote file: ${path}`
+ : 'Previously wrote a file'
}
case 'str_replace': {
const path = input.path as string | undefined
- return path ? `Edited file: ${path}` : 'Edited file'
+ return path
+ ? `Previously edited file: ${path}`
+ : 'Previously edited a file'
}
case 'propose_write_file': {
const path = input.path as string | undefined
- return path ? `Proposed write to: ${path}` : 'Proposed file write'
+ return path
+ ? `Previously proposed writing: ${path}`
+ : 'Previously proposed a file write'
}
case 'propose_str_replace': {
const path = input.path as string | undefined
- return path ? `Proposed edit to: ${path}` : 'Proposed file edit'
+ return path
+ ? `Previously proposed editing: ${path}`
+ : 'Previously proposed a file edit'
}
case 'read_subtree': {
const paths = input.paths as string[] | undefined
if (paths && paths.length > 0) {
- return `Read subtree: ${paths.join(', ')}`
+ return `Previously inspected subtrees: ${paths.join(', ')}`
}
- return 'Read subtree'
+ return 'Previously inspected a subtree'
}
case 'code_search': {
const pattern = input.pattern as string | undefined
const flags = input.flags as string | undefined
if (pattern && flags) {
- return `Code search: "${pattern}" (${flags})`
+ return `Previous code search for "${pattern}" (${flags})`
}
- return pattern ? `Code search: "${pattern}"` : 'Code search'
+ return pattern
+ ? `Previous code search for "${pattern}"`
+ : 'Previous code search'
}
case 'glob': {
const pattern = input.pattern as string | undefined
- return pattern ? `Glob: ${pattern}` : 'Glob search'
+ return pattern
+ ? `Previous glob search for ${pattern}`
+ : 'Previous glob search'
}
case 'list_directory': {
const path = input.path as string | undefined
- return path ? `Listed dir: ${path}` : 'Listed directory'
+ return path
+ ? `Previously listed directory: ${path}`
+ : 'Previously listed a directory'
}
case 'find_files': {
const prompt = input.prompt as string | undefined
- return prompt ? `Find files: "${prompt}"` : 'Find files'
+ return prompt
+ ? `Previous file-finding request: "${prompt}"`
+ : 'Previous file-finding request'
}
case 'run_terminal_command': {
const command = input.command as string | undefined
if (command) {
const shortCmd =
command.length > 50 ? command.slice(0, 50) + '...' : command
- return `Ran command: ${shortCmd}`
+ return `Previously ran command: ${shortCmd}`
}
- return 'Ran terminal command'
+ return 'Previously ran a terminal command'
}
case 'spawn_agents':
case 'spawn_agent_inline': {
const agents = input.agents as
| Array<{
- agent_type: string
- prompt?: string
- params?: Record
- }>
+ agent_type: string
+ prompt?: string
+ params?: Record
+ }>
| undefined
const agentType = input.agent_type as string | undefined
const prompt = input.prompt as string | undefined
@@ -230,7 +248,7 @@ const definition: AgentDefinition = {
}
return detail
})
- return `Spawned agents:\n${agentDetails.map((d) => `- ${d}`).join('\n')}`
+ return `Previously delegated agents:\n${agentDetails.map((d) => `- ${d}`).join('\n')}`
}
if (agentType) {
const extras: string[] = []
@@ -248,11 +266,11 @@ const definition: AgentDefinition = {
extras.push(`params: ${truncatedParams}`)
}
if (extras.length > 0) {
- return `Spawned agent: ${agentType} (${extras.join(', ')})`
+ return `Previously delegated agent ${agentType} (${extras.join(', ')})`
}
- return `Spawned agent: ${agentType}`
+ return `Previously delegated agent ${agentType}`
}
- return 'Spawned agent(s)'
+ return 'Previously delegated agent work'
}
case 'write_todos': {
const todos = input.todos as
@@ -289,30 +307,36 @@ const definition: AgentDefinition = {
return 'Suggested followups'
case 'web_search': {
const query = input.query as string | undefined
- return query ? `Web search: "${query}"` : 'Web search'
+ return query
+ ? `Previous web search for "${query}"`
+ : 'Previous web search'
}
case 'gravity_index': {
const query = input.query as string | undefined
const action = input.action as string | undefined
if (query) {
- return `Gravity Index ${action ?? 'search'}: "${query}"`
+ return `Previous Gravity Index ${action ?? 'search'} for "${query}"`
}
- return action ? `Gravity Index ${action}` : 'Gravity Index'
+ return action
+ ? `Previous Gravity Index ${action}`
+ : 'Previous Gravity Index use'
}
case 'read_docs': {
const libraryTitle = input.libraryTitle as string | undefined
const topic = input.topic as string | undefined
if (libraryTitle && topic) {
- return `Read docs: ${libraryTitle} - ${topic}`
+ return `Previously consulted docs: ${libraryTitle} - ${topic}`
}
- return libraryTitle ? `Read docs: ${libraryTitle}` : 'Read docs'
+ return libraryTitle
+ ? `Previously consulted docs: ${libraryTitle}`
+ : 'Previously consulted docs'
}
case 'set_output':
- return 'Set output'
+ return 'Previously set structured output'
case 'set_messages':
- return 'Set messages'
+ return 'Previously updated message history'
default:
- return `Used tool: ${toolName}`
+ return `Previously used tool ${toolName}`
}
}
@@ -377,7 +401,11 @@ const definition: AgentDefinition = {
// - Prune when context exceeds max, OR
// - Prune when prompt cache will miss (>5 min gap) to take advantage of fresh context
// If not, return messages with just the subagent-specific tags removed
- if (agentState.contextTokenCount + TOKEN_COUNT_FUDGE_FACTOR <= maxContextLength && !cacheWillMiss) {
+ if (
+ agentState.contextTokenCount + TOKEN_COUNT_FUDGE_FACTOR <=
+ maxContextLength &&
+ !cacheWillMiss
+ ) {
yield {
toolName: 'set_messages',
input: { messages: currentMessages },
@@ -404,7 +432,8 @@ const definition: AgentDefinition = {
// 2. Walk backwards through summarized parts to apply token budgets
// 3. Older summarized parts beyond the budgets are dropped
- const assistantToolBudget: number = params?.assistantToolBudget ?? ASSISTANT_TOOL_BUDGET
+ const assistantToolBudget: number =
+ params?.assistantToolBudget ?? ASSISTANT_TOOL_BUDGET
const userBudget: number = params?.userBudget ?? USER_BUDGET
function shouldExcludeMessage(message: Message): boolean {
@@ -429,6 +458,12 @@ const definition: AgentDefinition = {
if (content.startsWith(SUMMARY_HEADER)) {
content = content.slice(SUMMARY_HEADER.length).trim()
}
+ const memoryMatch = content.match(
+ /<historical_memory>([\s\S]*?)<\/historical_memory>/,
+ )
+ if (memoryMatch) {
+ content = memoryMatch[1].trim()
+ }
return content
}
@@ -449,7 +484,10 @@ const definition: AgentDefinition = {
const trimmed = chunk.trim()
const isUser =
trimmed.startsWith('[USER]\n') ||
- trimmed.startsWith('[USER] [with image')
+ trimmed.startsWith('[USER] [with image') ||
+ trimmed.startsWith('User request') ||
+ trimmed.startsWith('User message') ||
+ trimmed.startsWith('Current unresolved user request')
return {
role: isUser ? ('user' as const) : ('assistant_tool' as const),
parts: [trimmed],
@@ -465,10 +503,37 @@ const definition: AgentDefinition = {
}
}
- // Filter out excluded and conversation summary messages for summarization
- const messagesToSummarize = currentMessages.filter(
- (message) => !shouldExcludeMessage(message) && !isConversationSummary(message),
+ // If pruning happens before the assistant has started responding to the
+ // current user prompt, preserve that prompt as a real message after the
+ // memory artifact. If pruning happens mid-turn, keep the prompt in the
+ // historical memory with the assistant/tool progress that followed it and
+ // append a synthetic continuation prompt instead.
+ const latestLiveUserPromptIndex = currentMessages.findLastIndex((message) =>
+ message.tags?.includes('USER_PROMPT'),
)
+ const latestLiveUserPromptMessage =
+ latestLiveUserPromptIndex !== -1
+ ? currentMessages[latestLiveUserPromptIndex]
+ : null
+ const isMidTurnPrune =
+ latestLiveUserPromptIndex !== -1 &&
+ currentMessages
+ .slice(latestLiveUserPromptIndex + 1)
+ .some(
+ (message) =>
+ !shouldExcludeMessage(message) && !isConversationSummary(message),
+ )
+
+ // Drop excluded and conversation-summary messages from summarization; outside a mid-turn prune, also drop the live user prompt, which is re-appended after the summary as a real message
+ const messagesToSummarize = currentMessages
+ .filter(
+ (_message, index) =>
+ isMidTurnPrune || index !== latestLiveUserPromptIndex,
+ )
+ .filter(
+ (message) =>
+ !shouldExcludeMessage(message) && !isConversationSummary(message),
+ )
// Find the last user message with images to preserve in the final output
let lastUserImageParts: Array> = []
@@ -487,7 +552,10 @@ const definition: AgentDefinition = {
}
// Phase 1: Summarize ALL messages into tagged entries
- const summarizedEntries: Array<{ role: 'user' | 'assistant_tool'; parts: string[] }> = []
+ const summarizedEntries: Array<{
+ role: 'user' | 'assistant_tool'
+ parts: string[]
+ }> = []
for (const message of messagesToSummarize) {
if (message.role === 'user') {
@@ -501,10 +569,10 @@ const definition: AgentDefinition = {
part.type === 'image' || part.type === 'media',
)
}
- const imageNote = hasImages ? ' [with image(s)]' : ''
+ const imageNote = hasImages ? ' [image(s) were attached]' : ''
summarizedEntries.push({
role: 'user',
- parts: [`[USER]${imageNote}\n${text}`],
+ parts: [`User request${imageNote}:\n${text}`],
})
}
} else if (message.role === 'assistant') {
@@ -531,17 +599,20 @@ const definition: AgentDefinition = {
const parts: string[] = []
if (textParts.length > 0) {
let combinedText = textParts.join('\n')
- combinedText = truncateLongText(combinedText, ASSISTANT_MESSAGE_LIMIT * CHARS_PER_TOKEN)
- parts.push(combinedText)
+ combinedText = truncateLongText(
+ combinedText,
+ ASSISTANT_MESSAGE_LIMIT * CHARS_PER_TOKEN,
+ )
+ parts.push(`Progress note:\n${combinedText}`)
}
if (toolSummaries.length > 0) {
- parts.push(toolSummaries.join('; '))
+ parts.push(`Prior action record:\n${toolSummaries.join('\n')}`)
}
if (parts.length > 0) {
summarizedEntries.push({
role: 'assistant_tool',
- parts: [`[ASSISTANT]\n${parts.join('\n')}`],
+ parts,
})
}
} else if (message.role === 'tool') {
@@ -559,7 +630,7 @@ const definition: AgentDefinition = {
errorText = errorText.slice(0, 100) + '...'
}
entryParts.push(
- `[TOOL ERROR: ${toolMessage.toolName}] ${errorText}`,
+ `Tool error from ${toolMessage.toolName}: ${errorText}`,
)
}
@@ -569,20 +640,20 @@ const definition: AgentDefinition = {
) {
const exitCode = value.exitCode as number
if (exitCode !== 0) {
- entryParts.push(`[COMMAND FAILED] Exit code: ${exitCode}`)
+ entryParts.push(`Command failed with exit code: ${exitCode}`)
}
}
if (toolMessage.toolName === 'ask_user') {
if (value.skipped) {
- entryParts.push('[USER SKIPPED QUESTION]')
+ entryParts.push('User skipped question')
} else if ('answers' in value) {
const answers = value.answers as
| Array<{
- selectedOption?: string
- selectedOptions?: string[]
- otherText?: string
- }>
+ selectedOption?: string
+ selectedOptions?: string[]
+ otherText?: string
+ }>
| undefined
if (answers && answers.length > 0) {
const answerTexts = answers
@@ -598,7 +669,7 @@ const definition: AgentDefinition = {
answerTexts.length > 10_000
? answerTexts.slice(0, 10_000) + '...'
: answerTexts
- entryParts.push(`[USER ANSWERED] ${truncated}`)
+ entryParts.push(`User answered: ${truncated}`)
}
}
}
@@ -615,7 +686,7 @@ const definition: AgentDefinition = {
? resultStr.slice(0, 2000) + '...'
: resultStr
entryParts.push(
- `[EDIT RESULT: ${toolMessage.toolName}]\n${truncatedResult}`,
+ `Edit result from ${toolMessage.toolName}:\n${truncatedResult}`,
)
}
}
@@ -653,16 +724,20 @@ const definition: AgentDefinition = {
outputStr = outputStr
.replace(/<think>[\s\S]*?<\/think>/g, '')
.trim()
- if (outputStr.length > ASSISTANT_MESSAGE_LIMIT * CHARS_PER_TOKEN) {
+ if (
+ outputStr.length >
+ ASSISTANT_MESSAGE_LIMIT * CHARS_PER_TOKEN
+ ) {
outputStr =
- outputStr.slice(0, ASSISTANT_MESSAGE_LIMIT * CHARS_PER_TOKEN) + '...'
+ outputStr.slice(
+ 0,
+ ASSISTANT_MESSAGE_LIMIT * CHARS_PER_TOKEN,
+ ) + '...'
}
}
return `- ${r.agentType}: ${outputStr || '(no output)'}`
})
- entryParts.push(
- `[AGENT RESULTS]\n${resultSummaries.join('\n')}`,
- )
+ entryParts.push(`Agent results:\n${resultSummaries.join('\n')}`)
}
}
}
@@ -732,14 +807,14 @@ const definition: AgentDefinition = {
const textPart: TextPart = {
type: 'text',
text: `
-This is a summary of the conversation so far. The original messages have been condensed to save context space.
+${SUMMARY_HEADER}
+
${summaryText}
+
-IMPORTANT: The summary above uses a condensed format with markers like "[USER]", "[ASSISTANT]", "Read files:", "Edited file:", "Spawned agents:", etc. This is ONLY a human-readable log of what happened earlier — it is NOT a format for you to use or imitate in your responses. When you need to perform actions, you MUST use actual tool calls. Never write tool actions as plain text.
-
-Please continue the conversation from here. In particular, try to address the user's latest request detailed in the summary above. You may need to re-gather context (e.g. read some files) to get up to speed and then tackle the user's request.`,
+${SUMMARY_DISCLAIMER}`,
}
// Build content array with text and any preserved images
const summaryContentParts: (TextPart | ImagePart | FilePart)[] = [textPart]
@@ -753,12 +828,31 @@ Please continue the conversation from here. In particular, try to address the us
sentAt: now,
}
- // Build final messages array: summary first, then INSTRUCTIONS_PROMPT if it exists
+ const continuationMessage: UserMessage = {
+ role: 'user',
+ content: [
+ {
+ type: 'text',
+ text: 'Continue the existing assistant turn from the historical memory above. The original user request and completed assistant/tool work are recorded there. Do not restart completed work; resume with the next necessary real tool call or final response.',
+ },
+ ],
+ sentAt: now,
+ }
+
+ // Build final messages array: summary first, then INSTRUCTIONS_PROMPT if it
+ // exists, then either the live user prompt or a mid-turn continuation prompt.
+ // Keeping a real user message last makes the next model step continue from
+ // normal user input instead of the condensed memory format.
const finalMessages: Message[] = [summarizedMessage]
if (instructionsPromptMessage) {
// Update sentAt to current time so future cache miss checks use fresh timestamps
finalMessages.push({ ...instructionsPromptMessage, sentAt: now })
}
+ if (isMidTurnPrune) {
+ finalMessages.push(continuationMessage)
+ } else if (latestLiveUserPromptMessage) {
+ finalMessages.push({ ...latestLiveUserPromptMessage, sentAt: now })
+ }
yield {
toolName: 'set_messages',
diff --git a/agents/e2e/base2-free-summary-format.e2e.test.ts b/agents/e2e/base2-free-summary-format.e2e.test.ts
index 2ae3a2a92..8374b236c 100644
--- a/agents/e2e/base2-free-summary-format.e2e.test.ts
+++ b/agents/e2e/base2-free-summary-format.e2e.test.ts
@@ -38,6 +38,13 @@ const SUMMARY_IMITATION_PATTERNS = [
/^Used tool:\s/m,
/^\[ASSISTANT\]\n/m,
/^\[USER\]\n/m,
+ /^User request(?:\s|\[|:)/m,
+ /^Progress note:\s/m,
+ /^Prior action record:\s/m,
+ /^Previously inspected files:\s/m,
+ /^Previously edited file:\s/m,
+ /^Previously delegated agents:\s*\n/m,
+ /^Edit result from \w+:/m,
]
/**
@@ -59,8 +66,8 @@ function detectSummaryImitation(text: string): string[] {
/**
* Creates a pre-summarized conversation that mimics what the context pruner produces.
- * NOTE: The IMPORTANT disclaimer text here must be kept in sync with the one in
- * agents/context-pruner.ts. If you change the disclaimer there, update it here too.
+ * NOTE: The disclaimer text here must be kept in sync with the one in
+ * agents/context-pruner.ts. If you change the memory artifact format there, update it here too.
*/
function createSummarizedConversation(): Message {
return {
@@ -71,44 +78,50 @@ function createSummarizedConversation(): Message {
text: `
This is a summary of the conversation so far. The original messages have been condensed to save context space.
-[USER]
+
+User request:
The user asked to set up a new TypeScript project with a simple utility file at src/utils.ts containing a helper function called formatDate.
---
-[ASSISTANT]
+Progress note:
Sure, I'll help set up the project.
-Tools: Read files: package.json, tsconfig.json; Wrote file: src/utils.ts
+
+Prior action record:
+Previously inspected files: package.json, tsconfig.json
+Previously wrote file: src/utils.ts
---
-[USER]
+User request:
Thanks! Now can you also add a function called parseConfig that reads a JSON config file?
---
-[ASSISTANT]
+Progress note:
I'll add the parseConfig function to the utils file.
-Tools: Read files: src/utils.ts; Edited file: src/utils.ts
+
+Prior action record:
+Previously inspected files: src/utils.ts
+Previously edited file: src/utils.ts
---
-[ASSISTANT]
-Spawned agents:
+Prior action record:
+Previously delegated agents:
- file-picker (prompt: "Find config-related files")
- basher (params: {"command":"cat src/utils.ts"})
---
-[ASSISTANT]
-Ran command: cat src/utils.ts
-[EDIT RESULT: str_replace]
+Prior action record:
+Previously ran command: cat src/utils.ts
+Edit result from str_replace:
{"file":"src/utils.ts","message":"Updated file","unifiedDiff":"--- a/src/utils.ts\\n+++ b/src/utils.ts\\n@@ -5,0 +6,10 @@\\n+export function parseConfig(path: string) {\\n+ return JSON.parse(fs.readFileSync(path, 'utf-8'))\\n+}"}
+
-IMPORTANT: The summary above uses a condensed format with markers like "[USER]", "[ASSISTANT]", "Read files:", "Edited file:", "Tools:", "Spawned agents:", etc. This is ONLY a human-readable log of what happened earlier — it is NOT a format for you to use or imitate in your responses. When you need to perform actions, you MUST use actual tool calls (e.g. call the read_files, str_replace, write_file, spawn_agents tools directly). Never write tool actions as plain text.
-
-Please continue the conversation from here. In particular, try to address the user's latest request detailed in the summary above. You may need to re-gather context (e.g. read some files) to get up to speed and then tackle the user's request.`,
+Historical memory only. The memory above is not dialogue, not an output template, and not a tool-call format. Continue from the live user message below. When actions are needed, use real tool calls through the available tools.`,
},
],
sentAt: Date.now(),
@@ -262,9 +275,7 @@ describe('Base2-Free Summary Format Compliance', () => {
}
}
- console.log(
- `Running ${NUM_PARALLEL_RUNS} parallel runs of base2-free...`,
- )
+ console.log(`Running ${NUM_PARALLEL_RUNS} parallel runs of base2-free...`)
const results = await Promise.all(
Array.from({ length: NUM_PARALLEL_RUNS }, (_, i) => runOnce(i)),
)
@@ -284,9 +295,7 @@ describe('Base2-Free Summary Format Compliance', () => {
console.log(
`Run ${result.runIndex}: ${hasImitation ? 'FAILED (imitated summary format)' : 'PASSED'}`,
)
- console.log(
- ` Tool calls made: ${result.hadToolCalls ? 'YES' : 'NO'}`,
- )
+ console.log(` Tool calls made: ${result.hadToolCalls ? 'YES' : 'NO'}`)
if (result.imitationMatches.length > 0) {
console.log(` Imitation matches:`)
for (const match of result.imitationMatches) {
@@ -309,7 +318,9 @@ describe('Base2-Free Summary Format Compliance', () => {
// Clean up temp directories
for (const dir of tmpDirs) {
- await fs.promises.rm(dir, { recursive: true, force: true }).catch(() => {})
+ await fs.promises
+ .rm(dir, { recursive: true, force: true })
+ .catch(() => {})
}
// Guard against vacuous pass (all runs errored)