From 1d03e51d21c813ec6c2a6d9c6a38c59aef7e0877 Mon Sep 17 00:00:00 2001
From: James Grugett
Date: Mon, 4 May 2026 15:00:25 -0700
Subject: [PATCH 1/2] Add complex summary format e2e

---
 .../e2e/base2-free-summary-format.e2e.test.ts | 127 ++++++++++++++++++
 1 file changed, 127 insertions(+)

diff --git a/agents/e2e/base2-free-summary-format.e2e.test.ts b/agents/e2e/base2-free-summary-format.e2e.test.ts
index 8374b236c..34346f714 100644
--- a/agents/e2e/base2-free-summary-format.e2e.test.ts
+++ b/agents/e2e/base2-free-summary-format.e2e.test.ts
@@ -128,6 +128,62 @@ Historical memory only. The memory above is not dialogue, not an output template
   }
 }
 
+/**
+ * Builds a two-message history for a turn whose context was pruned midway:
+ * a condensed summary recording the original request, one completed edit to
+ * src/utils.ts and the remaining steps, followed by a live user message that
+ * asks the agent to resume rather than restart.
+ */
+function createComplexMidTurnPrunedConversation(): Message[] {
+  return [
+    {
+      role: 'user',
+      content: [
+        {
+          type: 'text',
+          text: `
+This is a summary of the conversation so far. The original messages have been condensed to save context space.
+
+
+User request:
+The user asked to finish a config utility task in src/utils.ts. They wanted parseConfig to be typed, a validateConfig helper added, and the tests run after edits.
+
+---
+
+Progress note:
+I inspected src/utils.ts and found parseConfig was untyped. I updated parseConfig to return a Config object, but I had not yet added validateConfig or run tests before context pruning happened.
+
+Prior action record:
+Previously inspected files: package.json, tsconfig.json, src/utils.ts
+Previously edited file: src/utils.ts
+Edit result from str_replace:
+{"file":"src/utils.ts","message":"Updated parseConfig return type","unifiedDiff":"--- a/src/utils.ts\\n+++ b/src/utils.ts\\n@@ -6,2 +6,8 @@\\n-export function parseConfig(path) {\\n- return JSON.parse(fs.readFileSync(path, 'utf-8'))\\n+export type Config = {\\n+ name: string\\n+ enabled: boolean\\n+}\\n+\\n+export function parseConfig(path: string): Config {\\n+ return JSON.parse(fs.readFileSync(path, 'utf-8')) as Config\\n }"}
+
+---
+
+Progress note:
+The next step is to continue from the partially completed edit, inspect the current file state if needed, add validateConfig, and validate the result.
+
+
+
+Historical memory only. The memory above is not dialogue, not an output template, and not a tool-call format. Continue from the live user message below. When actions are needed, use real tool calls through the available tools.`,
+        },
+      ],
+      sentAt: Date.now(),
+    },
+    {
+      role: 'user',
+      content: [
+        {
+          type: 'text',
+          text: 'Continue the existing assistant turn from the historical memory above. The original user request and completed assistant/tool work are recorded there. Do not restart completed work; resume with the next necessary real tool call or final response.',
+        },
+      ],
+      sentAt: Date.now(),
+    },
+  ]
+}
+
 const PROJECT_FILES: Record<string, string> = {
   'package.json': JSON.stringify(
     { name: 'test-project', version: '1.0.0' },
@@ -329,4 +385,75 @@ describe('Base2-Free Summary Format Compliance', () => {
     },
     { timeout: 300_000 },
   )
+
+  it(
+    'should continue a complex mid-turn pruned summary with real tool calls',
+    async () => {
+      const apiKey = getApiKeyOrSkip()
+      if (!apiKey) return
+
+      const tmpDir = await fs.promises.mkdtemp(
+        path.join(os.tmpdir(), 'base2-free-midturn-summary-test-'),
+      )
+
+      try {
+        for (const [filePath, content] of Object.entries(PROJECT_FILES)) {
+          const fullPath = path.join(tmpDir, filePath)
+          await fs.promises.mkdir(path.dirname(fullPath), { recursive: true })
+          await fs.promises.writeFile(fullPath, content, 'utf-8')
+        }
+
+        const client = new CodebuffClient({
+          apiKey,
+          cwd: tmpDir,
+          projectFiles: PROJECT_FILES,
+          agentDefinitions: [base2Free as AgentDefinition, contextPruner],
+        })
+
+        const sessionState = await initialSessionState({
+          cwd: tmpDir,
+          projectFiles: PROJECT_FILES,
+        })
+        const runStateWithMessages = withMessageHistory({
+          runState: {
+            sessionState,
+            output: { type: 'error', message: '' },
+          },
+          messages: createComplexMidTurnPrunedConversation(),
+        })
+
+        const events: PrintModeEvent[] = []
+        const run = await client.run({
+          agent: base2Free.id,
+          // The live instruction is already the last history message, so the
+          // prompt itself is left empty.
+          prompt: '',
+          previousRun: runStateWithMessages,
+          maxAgentSteps: 6,
+          handleEvent: (event) => {
+            events.push(event)
+          },
+        })
+
+        if (run.output.type === 'error') {
+          throw new Error(run.output.message)
+        }
+
+        const textOutput = events
+          .filter((e) => e.type === 'text')
+          .map((e) => (e as { type: 'text'; text: string }).text)
+          .join('')
+        const hadToolCalls = events.some((e) => e.type === 'tool_call')
+        const imitationMatches = detectSummaryImitation(textOutput)
+
+        // The agent must act through real tool calls, and its text output
+        // must produce no summary-imitation matches.
+        expect(hadToolCalls).toBe(true)
+        expect(imitationMatches).toEqual([])
+      } finally {
+        await fs.promises.rm(tmpDir, { recursive: true, force: true })
+      }
+    },
+    { timeout: 300_000 },
+  )
 })

From 504c364a684183b4659793f56d1d42645443a5c2 Mon Sep 17 00:00:00 2001
From: James Grugett
Date: Mon, 4 May 2026 15:25:05 -0700
Subject: [PATCH 2/2] Load env for summary format e2e

---
 .../e2e/base2-free-summary-format.e2e.test.ts | 66 ++++++++++++++++++++++++++++++++++-
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/agents/e2e/base2-free-summary-format.e2e.test.ts b/agents/e2e/base2-free-summary-format.e2e.test.ts
index 34346f714..c1b81206c 100644
--- a/agents/e2e/base2-free-summary-format.e2e.test.ts
+++ b/agents/e2e/base2-free-summary-format.e2e.test.ts
@@ -10,7 +10,7 @@ import {
   type AgentDefinition,
   type Message,
 } from '@codebuff/sdk'
-import { describe, expect, it } from 'bun:test'
+import { beforeAll, describe, expect, it } from 'bun:test'
 
 import base2Free from '../base2/base2-free'
 import contextPruner from '../context-pruner'
@@ -64,6 +64,65 @@ function detectSummaryImitation(text: string): string[] {
   return matches
 }
 
+/**
+ * Best-effort loader that copies `KEY=value` pairs from a dotenv-style file
+ * into `process.env`.
+ *
+ * - Blank lines and lines starting with `#` are skipped.
+ * - A leading `export ` is stripped from each line.
+ * - One pair of matching single or double quotes around the value is removed.
+ * - A variable that already has a non-empty value is never overwritten.
+ *
+ * A missing file is silently ignored. Any other read failure is logged as a
+ * warning rather than thrown, so test setup still proceeds.
+ *
+ * @param filePath - Path of the env file to read.
+ */
+const loadEnvFile = async (filePath: string): Promise<void> => {
+  let content: string
+  try {
+    content = await fs.promises.readFile(filePath, 'utf-8')
+  } catch (error: unknown) {
+    // Only a missing file is expected here; report anything else (permissions,
+    // a directory at that path, ...) instead of hiding a broken setup.
+    const isMissingFile =
+      typeof error === 'object' &&
+      error !== null &&
+      'code' in error &&
+      error.code === 'ENOENT'
+    if (!isMissingFile) {
+      console.warn(`Could not read env file ${filePath}:`, error)
+    }
+    return
+  }
+
+  for (const rawLine of content.split('\n')) {
+    const line = rawLine.trim()
+    if (!line || line.startsWith('#')) continue
+    const normalized = line.startsWith('export ')
+      ? line.slice('export '.length)
+      : line
+    const equalsIndex = normalized.indexOf('=')
+    // `<= 0` also rejects lines whose key would be empty (`=value`).
+    if (equalsIndex <= 0) continue
+    const key = normalized.slice(0, equalsIndex).trim()
+    // Truthiness check: an existing but empty value is treated as unset.
+    if (!key || process.env[key]) continue
+    let value = normalized.slice(equalsIndex + 1).trim()
+    const quote = value[0]
+    // Require at least two characters so a lone quote character is kept as-is
+    // instead of being "unquoted" into an empty string.
+    if (
+      value.length >= 2 &&
+      (quote === '"' || quote === "'") &&
+      value.endsWith(quote)
+    ) {
+      value = value.slice(1, -1)
+    }
+    process.env[key] = value
+  }
+}
+
 /**
  * Creates a pre-summarized conversation that mimics what the context pruner produces.
  * NOTE: The disclaimer text here must be kept in sync with the one in
@@ -219,6 +278,11 @@ const PROJECT_FILES: Record<string, string> = {
 describe('Base2-Free Summary Format Compliance', () => {
   const NUM_PARALLEL_RUNS = 3
 
+  beforeAll(async () => {
+    await loadEnvFile(path.resolve(process.cwd(), '.env.local'))
+    await loadEnvFile(path.resolve(process.cwd(), '../.env.local'))
+  })
+
   const getApiKeyOrSkip = (): string | null => {
     const apiKey = process.env[API_KEY_ENV_VAR]
     if (!apiKey) {