From 2d299ea9c4dc2df6ec71e3d3363455186db802a9 Mon Sep 17 00:00:00 2001 From: Nate Date: Sat, 28 Feb 2026 13:14:08 -0800 Subject: [PATCH 1/6] Remove directory structure from system prompt Remove the `getDirectoryStructure()` function and its embedding in the system prompt. This was walking up to 500 files and embedding them as a static tree in every API request, adding ~3,500-5,000 tokens per call. The LLM already has tools (listFiles, Glob, Grep) to discover files on demand, making the embedded tree redundant. Claude Code does not include directory structure in its system prompt for the same reason. This also improves prompt cache hit rates since the system prompt no longer varies by project directory contents. Generated with [Continue](https://continue.dev) Co-Authored-By: Continue --- extensions/cli/src/systemMessage.test.ts | 1 - extensions/cli/src/systemMessage.ts | 41 ------- .../src/apis/Anthropic.test.ts | 112 ++++++++++++++++++ .../openai-adapters/src/apis/Anthropic.ts | 8 +- 4 files changed, 119 insertions(+), 43 deletions(-) create mode 100644 packages/openai-adapters/src/apis/Anthropic.test.ts diff --git a/extensions/cli/src/systemMessage.test.ts b/extensions/cli/src/systemMessage.test.ts index e419d0bac1a..1ebab348be0 100644 --- a/extensions/cli/src/systemMessage.test.ts +++ b/extensions/cli/src/systemMessage.test.ts @@ -46,7 +46,6 @@ describe("constructSystemMessage", () => { expect(result).toContain("You are an agent in the Continue CLI"); expect(result).toContain(""); - expect(result).toContain(''); expect(result).toContain(''); }); diff --git a/extensions/cli/src/systemMessage.ts b/extensions/cli/src/systemMessage.ts index b3277975e2a..6e3a7476290 100644 --- a/extensions/cli/src/systemMessage.ts +++ b/extensions/cli/src/systemMessage.ts @@ -2,14 +2,10 @@ import { execSync } from "child_process"; import * as fs from "fs"; import * as path from "path"; -import pkg from "ignore-walk"; -import { Minimatch } from "minimatch"; - import { processRule } from "./hubLoader.js"; import { PermissionMode } from "./permissions/types.js"; import { serviceContainer } from "./services/ServiceContainer.js"; import { ConfigServiceState, SERVICE_NAMES } from "./services/types.js"; -const { WalkerSync } = pkg; /** * Check if current directory is a git repository @@ -23,39 +19,6 @@ function isGitRepo(): boolean { } } -/** - * Get basic directory structure - */ -function getDirectoryStructure(): string { - try { - const walker = new WalkerSync({ - path: process.cwd(), - includeEmpty: false, - follow: false, - ignoreFiles: [".gitignore", ".continueignore", ".customignore"], - }); - - (walker.ignoreRules as any)[".customignore"] = [ - new Minimatch(".git/*", { - matchBase: true, - dot: true, - flipNegate: true, - nocase: true, - }), - ]; - - const files = walker.start().result as string[]; - - const filteredFiles = files - .slice(0, 500) - .map((file: string) => `./${file}`); - - return filteredFiles.join("\n") || "No structure available"; - } catch { - return "Directory structure not available"; - } -} - /** * Get git status */ @@ -89,10 +52,6 @@ Today's date: ${new Date().toISOString().split("T")[0]} As you answer the user's questions, you can use the following context: -Below is a snapshot of this project's file structure at the start of the conversation. This snapshot will NOT update during the conversation. It skips over .gitignore patterns. - -${getDirectoryStructure()} - This is the git status at the start of the conversation. Note that this status is a snapshot in time, and will not update during the conversation. ${getGitStatus()} diff --git a/packages/openai-adapters/src/apis/Anthropic.test.ts b/packages/openai-adapters/src/apis/Anthropic.test.ts new file mode 100644 index 00000000000..a199b2593a2 --- /dev/null +++ b/packages/openai-adapters/src/apis/Anthropic.test.ts @@ -0,0 +1,112 @@ +import { describe, expect, it } from "vitest"; + +import { CACHING_STRATEGIES } from "./AnthropicCachingStrategies.js"; +import { addCacheControlToLastTwoUserMessages } from "./AnthropicUtils.js"; +import { AnthropicApi } from "./Anthropic.js"; + +describe("AnthropicApi", () => { + describe("_convertBody applies cache_control to last two user messages", () => { + const api = new AnthropicApi({ + apiKey: "test-key", + cachingStrategy: "systemAndTools", + }); + + it("adds cache_control to the last two user messages", () => { + const body = api._convertToCleanAnthropicBody({ + model: "claude-sonnet-4-20250514", + messages: [ + { role: "system", content: "You are helpful." }, + { role: "user", content: "First user message" }, + { role: "assistant", content: "First response" }, + { role: "user", content: "Second user message" }, + { role: "assistant", content: "Second response" }, + { role: "user", content: "Third user message" }, + ], + }); + + const result = CACHING_STRATEGIES["systemAndTools"](body); + addCacheControlToLastTwoUserMessages(result.messages); + + // The last user message (index 4 = "Third user message") should have cache_control + const lastUserMsg = result.messages[4]; + expect(lastUserMsg.role).toBe("user"); + expect(Array.isArray(lastUserMsg.content)).toBe(true); + if (Array.isArray(lastUserMsg.content)) { + const textPart = lastUserMsg.content.find( + (p: any) => p.type === "text", + ); + expect(textPart?.cache_control).toEqual({ type: "ephemeral" }); + } + + // The second-to-last user message (index 2 = "Second user message") should also have cache_control + const secondLastUserMsg = result.messages[2]; + expect(secondLastUserMsg.role).toBe("user"); + expect(Array.isArray(secondLastUserMsg.content)).toBe(true); + if (Array.isArray(secondLastUserMsg.content)) { + const textPart = secondLastUserMsg.content.find( + (p: any) => p.type === "text", + ); + expect(textPart?.cache_control).toEqual({ type: "ephemeral" }); + } + + // The first user message (index 0 = "First user message") should NOT have cache_control + const firstUserMsg = result.messages[0]; + expect(firstUserMsg.role).toBe("user"); + expect(Array.isArray(firstUserMsg.content)).toBe(true); + if (Array.isArray(firstUserMsg.content)) { + const textPart = firstUserMsg.content.find( + (p: any) => p.type === "text", + ); + expect(textPart?.cache_control).toBeUndefined(); + } + }); + + it("handles conversations with only one user message", () => { + const body = api._convertToCleanAnthropicBody({ + model: "claude-sonnet-4-20250514", + messages: [ + { role: "system", content: "You are helpful." }, + { role: "user", content: "Only user message" }, + ], + }); + + const result = CACHING_STRATEGIES["systemAndTools"](body); + addCacheControlToLastTwoUserMessages(result.messages); + + const userMsg = result.messages[0]; + expect(userMsg.role).toBe("user"); + expect(Array.isArray(userMsg.content)).toBe(true); + if (Array.isArray(userMsg.content)) { + const textPart = userMsg.content.find((p: any) => p.type === "text"); + expect(textPart?.cache_control).toEqual({ type: "ephemeral" }); + } + }); + + it("still caches user messages even when caching strategy is none", () => { + const body = new AnthropicApi({ + apiKey: "test-key", + cachingStrategy: "none", + })._convertToCleanAnthropicBody({ + model: "claude-sonnet-4-20250514", + messages: [ + { role: "user", content: "Hello" }, + { role: "assistant", content: "Hi" }, + { role: "user", content: "How are you?" }, + ], + }); + + const result = CACHING_STRATEGIES["none"](body); + addCacheControlToLastTwoUserMessages(result.messages); + + // User message caching is applied regardless of strategy + const lastUserMsg = result.messages[2]; + expect(lastUserMsg.role).toBe("user"); + if (Array.isArray(lastUserMsg.content)) { + const textPart = lastUserMsg.content.find( + (p: any) => p.type === "text", + ); + expect(textPart?.cache_control).toEqual({ type: "ephemeral" }); + } + }); + }); +}); diff --git a/packages/openai-adapters/src/apis/Anthropic.ts b/packages/openai-adapters/src/apis/Anthropic.ts index 217ee963a8a..86b0513c980 100644 --- a/packages/openai-adapters/src/apis/Anthropic.ts +++ b/packages/openai-adapters/src/apis/Anthropic.ts @@ -40,6 +40,7 @@ import { CachingStrategyName, } from "./AnthropicCachingStrategies.js"; import { + addCacheControlToLastTwoUserMessages, getAnthropicHeaders, getAnthropicMediaTypeFromDataUrl, openAiToolChoiceToAnthropicToolChoice, @@ -73,7 +74,12 @@ export class AnthropicApi implements BaseLlmApi { // Step 2: Apply caching strategy const cachingStrategy = CACHING_STRATEGIES[this.config.cachingStrategy ?? "systemAndTools"]; - return cachingStrategy(cleanBody); + const result = cachingStrategy(cleanBody); + + // Step 3: Cache last two user messages for conversation turn caching + addCacheControlToLastTwoUserMessages(result.messages); + + return result; } private maxTokensForModel(model: string): number { From e1cdbba7bb2320ecd346ba3c47d0aec3f11f700e Mon Sep 17 00:00:00 2001 From: Nate Date: Sat, 28 Feb 2026 13:14:09 -0800 Subject: [PATCH 2/6] Add cache token data and prompt_cache_metrics to PostHog telemetry Add cacheReadTokens and cacheWriteTokens to the existing apiRequest PostHog event, and emit a new prompt_cache_metrics event with cache_hit_rate, cache_read_tokens, cache_write_tokens, total_prompt_tokens, tool_count, and model. This populates the existing Prompt Cache Performance dashboard (ID: 1310089). Co-Authored-By: Claude Opus 4.6 --- .../src/stream/streamChatResponse.helpers.ts | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/extensions/cli/src/stream/streamChatResponse.helpers.ts b/extensions/cli/src/stream/streamChatResponse.helpers.ts index 205a76fe85f..94c7e14eb9c 100644 --- a/extensions/cli/src/stream/streamChatResponse.helpers.ts +++ b/extensions/cli/src/stream/streamChatResponse.helpers.ts @@ -367,6 +367,11 @@ export function recordStreamTelemetry(options: { }); // Mirror core metrics to PostHog for product analytics + const cacheReadTokens = + fullUsage?.prompt_tokens_details?.cache_read_tokens ?? 0; + const cacheWriteTokens = + fullUsage?.prompt_tokens_details?.cache_write_tokens ?? 0; + try { posthogService.capture("apiRequest", { model: model.model, @@ -374,7 +379,21 @@ export function recordStreamTelemetry(options: { inputTokens: actualInputTokens, outputTokens: actualOutputTokens, costUsd: cost, + cacheReadTokens, + cacheWriteTokens, }); + + // Emit prompt_cache_metrics for the Prompt Cache Performance dashboard + if (actualInputTokens > 0) { + posthogService.capture("prompt_cache_metrics", { + model: model.model, + cache_read_tokens: cacheReadTokens, + cache_write_tokens: cacheWriteTokens, + total_prompt_tokens: actualInputTokens, + cache_hit_rate: cacheReadTokens / actualInputTokens, + tool_count: tools?.length ?? 0, + }); + } } catch {} return cost; From e27562ad8a1f4950d37ab8978f4d2360e93312ea Mon Sep 17 00:00:00 2001 From: Nate Date: Sat, 28 Feb 2026 13:15:34 -0800 Subject: [PATCH 3/6] Remove accidentally staged Anthropic files These files belong to a different PR and were accidentally included. Generated with [Continue](https://continue.dev) Co-Authored-By: Continue --- .../src/apis/Anthropic.test.ts | 112 ------------------ .../openai-adapters/src/apis/Anthropic.ts | 8 +- 2 files changed, 1 insertion(+), 119 deletions(-) delete mode 100644 packages/openai-adapters/src/apis/Anthropic.test.ts diff --git a/packages/openai-adapters/src/apis/Anthropic.test.ts b/packages/openai-adapters/src/apis/Anthropic.test.ts deleted file mode 100644 index a199b2593a2..00000000000 --- a/packages/openai-adapters/src/apis/Anthropic.test.ts +++ /dev/null @@ -1,112 +0,0 @@ -import { describe, expect, it } from "vitest"; - -import { CACHING_STRATEGIES } from "./AnthropicCachingStrategies.js"; -import { addCacheControlToLastTwoUserMessages } from "./AnthropicUtils.js"; -import { AnthropicApi } from "./Anthropic.js"; - -describe("AnthropicApi", () => { - describe("_convertBody applies cache_control to last two user messages", () => { - const api = new AnthropicApi({ - apiKey: "test-key", - cachingStrategy: "systemAndTools", - }); - - it("adds cache_control to the last two user messages", () => { - const body = api._convertToCleanAnthropicBody({ - model: "claude-sonnet-4-20250514", - messages: [ - { role: "system", content: "You are helpful." }, - { role: "user", content: "First user message" }, - { role: "assistant", content: "First response" }, - { role: "user", content: "Second user message" }, - { role: "assistant", content: "Second response" }, - { role: "user", content: "Third user message" }, - ], - }); - - const result = CACHING_STRATEGIES["systemAndTools"](body); - addCacheControlToLastTwoUserMessages(result.messages); - - // The last user message (index 4 = "Third user message") should have cache_control - const lastUserMsg = result.messages[4]; - expect(lastUserMsg.role).toBe("user"); - expect(Array.isArray(lastUserMsg.content)).toBe(true); - if (Array.isArray(lastUserMsg.content)) { - const textPart = lastUserMsg.content.find( - (p: any) => p.type === "text", - ); - expect(textPart?.cache_control).toEqual({ type: "ephemeral" }); - } - - // The second-to-last user message (index 2 = "Second user message") should also have cache_control - const secondLastUserMsg = result.messages[2]; - expect(secondLastUserMsg.role).toBe("user"); - expect(Array.isArray(secondLastUserMsg.content)).toBe(true); - if (Array.isArray(secondLastUserMsg.content)) { - const textPart = secondLastUserMsg.content.find( - (p: any) => p.type === "text", - ); - expect(textPart?.cache_control).toEqual({ type: "ephemeral" }); - } - - // The first user message (index 0 = "First user message") should NOT have cache_control - const firstUserMsg = result.messages[0]; - expect(firstUserMsg.role).toBe("user"); - expect(Array.isArray(firstUserMsg.content)).toBe(true); - if (Array.isArray(firstUserMsg.content)) { - const textPart = firstUserMsg.content.find( - (p: any) => p.type === "text", - ); - expect(textPart?.cache_control).toBeUndefined(); - } - }); - - it("handles conversations with only one user message", () => { - const body = api._convertToCleanAnthropicBody({ - model: "claude-sonnet-4-20250514", - messages: [ - { role: "system", content: "You are helpful." }, - { role: "user", content: "Only user message" }, - ], - }); - - const result = CACHING_STRATEGIES["systemAndTools"](body); - addCacheControlToLastTwoUserMessages(result.messages); - - const userMsg = result.messages[0]; - expect(userMsg.role).toBe("user"); - expect(Array.isArray(userMsg.content)).toBe(true); - if (Array.isArray(userMsg.content)) { - const textPart = userMsg.content.find((p: any) => p.type === "text"); - expect(textPart?.cache_control).toEqual({ type: "ephemeral" }); - } - }); - - it("still caches user messages even when caching strategy is none", () => { - const body = new AnthropicApi({ - apiKey: "test-key", - cachingStrategy: "none", - })._convertToCleanAnthropicBody({ - model: "claude-sonnet-4-20250514", - messages: [ - { role: "user", content: "Hello" }, - { role: "assistant", content: "Hi" }, - { role: "user", content: "How are you?" }, - ], - }); - - const result = CACHING_STRATEGIES["none"](body); - addCacheControlToLastTwoUserMessages(result.messages); - - // User message caching is applied regardless of strategy - const lastUserMsg = result.messages[2]; - expect(lastUserMsg.role).toBe("user"); - if (Array.isArray(lastUserMsg.content)) { - const textPart = lastUserMsg.content.find( - (p: any) => p.type === "text", - ); - expect(textPart?.cache_control).toEqual({ type: "ephemeral" }); - } - }); - }); -}); diff --git a/packages/openai-adapters/src/apis/Anthropic.ts b/packages/openai-adapters/src/apis/Anthropic.ts index 86b0513c980..217ee963a8a 100644 --- a/packages/openai-adapters/src/apis/Anthropic.ts +++ b/packages/openai-adapters/src/apis/Anthropic.ts @@ -40,7 +40,6 @@ import { CachingStrategyName, } from "./AnthropicCachingStrategies.js"; import { - addCacheControlToLastTwoUserMessages, getAnthropicHeaders, getAnthropicMediaTypeFromDataUrl, openAiToolChoiceToAnthropicToolChoice, @@ -74,12 +73,7 @@ export class AnthropicApi implements BaseLlmApi { // Step 2: Apply caching strategy const cachingStrategy = CACHING_STRATEGIES[this.config.cachingStrategy ?? "systemAndTools"]; - const result = cachingStrategy(cleanBody); - - // Step 3: Cache last two user messages for conversation turn caching - addCacheControlToLastTwoUserMessages(result.messages); - - return result; + return cachingStrategy(cleanBody); } private maxTokensForModel(model: string): number { From e997d6886fe41e5f701303f58c3f8a18258b5c37 Mon Sep 17 00:00:00 2001 From: Nate Date: Sat, 28 Feb 2026 13:34:36 -0800 Subject: [PATCH 4/6] Split system prompt into static/dynamic blocks for better caching Restructure constructSystemMessage() to return an array of content blocks instead of a single string. This separates: - Block 1 (static): Core identity and behavior instructions - identical across all users/projects, maximizing Anthropic cache hit rates - Block 2 (semi-static): User rules from AGENTS.md, config YAML - same within a session but differs per project - Block 3 (dynamic): Environment info (cwd, git status, platform, date) - changes per session The Anthropic API adapter already handles system message content as an array of {type:"text", text:string} blocks, and the caching strategies in AnthropicCachingStrategies.ts cache each block independently. By putting static content first, it gets cached and reused globally while dynamic content at the end doesn't invalidate the cached prefix. Co-Authored-By: Continue --- core/util/messageConversion.ts | 11 +- extensions/cli/src/__mocks__/systemMessage.ts | 13 +- extensions/cli/src/commands/serve.test.ts | 8 +- extensions/cli/src/commands/serve.ts | 14 +- .../src/services/SystemMessageService.test.ts | 15 +- .../cli/src/services/SystemMessageService.ts | 11 +- .../streamChatResponse.autoCompaction.test.ts | 11 +- .../streamChatResponse.autoCompaction.ts | 21 +-- ...treamChatResponse.autoContinuation.test.ts | 4 +- .../streamChatResponse.compactionHelpers.ts | 22 ++- .../streamChatResponse.systemMessage.test.ts | 24 ++- .../cli/src/stream/streamChatResponse.test.ts | 18 +- .../cli/src/stream/streamChatResponse.ts | 15 +- extensions/cli/src/subagent/executor.ts | 17 +- extensions/cli/src/systemMessage.test.ts | 134 ++++++++------ extensions/cli/src/systemMessage.ts | 167 +++++++++++------- 16 files changed, 317 insertions(+), 188 deletions(-) diff --git a/core/util/messageConversion.ts b/core/util/messageConversion.ts index dff75869933..6210d3d1a48 100644 --- a/core/util/messageConversion.ts +++ b/core/util/messageConversion.ts @@ -351,13 +351,16 @@ export function convertFromUnifiedHistory( } /** - * Convert ChatHistoryItem array to ChatCompletionMessageParam array with injected system message + * Convert ChatHistoryItem array to ChatCompletionMessageParam array with injected system message. + * Supports both a plain string and an array of content blocks for the system message. + * When an array is provided, it is passed as the system message content directly, + * which allows Anthropic's prompt caching to cache each block independently. * @param historyItems - The chat history items - * @param systemMessage - The system message to inject at the beginning + * @param systemMessage - The system message (string or array of {type:"text", text:string} blocks) */ export function convertFromUnifiedHistoryWithSystemMessage( historyItems: ChatHistoryItem[], - systemMessage: string, + systemMessage: string | Array<{ type: "text"; text: string }>, ): ChatCompletionMessageParam[] { const messages: ChatCompletionMessageParam[] = []; @@ -365,7 +368,7 @@ export function convertFromUnifiedHistoryWithSystemMessage( messages.push({ role: "system", content: systemMessage, - }); + } as ChatCompletionMessageParam); // Convert the rest of the history const convertedMessages = convertFromUnifiedHistory(historyItems); diff --git a/extensions/cli/src/__mocks__/systemMessage.ts b/extensions/cli/src/__mocks__/systemMessage.ts index 0599a90db28..467caea8c73 100644 --- a/extensions/cli/src/__mocks__/systemMessage.ts +++ b/extensions/cli/src/__mocks__/systemMessage.ts @@ -1,7 +1,14 @@ import { vi } from "vitest"; -export const constructSystemMessage = vi +export const constructSystemMessage = vi.fn().mockResolvedValue([ + { + type: "text", + text: "You are an agent in the Continue CLI. Given the user's prompt, you should use the tools available to you to answer the user's question.", + }, +]); + +export const flattenSystemMessage = vi .fn() - .mockResolvedValue( - "You are an agent in the Continue CLI. Given the user's prompt, you should use the tools available to you to answer the user's question.", + .mockImplementation((blocks: Array<{ type: string; text: string }>) => + blocks.map((b) => b.text).join("\n\n"), ); diff --git a/extensions/cli/src/commands/serve.test.ts b/extensions/cli/src/commands/serve.test.ts index 87487dc3247..ba070d9b3f9 100644 --- a/extensions/cli/src/commands/serve.test.ts +++ b/extensions/cli/src/commands/serve.test.ts @@ -43,7 +43,13 @@ describe("serve command", () => { })); vi.mock("../systemMessage.js", () => ({ - constructSystemMessage: vi.fn(() => Promise.resolve("System message")), + constructSystemMessage: vi.fn(() => + Promise.resolve([{ type: "text", text: "System message" }]), + ), + flattenSystemMessage: vi.fn( + (blocks: Array<{ text: string }>) => + blocks.map((b) => b.text).join("\n\n"), + ), })); vi.mock("../telemetry/telemetryService.js", () => ({ diff --git a/extensions/cli/src/commands/serve.ts b/extensions/cli/src/commands/serve.ts index 29e4618a7f7..c5aa2b28d97 100644 --- a/extensions/cli/src/commands/serve.ts +++ b/extensions/cli/src/commands/serve.ts @@ -29,7 +29,10 @@ import { loadOrCreateSessionById, } from "../session.js"; import { messageQueue } from "../stream/messageQueue.js"; -import { constructSystemMessage } from "../systemMessage.js"; +import { + constructSystemMessage, + flattenSystemMessage, +} from "../systemMessage.js"; import { telemetryService } from "../telemetry/telemetryService.js"; import { reportFailureTool } from "../tools/reportFailure.js"; import { gracefulExit, updateAgentMetadata } from "../util/exit.js"; @@ -153,7 +156,7 @@ export async function serve(prompt?: string, options: ServeOptions = {}) { } // Initialize session with system message - const systemMessage = await constructSystemMessage( + const systemMessageBlocks = await constructSystemMessage( permissionsState.currentMode, options.rule, undefined, @@ -161,9 +164,12 @@ export async function serve(prompt?: string, options: ServeOptions = {}) { ); const initialHistory: ChatHistoryItem[] = []; - if (systemMessage) { + if (systemMessageBlocks.length > 0) { initialHistory.push({ - message: { role: "system" as const, content: systemMessage }, + message: { + role: "system" as const, + content: flattenSystemMessage(systemMessageBlocks), + }, contextItems: [], }); } diff --git a/extensions/cli/src/services/SystemMessageService.test.ts b/extensions/cli/src/services/SystemMessageService.test.ts index d925c88b112..cc05135ea02 100644 --- a/extensions/cli/src/services/SystemMessageService.test.ts +++ b/extensions/cli/src/services/SystemMessageService.test.ts @@ -70,7 +70,10 @@ describe("SystemMessageService", () => { headless: true, }; - constructSystemMessageMock.mockResolvedValue("Test system message"); + const mockBlocks = [ + { type: "text", text: "Test system message" }, + ]; + constructSystemMessageMock.mockResolvedValue(mockBlocks); await service.initialize(config); const message = await service.getSystemMessage("normal"); @@ -81,7 +84,7 @@ describe("SystemMessageService", () => { "json", true, ); - expect(message).toBe("Test system message"); + expect(message).toEqual(mockBlocks); }); }); @@ -96,7 +99,9 @@ describe("SystemMessageService", () => { format: "json", }); - constructSystemMessageMock.mockResolvedValue("Updated message"); + constructSystemMessageMock.mockResolvedValue([ + { type: "text", text: "Updated message" }, + ]); await service.getSystemMessage("normal"); expect(constructSystemMessageMock).toHaveBeenCalledWith( @@ -117,7 +122,9 @@ describe("SystemMessageService", () => { additionalRules: ["rule2", "rule3"], }); - constructSystemMessageMock.mockResolvedValue("Updated message"); + constructSystemMessageMock.mockResolvedValue([ + { type: "text", text: "Updated message" }, + ]); await service.getSystemMessage("normal"); expect(constructSystemMessageMock).toHaveBeenCalledWith( diff --git a/extensions/cli/src/services/SystemMessageService.ts b/extensions/cli/src/services/SystemMessageService.ts index 44ae3098c4e..e16398c14ae 100644 --- a/extensions/cli/src/services/SystemMessageService.ts +++ b/extensions/cli/src/services/SystemMessageService.ts @@ -1,5 +1,8 @@ import { PermissionMode } from "../permissions/types.js"; -import { constructSystemMessage } from "../systemMessage.js"; +import { + constructSystemMessage, + SystemMessageBlock, +} from "../systemMessage.js"; import { logger } from "../util/logger.js"; import { BaseService } from "./BaseService.js"; @@ -46,7 +49,9 @@ export class SystemMessageService extends BaseService /** * Get a fresh system message with current mode and configuration */ - public async getSystemMessage(currentMode: PermissionMode): Promise { + public async getSystemMessage( + currentMode: PermissionMode, + ): Promise { const { additionalRules, format, headless } = this.currentState; const systemMessage = await constructSystemMessage( @@ -58,7 +63,7 @@ export class SystemMessageService extends BaseService logger.debug("Generated fresh system message", { mode: currentMode, - messageLength: systemMessage.length, + blockCount: systemMessage.length, }); return systemMessage; diff --git a/extensions/cli/src/stream/streamChatResponse.autoCompaction.test.ts b/extensions/cli/src/stream/streamChatResponse.autoCompaction.test.ts index 21b0a565133..02800f7b11a 100644 --- a/extensions/cli/src/stream/streamChatResponse.autoCompaction.test.ts +++ b/extensions/cli/src/stream/streamChatResponse.autoCompaction.test.ts @@ -43,7 +43,9 @@ vi.mock("../util/logger.js", () => ({ vi.mock("../services/index.js", () => ({ services: { systemMessage: { - getSystemMessage: vi.fn(() => Promise.resolve("System message")), + getSystemMessage: vi.fn(() => + Promise.resolve([{ type: "text", text: "System message" }]), + ), }, toolPermissions: { getState: vi.fn(() => ({ currentMode: "enabled" })), @@ -51,6 +53,13 @@ vi.mock("../services/index.js", () => ({ }, })); +vi.mock("../systemMessage.js", () => ({ + flattenSystemMessage: vi.fn( + (blocks: Array<{ text: string }>) => + blocks.map((b) => b.text).join("\n\n"), + ), +})); + vi.mock("os", async (importOriginal) => { const actual = (await importOriginal()) as object; return { diff --git a/extensions/cli/src/stream/streamChatResponse.autoCompaction.ts b/extensions/cli/src/stream/streamChatResponse.autoCompaction.ts index 156e9fa3526..3ecb965a73f 100644 --- a/extensions/cli/src/stream/streamChatResponse.autoCompaction.ts +++ b/extensions/cli/src/stream/streamChatResponse.autoCompaction.ts @@ -163,16 +163,17 @@ export async function handleAutoCompaction( try { // Get system message to calculate its token count for compaction pruning // Use provided message if available, otherwise fetch it (for backward compatibility) - const systemMessage = - providedSystemMessage ?? - (async () => { - const { services } = await import("../services/index.js"); - return services.systemMessage.getSystemMessage( - services.toolPermissions.getState().currentMode, - ); - })(); - const resolvedSystemMessage = - typeof systemMessage === "string" ? systemMessage : await systemMessage; + let resolvedSystemMessage: string; + if (providedSystemMessage) { + resolvedSystemMessage = providedSystemMessage; + } else { + const { services } = await import("../services/index.js"); + const { flattenSystemMessage } = await import("../systemMessage.js"); + const blocks = await services.systemMessage.getSystemMessage( + services.toolPermissions.getState().currentMode, + ); + resolvedSystemMessage = flattenSystemMessage(blocks); + } const { countChatHistoryItemTokens } = await import("../util/tokenizer.js"); const systemMessageTokens = countChatHistoryItemTokens( diff --git a/extensions/cli/src/stream/streamChatResponse.autoContinuation.test.ts b/extensions/cli/src/stream/streamChatResponse.autoContinuation.test.ts index 8f4e95a2b22..395479bb27d 100644 --- a/extensions/cli/src/stream/streamChatResponse.autoContinuation.test.ts +++ b/extensions/cli/src/stream/streamChatResponse.autoContinuation.test.ts @@ -52,7 +52,9 @@ vi.mock("../util/logger.js", () => ({ vi.mock("../services/index.js", () => ({ services: { systemMessage: { - getSystemMessage: vi.fn(() => Promise.resolve("System message")), + getSystemMessage: vi.fn(() => + Promise.resolve([{ type: "text", text: "System message" }]), + ), }, toolPermissions: { getState: vi.fn(() => ({ currentMode: "enabled" })), diff --git a/extensions/cli/src/stream/streamChatResponse.compactionHelpers.ts b/extensions/cli/src/stream/streamChatResponse.compactionHelpers.ts index b1d8fbe9543..b063df14b42 100644 --- a/extensions/cli/src/stream/streamChatResponse.compactionHelpers.ts +++ b/extensions/cli/src/stream/streamChatResponse.compactionHelpers.ts @@ -4,6 +4,10 @@ import type { ChatHistoryItem } from "core/index.js"; import type { ChatCompletionTool } from "openai/resources/chat/completions.mjs"; import { services } from "../services/index.js"; +import { + flattenSystemMessage, + SystemMessageBlock, +} from "../systemMessage.js"; import { ToolCall } from "../tools/index.js"; import { logger } from "../util/logger.js"; import { validateContextLength } from "../util/tokenizer.js"; @@ -17,7 +21,7 @@ export interface CompactionHelperOptions { isCompacting: boolean; isHeadless: boolean; callbacks?: StreamCallbacks; - systemMessage: string; + systemMessage: SystemMessageBlock[]; tools?: ChatCompletionTool[]; } @@ -42,6 +46,8 @@ export async function handlePreApiCompaction( return { chatHistory, wasCompacted: false }; } + const systemMessageString = flattenSystemMessage(systemMessage); + const { wasCompacted, chatHistory: preCompactHistory } = await handleAutoCompaction(chatHistory, model, llmApi, { isHeadless, @@ -49,7 +55,7 @@ export async function handlePreApiCompaction( onSystemMessage: callbacks?.onSystemMessage, onContent: callbacks?.onContent, }, - systemMessage, + systemMessage: systemMessageString, tools, }); @@ -84,6 +90,8 @@ export async function handlePostToolValidation( return { chatHistory, wasCompacted: false }; } + const systemMessageString = flattenSystemMessage(systemMessage); + // Get updated history after tool execution const chatHistorySvc = services.chatHistory; if ( @@ -98,7 +106,7 @@ export async function handlePostToolValidation( chatHistory, model, safetyBuffer: SAFETY_BUFFER, - systemMessage, + systemMessage: systemMessageString, tools, }); @@ -117,7 +125,7 @@ export async function handlePostToolValidation( onSystemMessage: callbacks?.onSystemMessage, onContent: callbacks?.onContent, }, - systemMessage, + systemMessage: systemMessageString, tools, }); @@ -136,7 +144,7 @@ export async function handlePostToolValidation( chatHistory, model, safetyBuffer: SAFETY_BUFFER, - systemMessage, + systemMessage: systemMessageString, tools, }); @@ -185,6 +193,8 @@ export async function handleNormalAutoCompaction( return { chatHistory, wasCompacted: false }; } + const systemMessageString = flattenSystemMessage(systemMessage); + const chatHistorySvc = services.chatHistory; if ( typeof chatHistorySvc?.isReady === "function" && @@ -200,7 +210,7 @@ export async function handleNormalAutoCompaction( onSystemMessage: callbacks?.onSystemMessage, onContent: callbacks?.onContent, }, - systemMessage, + systemMessage: systemMessageString, tools, }); diff --git a/extensions/cli/src/stream/streamChatResponse.systemMessage.test.ts b/extensions/cli/src/stream/streamChatResponse.systemMessage.test.ts index cd20f734c8c..b6c2f0cd542 100644 --- a/extensions/cli/src/stream/streamChatResponse.systemMessage.test.ts +++ b/extensions/cli/src/stream/streamChatResponse.systemMessage.test.ts @@ -23,6 +23,15 @@ vi.mock("../services/index.js", () => ({ }, })); +// Mock systemMessage module for flattenSystemMessage +vi.mock("../systemMessage.js", () => ({ + flattenSystemMessage: vi.fn( + (blocks: Array<{ text: string }>) => + blocks.map((b) => b.text).join("\n\n"), + ), + SystemMessageBlock: {}, +})); + // Mock logger vi.mock("../util/logger.js", () => ({ logger: { @@ -71,11 +80,8 @@ describe("streamChatResponse system message validation", () => { }, } as ModelConfig; - // Mock system message - const systemMessage = "System instructions"; - vi.mocked(services.systemMessage.getSystemMessage).mockResolvedValue( - systemMessage, - ); + // Mock system message as blocks + const systemMessage = [{ type: "text" as const, text: "System instructions" }]; // Small chat history const chatHistory: ChatHistoryItem[] = [ @@ -119,7 +125,9 @@ describe("streamChatResponse system message validation", () => { } as ModelConfig; // Small system message (50 tokens worth) - const smallSystemMessage = "x".repeat(200); // ~50 tokens + const smallSystemMessage = [ + { type: "text" as const, text: "x".repeat(200) }, + ]; // ~50 tokens // Small chat history (200 tokens worth) const chatHistory: ChatHistoryItem[] = [ @@ -171,7 +179,9 @@ describe("streamChatResponse system message validation", () => { // System message + history that's exactly at limit without buffer // but fails with buffer - const systemMessage = "x".repeat(800); // ~200 tokens + const systemMessage = [ + { type: "text" as const, text: "x".repeat(800) }, + ]; // ~200 tokens const chatHistory: ChatHistoryItem[] = [ { diff --git a/extensions/cli/src/stream/streamChatResponse.test.ts b/extensions/cli/src/stream/streamChatResponse.test.ts index 131132e3f64..a2e707e2774 100644 --- a/extensions/cli/src/stream/streamChatResponse.test.ts +++ b/extensions/cli/src/stream/streamChatResponse.test.ts @@ -152,7 +152,7 @@ describe("processStreamingResponse - content preservation", () => { model: mockModel, llmApi: mockLlmApi, abortController: mockAbortController, - systemMessage: "You are a helpful assistant.", + systemMessage: [{ type: "text", text: "You are a helpful assistant." }], }); expect(result.content).toBe("I'll read the README file for you."); @@ -173,7 +173,7 @@ describe("processStreamingResponse - content preservation", () => { model: mockModel, llmApi: mockLlmApi, abortController: mockAbortController, - systemMessage: "You are a helpful assistant.", + systemMessage: [{ type: "text", text: "You are a helpful assistant." }], }); expect(result.content).toBe("Let me search for that. "); @@ -222,7 +222,7 @@ describe("processStreamingResponse - content preservation", () => { model: mockModel, llmApi: mockLlmApi, abortController: mockAbortController, - systemMessage: "You are a helpful assistant.", + systemMessage: [{ type: "text", text: "You are a helpful assistant." }], }); expect(result.content).toBe("I'll read the README file for you."); @@ -246,7 +246,7 @@ describe("processStreamingResponse - content preservation", () => { model: mockModel, llmApi: mockLlmApi, abortController: mockAbortController, - systemMessage: "You are a helpful assistant.", + systemMessage: [{ type: "text", text: "You are a helpful assistant." }], }); expect(result.content).toBe("Hello world!"); @@ -371,7 +371,7 @@ describe("processStreamingResponse - content preservation", () => { model: mockModel, llmApi: mockLlmApi, abortController: mockAbortController, - systemMessage: "You are a helpful assistant.", + systemMessage: [{ type: "text", text: "You are a helpful assistant." }], }); expect(responsesStream).toHaveBeenCalledTimes(1); @@ -410,7 +410,7 @@ describe("processStreamingResponse - content preservation", () => { model: mockModel, llmApi: mockLlmApi, abortController: mockAbortController, - systemMessage: "You are a helpful assistant.", + systemMessage: [{ type: "text", text: "You are a helpful assistant." }], }); // Content is captured correctly @@ -461,7 +461,7 @@ describe("processStreamingResponse - content preservation", () => { model: mockModel, llmApi: mockLlmApi, abortController: mockAbortController, - systemMessage: "You are a helpful assistant.", + systemMessage: [{ type: "text", text: "You are a helpful assistant." }], }); // Fixed: Both issues are resolved @@ -510,7 +510,7 @@ describe("processStreamingResponse - content preservation", () => { model: mockModel, llmApi: mockLlmApi, abortController: mockAbortController, - systemMessage: "You are a helpful assistant.", + systemMessage: [{ type: "text", text: "You are a helpful assistant." }], }); } catch (error) { caughtError = error; @@ -524,7 +524,7 @@ describe("processStreamingResponse - content preservation", () => { model: mockModel, llmApi: mockLlmApi, abortController: mockAbortController, - systemMessage: "You are a helpful assistant.", + systemMessage: [{ type: "text", text: "You are a helpful assistant." }], }); expect(result.content).toBe("Hello world!"); diff --git a/extensions/cli/src/stream/streamChatResponse.ts b/extensions/cli/src/stream/streamChatResponse.ts index d0b677b7410..f063060284d 100644 --- a/extensions/cli/src/stream/streamChatResponse.ts +++ b/extensions/cli/src/stream/streamChatResponse.ts @@ -10,6 +10,10 @@ import type { import { pruneLastMessage } from "../compaction.js"; import { services } from "../services/index.js"; +import { + flattenSystemMessage, + SystemMessageBlock, +} from "../systemMessage.js"; import { posthogService } from "../telemetry/posthogService.js"; import { telemetryService } from "../telemetry/telemetryService.js"; import { applyChatCompletionToolOverrides } from "../tools/applyToolOverrides.js"; @@ -190,7 +194,7 @@ interface ProcessStreamingResponseOptions { callbacks?: StreamCallbacks; isHeadless?: boolean; tools?: ChatCompletionTool[]; - systemMessage: string; + systemMessage: SystemMessageBlock[]; } // Process a single streaming response and return whether we need to continue @@ -216,6 +220,9 @@ export async function processStreamingResponse( let chatHistory = options.chatHistory; + // Flatten system message blocks to a string for token counting + const systemMessageString = flattenSystemMessage(systemMessage); + // Safety buffer to account for tokenization estimation errors const SAFETY_BUFFER = 100; @@ -224,7 +231,7 @@ export async function processStreamingResponse( chatHistory, model, safetyBuffer: SAFETY_BUFFER, - systemMessage, + systemMessage: systemMessageString, tools, }); @@ -241,7 +248,7 @@ export async function processStreamingResponse( chatHistory, model, safetyBuffer: SAFETY_BUFFER, - systemMessage, + systemMessage: systemMessageString, tools, }); } @@ -250,7 +257,7 @@ export async function processStreamingResponse( throw new Error(`Context length validation failed: ${validation.error}`); } - // Create OpenAI format history with validated system message + // Create OpenAI format history with system message blocks for optimal caching const openaiChatHistory = convertFromUnifiedHistoryWithSystemMessage( chatHistory, systemMessage, diff --git a/extensions/cli/src/subagent/executor.ts b/extensions/cli/src/subagent/executor.ts index 1580e7f7f9d..5c85b905282 100644 --- a/extensions/cli/src/subagent/executor.ts +++ b/extensions/cli/src/subagent/executor.ts @@ -5,6 +5,10 @@ import { serviceContainer } from "../services/ServiceContainer.js"; import type { ToolPermissionServiceState } from "../services/ToolPermissionService.js"; import { ModelServiceState, SERVICE_NAMES } from "../services/types.js"; import { streamChatResponse } from "../stream/streamChatResponse.js"; +import { + flattenSystemMessage, + SystemMessageBlock, +} from "../systemMessage.js"; import { escapeEvents } from "../util/cli.js"; import { logger } from "../util/logger.js"; @@ -29,18 +33,20 @@ export interface SubAgentResult { } /** - * Build system message for the agent + * Build system message for the agent as a flat string. + * Subagents receive a single combined string (they don't benefit from block-level caching). */ async function buildAgentSystemMessage( agent: ModelServiceState, services: any, ): Promise { - const baseMessage = services.systemMessage + const baseBlocks: SystemMessageBlock[] = services.systemMessage ? await services.systemMessage.getSystemMessage( services.toolPermissions.getState().currentMode, ) - : ""; + : []; + const baseMessage = flattenSystemMessage(baseBlocks); const agentPrompt = agent.model?.chatOptions?.baseSystemMessage || ""; // Combine base system message with agent-specific prompt @@ -101,9 +107,10 @@ export async function executeSubAgent( ? chatHistorySvc.isReady : undefined; - // Override system message for this execution + // Override system message for this execution (wrap in block format) if (services.systemMessage) { - services.systemMessage.getSystemMessage = async () => systemMessage; + services.systemMessage.getSystemMessage = async () => + [{ type: "text" as const, text: systemMessage }]; } // Temporarily disable ChatHistoryService to prevent it from interfering with child session diff --git a/extensions/cli/src/systemMessage.test.ts b/extensions/cli/src/systemMessage.test.ts index 1ebab348be0..80f1657e006 100644 --- a/extensions/cli/src/systemMessage.test.ts +++ b/extensions/cli/src/systemMessage.test.ts @@ -2,7 +2,9 @@ import { describe, expect, it, vi } from "vitest"; // Use the actual implementation instead of the mocked one vi.unmock("./systemMessage.js"); -const { constructSystemMessage } = await import("./systemMessage.js"); +const { constructSystemMessage, flattenSystemMessage } = await import( + "./systemMessage.js" +); // Mock the service container to avoid "No factory registered for service 'config'" error vi.mock("./services/ServiceContainer.js", () => ({ @@ -21,10 +23,30 @@ vi.mock("./hubLoader.js", () => ({ const PLAN_MODE_STRING = "You are operating in _Plan Mode_"; +// Helper to get flattened string from blocks +async function getFlattened( + ...args: Parameters +): Promise { + const blocks = await constructSystemMessage(...args); + return flattenSystemMessage(blocks); +} + describe("constructSystemMessage", () => { + it("should return an array of SystemMessageBlock objects", async () => { + const result = await constructSystemMessage("normal"); + + expect(Array.isArray(result)).toBe(true); + expect(result.length).toBeGreaterThanOrEqual(2); + for (const block of result) { + expect(block).toHaveProperty("type", "text"); + expect(block).toHaveProperty("text"); + expect(typeof block.text).toBe("string"); + } + }); + it("should return base system message with rules when additionalRules is provided", async () => { const rules = ["These are the rules for the assistant."]; - const result = await constructSystemMessage("normal", rules); + const result = await getFlattened("normal", rules); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).toContain(''); @@ -34,7 +56,7 @@ describe("constructSystemMessage", () => { it("should return base system message with agent content when no rules but agent file exists", async () => { // The implementation checks for agent files like AGENTS.md which exists in this project - const result = await constructSystemMessage("normal"); + const result = await getFlattened("normal"); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).toContain(''); @@ -42,7 +64,7 @@ describe("constructSystemMessage", () => { }); it("should include base system message components", async () => { - const result = await constructSystemMessage("normal"); + const result = await getFlattened("normal"); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).toContain(""); @@ -50,27 +72,27 @@ describe("constructSystemMessage", () => { }); it("should handle whitespace-only rules message", async () => { - const result = await constructSystemMessage("normal", [" "]); + const result = await getFlattened("normal", [" "]); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).toContain(''); }); it("should include working directory information", async () => { - const result = await constructSystemMessage("normal"); + const result = await getFlattened("normal"); expect(result).toContain("Working directory:"); expect(result).toContain(""); }); it("should include platform information", async () => { - const result = await constructSystemMessage("normal"); + const result = await getFlattened("normal"); expect(result).toContain("Platform:"); }); it("should include current date", async () => { - const result = await constructSystemMessage("normal"); + const result = await getFlattened("normal"); expect(result).toContain("Today's date:"); expect(result).toContain(new Date().toISOString().split("T")[0]); @@ -78,7 +100,7 @@ describe("constructSystemMessage", () => { it("should format rules section correctly", async () => { const rulesMessage = "Rule 1: Do this\nRule 2: Do that"; - const result = await constructSystemMessage("normal", [rulesMessage]); + const result = await getFlattened("normal", [rulesMessage]); expect(result).toContain(''); expect(result).toContain(rulesMessage); @@ -89,7 +111,7 @@ describe("constructSystemMessage", () => { const rulesMessage = `Rule 1: First rule Rule 2: Second rule Rule 3: Third rule`; - const result = await constructSystemMessage("normal", [rulesMessage]); + const result = await getFlattened("normal", [rulesMessage]); expect(result).toContain(rulesMessage); expect(result).toContain(''); @@ -98,7 +120,7 @@ Rule 3: Third rule`; it("should handle special characters in rules message", async () => { const rulesMessage = "Rule with characters & symbols!"; - const result = await constructSystemMessage("normal", [rulesMessage]); + const result = await getFlattened("normal", [rulesMessage]); expect(result).toContain(rulesMessage); expect(result).toContain(''); @@ -106,7 +128,7 @@ Rule 3: Third rule`; it("should handle very long rules message", async () => { const rulesMessage = "A".repeat(1000); - const result = await constructSystemMessage("normal", [rulesMessage]); + const result = await getFlattened("normal", [rulesMessage]); expect(result).toContain(rulesMessage); expect(result).toContain(''); @@ -114,7 +136,7 @@ Rule 3: Third rule`; it("should combine rules and agent content when both are present", async () => { const rulesMessage = "These are the rules."; - const result = await constructSystemMessage("normal", [rulesMessage]); + const result = await getFlattened("normal", [rulesMessage]); expect(result).toContain(''); expect(result).toContain(rulesMessage); @@ -123,12 +145,7 @@ Rule 3: Third rule`; }); it("should add headless mode instructions when headless is true", async () => { - const result = await constructSystemMessage( - "normal", - undefined, - undefined, - true, - ); + const result = await getFlattened("normal", undefined, undefined, true); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).toContain("IMPORTANT: You are running in headless mode"); @@ -139,12 +156,7 @@ Rule 3: Third rule`; }); it("should not add headless mode instructions when headless is false", async () => { - const result = await constructSystemMessage( - "normal", - undefined, - undefined, - false, - ); + const result = await getFlattened("normal", undefined, undefined, false); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).not.toContain("IMPORTANT: You are running in headless mode"); @@ -152,7 +164,7 @@ Rule 3: Third rule`; }); it("should not add headless mode instructions when headless is undefined", async () => { - const result = await constructSystemMessage("normal"); + const result = await getFlattened("normal"); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).not.toContain("IMPORTANT: You are running in headless mode"); @@ -160,7 +172,7 @@ Rule 3: Third rule`; }); it("should add JSON format instructions when format is json", async () => { - const result = await constructSystemMessage("normal", undefined, "json"); + const result = await getFlattened("normal", undefined, "json"); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).toContain( @@ -171,12 +183,7 @@ Rule 3: Third rule`; }); it("should add plan mode instructions when mode is plan", async () => { - const result = await constructSystemMessage( - "plan", - undefined, - undefined, - false, - ); + const result = await getFlattened("plan", undefined, undefined, false); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).toContain(PLAN_MODE_STRING); @@ -191,12 +198,7 @@ Rule 3: Third rule`; }); it("should not add plan mode instructions when mode is not plan", async () => { - const result = await constructSystemMessage( - "normal", - undefined, - undefined, - false, - ); + const result = await getFlattened("normal", undefined, undefined, false); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).not.toContain(PLAN_MODE_STRING); @@ -206,7 +208,7 @@ Rule 3: Third rule`; }); it("should not add plan mode instructions when mode is normal", async () => { - const result = await constructSystemMessage("normal"); + const result = await getFlattened("normal"); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).not.toContain(PLAN_MODE_STRING); @@ -216,12 +218,7 @@ Rule 3: Third rule`; }); it("should include basic plan mode description", async () => { - const result = await constructSystemMessage( - "plan", - undefined, - undefined, - false, - ); + const result = await getFlattened("plan", undefined, undefined, false); expect(result).toContain(PLAN_MODE_STRING); expect(result).toContain("read-only tools"); @@ -229,12 +226,7 @@ Rule 3: Third rule`; }); it("should combine plan mode with headless mode instructions", async () => { - const result = await constructSystemMessage( - "plan", - undefined, - undefined, - true, - ); + const result = await getFlattened("plan", undefined, undefined, true); expect(result).toContain("IMPORTANT: You are running in headless mode"); expect(result).toContain(PLAN_MODE_STRING); @@ -244,12 +236,7 @@ Rule 3: Third rule`; }); it("should combine plan mode with JSON format instructions", async () => { - const result = await constructSystemMessage( - "plan", - undefined, - "json", - false, - ); + const result = await getFlattened("plan", undefined, "json", false); expect(result).toContain( "IMPORTANT: You are operating in JSON output mode", @@ -259,4 +246,35 @@ Rule 3: Third rule`; "which means that your goal is to help the user investigate their ideas", ); }); + + it("should put static content in first block and dynamic content in last block", async () => { + const blocks = await constructSystemMessage("normal"); + + // First block should have static identity content + expect(blocks[0].text).toContain("You are an agent in the Continue CLI"); + + // Last block should have dynamic environment content + const lastBlock = blocks[blocks.length - 1]; + expect(lastBlock.text).toContain("Working directory:"); + expect(lastBlock.text).toContain(""); + expect(lastBlock.text).toContain("Platform:"); + }); + + it("should put user rules in a separate middle block", async () => { + const rules = ["Custom rule"]; + const blocks = await constructSystemMessage("normal", rules); + + // Should have at least 3 blocks: static, rules, dynamic + expect(blocks.length).toBeGreaterThanOrEqual(3); + + // Find the rules block (not first, not last) + const rulesBlock = blocks.find( + (b) => + b.text.includes('') && + b !== blocks[0] && + b !== blocks[blocks.length - 1], + ); + expect(rulesBlock).toBeDefined(); + expect(rulesBlock!.text).toContain("Custom rule"); + }); }); diff --git a/extensions/cli/src/systemMessage.ts b/extensions/cli/src/systemMessage.ts index 6e3a7476290..8293fe46b66 100644 --- a/extensions/cli/src/systemMessage.ts +++ b/extensions/cli/src/systemMessage.ts @@ -7,6 +7,16 @@ import { PermissionMode } from "./permissions/types.js"; import { serviceContainer } from "./services/ServiceContainer.js"; import { ConfigServiceState, SERVICE_NAMES } from "./services/types.js"; +/** + * A content block within the system message. + * Split into separate blocks so that static content can be cached + * independently from dynamic content by Anthropic's prompt caching. + */ +export interface SystemMessageBlock { + type: "text"; + text: string; +} + /** * Check if current directory is a git repository */ @@ -37,26 +47,6 @@ function getGitStatus(): string { } } -const baseSystemMessage = `You are an agent in the Continue CLI. Given the user's prompt, you should use the tools available to you to answer the user's question. - -Notes: -1. IMPORTANT: You should be concise, direct, and to the point, since your responses will be displayed on a command line interface. -2. When relevant, share file names and code snippets relevant to the query -Here is useful information about the environment you are running in: - -Working directory: ${process.cwd()} -Is directory a git repo: ${isGitRepo()} -Platform: ${process.platform} -Today's date: ${new Date().toISOString().split("T")[0]} - - -As you answer the user's questions, you can use the following context: - -This is the git status at the start of the conversation. Note that this status is a snapshot in time, and will not update during the conversation. - -${getGitStatus()} -`; - async function getConfigYamlRules(): Promise { const configState = await serviceContainer.get( SERVICE_NAMES.CONFIG, @@ -75,21 +65,81 @@ async function getConfigYamlRules(): Promise { } /** - * Load and construct a comprehensive system message with base message and rules section + * Flatten system message blocks into a single string. + * Useful for contexts that need a plain string (e.g. token counting, subagent prompts). + */ +export function flattenSystemMessage(blocks: SystemMessageBlock[]): string { + return blocks.map((b) => b.text).join("\n\n"); +} + +/** + * Load and construct a comprehensive system message as an array of content blocks. + * + * The blocks are ordered so that static content comes first (cacheable across + * all users/projects), semi-static content (user rules, same within a session) + * comes next, and dynamic content (environment info that changes per session) + * comes last. This maximizes Anthropic prompt cache hit rates because the + * static prefix remains identical across requests. + * + * @param mode - Current permission mode * @param additionalRules - Additional rules from --rule flags * @param format - Output format for headless mode * @param headless - Whether running in headless mode - * @param mode - Current permission mode - * @returns The comprehensive system message with base message and rules section + * @returns Array of system message content blocks */ export async function constructSystemMessage( mode: PermissionMode, additionalRules?: string[], format?: "json", headless?: boolean, -): Promise { - const agentFiles = ["AGENTS.md", "AGENT.md", "CLAUDE.md", "CODEX.md"]; +): Promise { + // --- Block 1: Static core identity and behavior instructions --- + // This content is identical for ALL users and ALL projects, maximizing cache hits. + let staticBlock = `You are an agent in the Continue CLI. Given the user's prompt, you should use the tools available to you to answer the user's question. + +Notes: +1. IMPORTANT: You should be concise, direct, and to the point, since your responses will be displayed on a command line interface. +2. When relevant, share file names and code snippets relevant to the query`; + + // Add mode-specific instructions to static block (these depend only on the mode, + // not on the environment, so they cache well within a mode) + if (mode === "plan") { + staticBlock += + '\nYou are operating in _Plan Mode_, which means that your goal is to help the user investigate their ideas and develop a plan before taking action. You only have access to read-only tools and should not attempt to circumvent them to write / delete / create files. Ask the user to switch to agent mode if they want to make changes. For example, it is not acceptable to use the Bash tool to write to files.'; + } else { + // Check if commit signature is disabled via environment variable + if (!process.env.CONTINUE_CLI_DISABLE_COMMIT_SIGNATURE) { + staticBlock += `\nWhen creating commits using any CLI or tool, include the following in the commit message: +Generated with [Continue](https://continue.dev) + +Co-Authored-By: Continue +`; + } + } + // In headless mode, add instructions to be concise and only provide final answers + if (headless) { + staticBlock += ` + +IMPORTANT: You are running in headless mode. Provide ONLY your final answer to the user's question. Do not include explanations, reasoning, or additional commentary unless specifically requested. Be direct and concise.`; + } + + // Add JSON formatting instructions if format is json + if (format === "json") { + staticBlock += ` + +IMPORTANT: You are operating in JSON output mode. Your final response MUST be valid JSON that can be parsed by JSON.parse(). The JSON should contain properties relevant to answer the user's question. You don't need to include any general "response" or "answer" field. Do not include any text before or after the JSON - the entire response must be parseable JSON. + +Example response format: +{ + "property": "value" +}`; + } + + const blocks: SystemMessageBlock[] = [{ type: "text", text: staticBlock }]; + + // --- Block 2: Semi-static user rules (same within a session, differs per project) --- + const agentFiles = ["AGENTS.md", "AGENT.md", "CLAUDE.md", "CODEX.md"]; let agentContent = ""; try { @@ -124,59 +174,40 @@ export async function constructSystemMessage( const configYamlRules = await getConfigYamlRules(); processedRules.push(...configYamlRules); - // Construct the comprehensive system message - let systemMessage = baseSystemMessage; + if (agentContent || processedRules.length > 0) { + let rulesText = ''; - // Add plan mode specific instructions if in plan mode - if (mode === "plan") { - systemMessage += - '\nYou are operating in _Plan Mode_, which means that your goal is to help the user investigate their ideas and develop a plan before taking action. You only have access to read-only tools and should not attempt to circumvent them to write / delete / create files. Ask the user to switch to agent mode if they want to make changes. For example, it is not acceptable to use the Bash tool to write to files.\n'; - } else { - // Check if commit signature is disabled via environment variable - if (!process.env.CONTINUE_CLI_DISABLE_COMMIT_SIGNATURE) { - systemMessage += `\nWhen creating commits using any CLI or tool, include the following in the commit message: -Generated with [Continue](https://continue.dev) + if (agentContent) { + rulesText += `\n${agentContent}`; + } -Co-Authored-By: Continue -\n`; + if (processedRules.length > 0) { + const separator = agentContent ? "\n\n" : "\n"; + rulesText += `${separator}${processedRules.join("\n\n")}`; } - } - // In headless mode, add instructions to be concise and only provide final answers - if (headless) { - systemMessage += ` + rulesText += "\n"; -IMPORTANT: You are running in headless mode. Provide ONLY your final answer to the user's question. Do not include explanations, reasoning, or additional commentary unless specifically requested. Be direct and concise.`; + blocks.push({ type: "text", text: rulesText }); } - // Add JSON formatting instructions if format is json - if (format === "json") { - systemMessage += ` - -IMPORTANT: You are operating in JSON output mode. Your final response MUST be valid JSON that can be parsed by JSON.parse(). The JSON should contain properties relevant to answer the user's question. You don't need to include any general "response" or "answer" field. Do not include any text before or after the JSON - the entire response must be parseable JSON. - -Example response format: -{ - "property": "value" -}`; - } + // --- Block 3: Dynamic environment info (changes per session/directory) --- + const dynamicBlock = `Here is useful information about the environment you are running in: + +Working directory: ${process.cwd()} +Is directory a git repo: ${isGitRepo()} +Platform: ${process.platform} +Today's date: ${new Date().toISOString().split("T")[0]} + - // Add rules section if we have any rules or agent content - if (agentContent || processedRules.length > 0) { - systemMessage += '\n\n'; +As you answer the user's questions, you can use the following context: - if (agentContent) { - systemMessage += `\n${agentContent}`; - } +This is the git status at the start of the conversation. Note that this status is a snapshot in time, and will not update during the conversation. - // Add processed rules from --rule flags - if (processedRules.length > 0) { - const separator = agentContent ? "\n\n" : "\n"; - systemMessage += `${separator}${processedRules.join("\n\n")}`; - } +${getGitStatus()} +`; - systemMessage += "\n"; - } + blocks.push({ type: "text", text: dynamicBlock }); - return systemMessage; + return blocks; } From 20da455ef46e66f5c8863dddce26a960d4778006 Mon Sep 17 00:00:00 2001 From: Nate Date: Sat, 28 Feb 2026 16:20:42 -0800 Subject: [PATCH 5/6] Fix prettier formatting and use nullish check for system message Apply prettier formatting to PR-changed files and fix the truthy check for providedSystemMessage to use !== undefined, preserving behavior for explicitly empty system messages. Co-Authored-By: Claude Opus 4.6 --- extensions/cli/src/commands/serve.test.ts | 5 ++--- .../cli/src/services/SystemMessageService.test.ts | 4 +--- .../streamChatResponse.autoCompaction.test.ts | 5 ++--- .../src/stream/streamChatResponse.autoCompaction.ts | 2 +- .../stream/streamChatResponse.compactionHelpers.ts | 5 +---- .../stream/streamChatResponse.systemMessage.test.ts | 13 ++++++------- extensions/cli/src/stream/streamChatResponse.ts | 5 +---- extensions/cli/src/subagent/executor.ts | 10 ++++------ 8 files changed, 18 insertions(+), 31 deletions(-) diff --git a/extensions/cli/src/commands/serve.test.ts b/extensions/cli/src/commands/serve.test.ts index ba070d9b3f9..b9abf26f6c8 100644 --- a/extensions/cli/src/commands/serve.test.ts +++ b/extensions/cli/src/commands/serve.test.ts @@ -46,9 +46,8 @@ describe("serve command", () => { constructSystemMessage: vi.fn(() => Promise.resolve([{ type: "text", text: "System message" }]), ), - flattenSystemMessage: vi.fn( - (blocks: Array<{ text: string }>) => - blocks.map((b) => b.text).join("\n\n"), + flattenSystemMessage: vi.fn((blocks: Array<{ text: string }>) => + blocks.map((b) => b.text).join("\n\n"), ), })); diff --git a/extensions/cli/src/services/SystemMessageService.test.ts b/extensions/cli/src/services/SystemMessageService.test.ts index cc05135ea02..2d05ea5417b 100644 --- a/extensions/cli/src/services/SystemMessageService.test.ts +++ b/extensions/cli/src/services/SystemMessageService.test.ts @@ -70,9 +70,7 @@ describe("SystemMessageService", () => { headless: true, }; - const mockBlocks = [ - { type: "text", text: "Test system message" }, - ]; + const mockBlocks = [{ type: "text", text: "Test system message" }]; constructSystemMessageMock.mockResolvedValue(mockBlocks); await service.initialize(config); diff --git a/extensions/cli/src/stream/streamChatResponse.autoCompaction.test.ts b/extensions/cli/src/stream/streamChatResponse.autoCompaction.test.ts index 02800f7b11a..3ba8e708eae 100644 --- a/extensions/cli/src/stream/streamChatResponse.autoCompaction.test.ts +++ b/extensions/cli/src/stream/streamChatResponse.autoCompaction.test.ts @@ -54,9 +54,8 @@ vi.mock("../services/index.js", () => ({ })); vi.mock("../systemMessage.js", () => ({ - flattenSystemMessage: vi.fn( - (blocks: Array<{ text: string }>) => - blocks.map((b) => b.text).join("\n\n"), + flattenSystemMessage: vi.fn((blocks: Array<{ text: string }>) => + blocks.map((b) => b.text).join("\n\n"), ), })); diff --git a/extensions/cli/src/stream/streamChatResponse.autoCompaction.ts b/extensions/cli/src/stream/streamChatResponse.autoCompaction.ts index 3ecb965a73f..059aa4ef95b 100644 --- a/extensions/cli/src/stream/streamChatResponse.autoCompaction.ts +++ b/extensions/cli/src/stream/streamChatResponse.autoCompaction.ts @@ -164,7 +164,7 @@ export async function handleAutoCompaction( // Get system message to calculate its token count for compaction pruning // Use provided message if available, otherwise fetch it (for backward compatibility) let resolvedSystemMessage: string; - if (providedSystemMessage) { + if (providedSystemMessage !== undefined) { resolvedSystemMessage = providedSystemMessage; } else { const { services } = await import("../services/index.js"); diff --git a/extensions/cli/src/stream/streamChatResponse.compactionHelpers.ts b/extensions/cli/src/stream/streamChatResponse.compactionHelpers.ts index b063df14b42..0f83d88d468 100644 --- a/extensions/cli/src/stream/streamChatResponse.compactionHelpers.ts +++ b/extensions/cli/src/stream/streamChatResponse.compactionHelpers.ts @@ -4,10 +4,7 @@ import type { ChatHistoryItem } from "core/index.js"; import type { ChatCompletionTool } from "openai/resources/chat/completions.mjs"; import { services } from "../services/index.js"; -import { - flattenSystemMessage, - SystemMessageBlock, -} from "../systemMessage.js"; +import { flattenSystemMessage, SystemMessageBlock } from "../systemMessage.js"; import { ToolCall } from "../tools/index.js"; import { logger } from "../util/logger.js"; import { validateContextLength } from "../util/tokenizer.js"; diff --git a/extensions/cli/src/stream/streamChatResponse.systemMessage.test.ts b/extensions/cli/src/stream/streamChatResponse.systemMessage.test.ts index b6c2f0cd542..83ee66ae6a9 100644 --- a/extensions/cli/src/stream/streamChatResponse.systemMessage.test.ts +++ b/extensions/cli/src/stream/streamChatResponse.systemMessage.test.ts @@ -25,9 +25,8 @@ vi.mock("../services/index.js", () => ({ // Mock systemMessage module for flattenSystemMessage vi.mock("../systemMessage.js", () => ({ - flattenSystemMessage: vi.fn( - (blocks: Array<{ text: string }>) => - blocks.map((b) => b.text).join("\n\n"), + flattenSystemMessage: vi.fn((blocks: Array<{ text: string }>) => + blocks.map((b) => b.text).join("\n\n"), ), SystemMessageBlock: {}, })); @@ -81,7 +80,9 @@ describe("streamChatResponse system message validation", () => { } as ModelConfig; // Mock system message as blocks - const systemMessage = [{ type: "text" as const, text: "System instructions" }]; + const systemMessage = [ + { type: "text" as const, text: "System instructions" }, + ]; // Small chat history const chatHistory: ChatHistoryItem[] = [ @@ -179,9 +180,7 @@ describe("streamChatResponse system message validation", () => { // System message + history that's exactly at limit without buffer // but fails with buffer - const systemMessage = [ - { type: "text" as const, text: "x".repeat(800) }, - ]; // ~200 tokens + const systemMessage = [{ type: "text" as const, text: "x".repeat(800) }]; // ~200 tokens const chatHistory: ChatHistoryItem[] = [ { diff --git a/extensions/cli/src/stream/streamChatResponse.ts b/extensions/cli/src/stream/streamChatResponse.ts index f063060284d..512659348f2 100644 --- a/extensions/cli/src/stream/streamChatResponse.ts +++ b/extensions/cli/src/stream/streamChatResponse.ts @@ -10,10 +10,7 @@ import type { import { pruneLastMessage } from "../compaction.js"; import { services } from "../services/index.js"; -import { - flattenSystemMessage, - SystemMessageBlock, -} from "../systemMessage.js"; +import { flattenSystemMessage, SystemMessageBlock } from "../systemMessage.js"; import { posthogService } from "../telemetry/posthogService.js"; import { telemetryService } from "../telemetry/telemetryService.js"; import { applyChatCompletionToolOverrides } from "../tools/applyToolOverrides.js"; diff --git a/extensions/cli/src/subagent/executor.ts b/extensions/cli/src/subagent/executor.ts index 5c85b905282..5d2444c3ff3 100644 --- a/extensions/cli/src/subagent/executor.ts +++ b/extensions/cli/src/subagent/executor.ts @@ -5,10 +5,7 @@ import { serviceContainer } from "../services/ServiceContainer.js"; import type { ToolPermissionServiceState } from "../services/ToolPermissionService.js"; import { ModelServiceState, SERVICE_NAMES } from "../services/types.js"; import { streamChatResponse } from "../stream/streamChatResponse.js"; -import { - flattenSystemMessage, - SystemMessageBlock, -} from "../systemMessage.js"; +import { flattenSystemMessage, SystemMessageBlock } from "../systemMessage.js"; import { escapeEvents } from "../util/cli.js"; import { logger } from "../util/logger.js"; @@ -109,8 +106,9 @@ export async function executeSubAgent( // Override system message for this execution (wrap in block format) if (services.systemMessage) { - services.systemMessage.getSystemMessage = async () => - [{ type: "text" as const, text: systemMessage }]; + services.systemMessage.getSystemMessage = async () => [ + { type: "text" as const, text: systemMessage }, + ]; } // Temporarily disable ChatHistoryService to prevent it from interfering with child session From 400c329d367f1bda61b80adb7c5a0b3cabe09109 Mon Sep 17 00:00:00 2001 From: Nate Date: Sat, 28 Feb 2026 16:30:36 -0800 Subject: [PATCH 6/6] Fix lint errors and test mock for systemMessage refactor - Flip negated condition in autoCompaction to satisfy no-negated-condition rule - Remove unused eslint-disable complexity directives - Update vitest.setup.ts global mock to include flattenSystemMessage export and return SystemMessageBlock[] instead of plain string Co-Authored-By: Claude Opus 4.6 --- .../src/stream/streamChatResponse.autoCompaction.ts | 6 +++--- extensions/cli/src/stream/streamChatResponse.ts | 2 +- extensions/cli/src/subagent/executor.ts | 1 - extensions/cli/vitest.setup.ts | 12 +++++++++--- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/extensions/cli/src/stream/streamChatResponse.autoCompaction.ts b/extensions/cli/src/stream/streamChatResponse.autoCompaction.ts index 059aa4ef95b..8853476c60b 100644 --- a/extensions/cli/src/stream/streamChatResponse.autoCompaction.ts +++ b/extensions/cli/src/stream/streamChatResponse.autoCompaction.ts @@ -164,15 +164,15 @@ export async function handleAutoCompaction( // Get system message to calculate its token count for compaction pruning // Use provided message if available, otherwise fetch it (for backward compatibility) let resolvedSystemMessage: string; - if (providedSystemMessage !== undefined) { - resolvedSystemMessage = providedSystemMessage; - } else { + if (providedSystemMessage === undefined) { const { services } = await import("../services/index.js"); const { flattenSystemMessage } = await import("../systemMessage.js"); const blocks = await services.systemMessage.getSystemMessage( services.toolPermissions.getState().currentMode, ); resolvedSystemMessage = flattenSystemMessage(blocks); + } else { + resolvedSystemMessage = providedSystemMessage; } const { countChatHistoryItemTokens } = await import("../util/tokenizer.js"); diff --git a/extensions/cli/src/stream/streamChatResponse.ts b/extensions/cli/src/stream/streamChatResponse.ts index 512659348f2..9d0ae6204dc 100644 --- a/extensions/cli/src/stream/streamChatResponse.ts +++ b/extensions/cli/src/stream/streamChatResponse.ts @@ -195,7 +195,7 @@ interface ProcessStreamingResponseOptions { } // Process a single streaming response and return whether we need to continue -// eslint-disable-next-line max-statements, complexity +// eslint-disable-next-line max-statements export async function processStreamingResponse( options: ProcessStreamingResponseOptions, ): Promise<{ diff --git a/extensions/cli/src/subagent/executor.ts b/extensions/cli/src/subagent/executor.ts index 5d2444c3ff3..1c9c59f1c16 100644 --- a/extensions/cli/src/subagent/executor.ts +++ b/extensions/cli/src/subagent/executor.ts @@ -57,7 +57,6 @@ async function buildAgentSystemMessage( /** * Execute a subagent in a child session */ -// eslint-disable-next-line complexity export async function executeSubAgent( options: SubAgentExecutionOptions, ): Promise { diff --git a/extensions/cli/vitest.setup.ts b/extensions/cli/vitest.setup.ts index ac91939574b..b535e9b529c 100644 --- a/extensions/cli/vitest.setup.ts +++ b/extensions/cli/vitest.setup.ts @@ -50,10 +50,16 @@ vi.mock("open", () => ({ // Mock constructSystemMessage to avoid service container issues in tests vi.mock("./src/systemMessage.js", () => ({ - constructSystemMessage: vi + constructSystemMessage: vi.fn().mockResolvedValue([ + { + type: "text", + text: "You are an agent in the Continue CLI. Given the user's prompt, you should use the tools available to you to answer the user's question.", + }, + ]), + flattenSystemMessage: vi .fn() - .mockResolvedValue( - "You are an agent in the Continue CLI. Given the user's prompt, you should use the tools available to you to answer the user's question.", + .mockImplementation((blocks: Array<{ type: string; text: string }>) => + blocks.map((b) => b.text).join("\n\n"), ), }));