diff --git a/core/util/messageConversion.ts b/core/util/messageConversion.ts index dff75869933..6210d3d1a48 100644 --- a/core/util/messageConversion.ts +++ b/core/util/messageConversion.ts @@ -351,13 +351,16 @@ export function convertFromUnifiedHistory( } /** - * Convert ChatHistoryItem array to ChatCompletionMessageParam array with injected system message + * Convert ChatHistoryItem array to ChatCompletionMessageParam array with injected system message. + * Supports both a plain string and an array of content blocks for the system message. + * When an array is provided, it is passed as the system message content directly, + * which allows Anthropic's prompt caching to cache each block independently. * @param historyItems - The chat history items - * @param systemMessage - The system message to inject at the beginning + * @param systemMessage - The system message (string or array of {type:"text", text:string} blocks) */ export function convertFromUnifiedHistoryWithSystemMessage( historyItems: ChatHistoryItem[], - systemMessage: string, + systemMessage: string | Array<{ type: "text"; text: string }>, ): ChatCompletionMessageParam[] { const messages: ChatCompletionMessageParam[] = []; @@ -365,7 +368,7 @@ export function convertFromUnifiedHistoryWithSystemMessage( messages.push({ role: "system", content: systemMessage, - }); + } as ChatCompletionMessageParam); // Convert the rest of the history const convertedMessages = convertFromUnifiedHistory(historyItems); diff --git a/extensions/cli/src/__mocks__/systemMessage.ts b/extensions/cli/src/__mocks__/systemMessage.ts index 0599a90db28..467caea8c73 100644 --- a/extensions/cli/src/__mocks__/systemMessage.ts +++ b/extensions/cli/src/__mocks__/systemMessage.ts @@ -1,7 +1,14 @@ import { vi } from "vitest"; -export const constructSystemMessage = vi +export const constructSystemMessage = vi.fn().mockResolvedValue([ + { + type: "text", + text: "You are an agent in the Continue CLI. Given the user's prompt, you should use the tools available to you to answer the user's question.", + }, +]); + +export const flattenSystemMessage = vi .fn() - .mockResolvedValue( - "You are an agent in the Continue CLI. Given the user's prompt, you should use the tools available to you to answer the user's question.", + .mockImplementation((blocks: Array<{ type: string; text: string }>) => + blocks.map((b) => b.text).join("\n\n"), ); diff --git a/extensions/cli/src/commands/serve.test.ts b/extensions/cli/src/commands/serve.test.ts index 87487dc3247..b9abf26f6c8 100644 --- a/extensions/cli/src/commands/serve.test.ts +++ b/extensions/cli/src/commands/serve.test.ts @@ -43,7 +43,12 @@ describe("serve command", () => { })); vi.mock("../systemMessage.js", () => ({ - constructSystemMessage: vi.fn(() => Promise.resolve("System message")), + constructSystemMessage: vi.fn(() => + Promise.resolve([{ type: "text", text: "System message" }]), + ), + flattenSystemMessage: vi.fn((blocks: Array<{ text: string }>) => + blocks.map((b) => b.text).join("\n\n"), + ), })); vi.mock("../telemetry/telemetryService.js", () => ({ diff --git a/extensions/cli/src/commands/serve.ts b/extensions/cli/src/commands/serve.ts index 29e4618a7f7..c5aa2b28d97 100644 --- a/extensions/cli/src/commands/serve.ts +++ b/extensions/cli/src/commands/serve.ts @@ -29,7 +29,10 @@ import { loadOrCreateSessionById, } from "../session.js"; import { messageQueue } from "../stream/messageQueue.js"; -import { constructSystemMessage } from "../systemMessage.js"; +import { + constructSystemMessage, + flattenSystemMessage, +} from "../systemMessage.js"; import { telemetryService } from "../telemetry/telemetryService.js"; import { reportFailureTool } from "../tools/reportFailure.js"; import { gracefulExit, updateAgentMetadata } from "../util/exit.js"; @@ -153,7 +156,7 @@ export async function serve(prompt?: string, options: ServeOptions = {}) { } // Initialize session with system message - const systemMessage = await constructSystemMessage( + const systemMessageBlocks = await constructSystemMessage( permissionsState.currentMode, options.rule, undefined, @@ -161,9 +164,12 @@ export async function serve(prompt?: string, options: ServeOptions = {}) { ); const initialHistory: ChatHistoryItem[] = []; - if (systemMessage) { + if (systemMessageBlocks.length > 0) { initialHistory.push({ - message: { role: "system" as const, content: systemMessage }, + message: { + role: "system" as const, + content: flattenSystemMessage(systemMessageBlocks), + }, contextItems: [], }); } diff --git a/extensions/cli/src/services/SystemMessageService.test.ts b/extensions/cli/src/services/SystemMessageService.test.ts index d925c88b112..2d05ea5417b 100644 --- a/extensions/cli/src/services/SystemMessageService.test.ts +++ b/extensions/cli/src/services/SystemMessageService.test.ts @@ -70,7 +70,8 @@ describe("SystemMessageService", () => { headless: true, }; - constructSystemMessageMock.mockResolvedValue("Test system message"); + const mockBlocks = [{ type: "text", text: "Test system message" }]; + constructSystemMessageMock.mockResolvedValue(mockBlocks); await service.initialize(config); const message = await service.getSystemMessage("normal"); @@ -81,7 +82,7 @@ describe("SystemMessageService", () => { "json", true, ); - expect(message).toBe("Test system message"); + expect(message).toEqual(mockBlocks); }); }); @@ -96,7 +97,9 @@ describe("SystemMessageService", () => { format: "json", }); - constructSystemMessageMock.mockResolvedValue("Updated message"); + constructSystemMessageMock.mockResolvedValue([ + { type: "text", text: "Updated message" }, + ]); await service.getSystemMessage("normal"); expect(constructSystemMessageMock).toHaveBeenCalledWith( @@ -117,7 +120,9 @@ describe("SystemMessageService", () => { additionalRules: ["rule2", "rule3"], }); - constructSystemMessageMock.mockResolvedValue("Updated message"); + constructSystemMessageMock.mockResolvedValue([ + { type: "text", text: "Updated message" }, + ]); await service.getSystemMessage("normal"); expect(constructSystemMessageMock).toHaveBeenCalledWith( diff --git a/extensions/cli/src/services/SystemMessageService.ts b/extensions/cli/src/services/SystemMessageService.ts index 44ae3098c4e..e16398c14ae 100644 --- a/extensions/cli/src/services/SystemMessageService.ts +++ b/extensions/cli/src/services/SystemMessageService.ts @@ -1,5 +1,8 @@ import { PermissionMode } from "../permissions/types.js"; -import { constructSystemMessage } from "../systemMessage.js"; +import { + constructSystemMessage, + SystemMessageBlock, +} from "../systemMessage.js"; import { logger } from "../util/logger.js"; import { BaseService } from "./BaseService.js"; @@ -46,7 +49,9 @@ export class SystemMessageService extends BaseService /** * Get a fresh system message with current mode and configuration */ - public async getSystemMessage(currentMode: PermissionMode): Promise { + public async getSystemMessage( + currentMode: PermissionMode, + ): Promise { const { additionalRules, format, headless } = this.currentState; const systemMessage = await constructSystemMessage( @@ -58,7 +63,7 @@ export class SystemMessageService extends BaseService logger.debug("Generated fresh system message", { mode: currentMode, - messageLength: systemMessage.length, + blockCount: systemMessage.length, }); return systemMessage; diff --git a/extensions/cli/src/stream/streamChatResponse.autoCompaction.test.ts b/extensions/cli/src/stream/streamChatResponse.autoCompaction.test.ts index 21b0a565133..3ba8e708eae 100644 --- a/extensions/cli/src/stream/streamChatResponse.autoCompaction.test.ts +++ b/extensions/cli/src/stream/streamChatResponse.autoCompaction.test.ts @@ -43,7 +43,9 @@ vi.mock("../util/logger.js", () => ({ vi.mock("../services/index.js", () => ({ services: { systemMessage: { - getSystemMessage: vi.fn(() => Promise.resolve("System message")), + getSystemMessage: vi.fn(() => + Promise.resolve([{ type: "text", text: "System message" }]), + ), }, toolPermissions: { getState: vi.fn(() => ({ currentMode: "enabled" })), @@ -51,6 +53,12 @@ vi.mock("../services/index.js", () => ({ }, })); +vi.mock("../systemMessage.js", () => ({ + flattenSystemMessage: vi.fn((blocks: Array<{ text: string }>) => + blocks.map((b) => b.text).join("\n\n"), + ), +})); + vi.mock("os", async (importOriginal) => { const actual = (await importOriginal()) as object; return { diff --git a/extensions/cli/src/stream/streamChatResponse.autoCompaction.ts b/extensions/cli/src/stream/streamChatResponse.autoCompaction.ts index 156e9fa3526..8853476c60b 100644 --- a/extensions/cli/src/stream/streamChatResponse.autoCompaction.ts +++ b/extensions/cli/src/stream/streamChatResponse.autoCompaction.ts @@ -163,16 +163,17 @@ export async function handleAutoCompaction( try { // Get system message to calculate its token count for compaction pruning // Use provided message if available, otherwise fetch it (for backward compatibility) - const systemMessage = - providedSystemMessage ?? - (async () => { - const { services } = await import("../services/index.js"); - return services.systemMessage.getSystemMessage( - services.toolPermissions.getState().currentMode, - ); - })(); - const resolvedSystemMessage = - typeof systemMessage === "string" ? systemMessage : await systemMessage; + let resolvedSystemMessage: string; + if (providedSystemMessage === undefined) { + const { services } = await import("../services/index.js"); + const { flattenSystemMessage } = await import("../systemMessage.js"); + const blocks = await services.systemMessage.getSystemMessage( + services.toolPermissions.getState().currentMode, + ); + resolvedSystemMessage = flattenSystemMessage(blocks); + } else { + resolvedSystemMessage = providedSystemMessage; + } const { countChatHistoryItemTokens } = await import("../util/tokenizer.js"); const systemMessageTokens = countChatHistoryItemTokens( diff --git a/extensions/cli/src/stream/streamChatResponse.autoContinuation.test.ts b/extensions/cli/src/stream/streamChatResponse.autoContinuation.test.ts index 8f4e95a2b22..395479bb27d 100644 --- a/extensions/cli/src/stream/streamChatResponse.autoContinuation.test.ts +++ b/extensions/cli/src/stream/streamChatResponse.autoContinuation.test.ts @@ -52,7 +52,9 @@ vi.mock("../util/logger.js", () => ({ vi.mock("../services/index.js", () => ({ services: { systemMessage: { - getSystemMessage: vi.fn(() => Promise.resolve("System message")), + getSystemMessage: vi.fn(() => + Promise.resolve([{ type: "text", text: "System message" }]), + ), }, toolPermissions: { getState: vi.fn(() => ({ currentMode: "enabled" })), diff --git a/extensions/cli/src/stream/streamChatResponse.compactionHelpers.ts b/extensions/cli/src/stream/streamChatResponse.compactionHelpers.ts index b1d8fbe9543..0f83d88d468 100644 --- a/extensions/cli/src/stream/streamChatResponse.compactionHelpers.ts +++ b/extensions/cli/src/stream/streamChatResponse.compactionHelpers.ts @@ -4,6 +4,7 @@ import type { ChatHistoryItem } from "core/index.js"; import type { ChatCompletionTool } from "openai/resources/chat/completions.mjs"; import { services } from "../services/index.js"; +import { flattenSystemMessage, SystemMessageBlock } from "../systemMessage.js"; import { ToolCall } from "../tools/index.js"; import { logger } from "../util/logger.js"; import { validateContextLength } from "../util/tokenizer.js"; @@ -17,7 +18,7 @@ export interface CompactionHelperOptions { isCompacting: boolean; isHeadless: boolean; callbacks?: StreamCallbacks; - systemMessage: string; + systemMessage: SystemMessageBlock[]; tools?: ChatCompletionTool[]; } @@ -42,6 +43,8 @@ export async function handlePreApiCompaction( return { chatHistory, wasCompacted: false }; } + const systemMessageString = flattenSystemMessage(systemMessage); + const { wasCompacted, chatHistory: preCompactHistory } = await handleAutoCompaction(chatHistory, model, llmApi, { isHeadless, @@ -49,7 +52,7 @@ export async function handlePreApiCompaction( onSystemMessage: callbacks?.onSystemMessage, onContent: callbacks?.onContent, }, - systemMessage, + systemMessage: systemMessageString, tools, }); @@ -84,6 +87,8 @@ export async function handlePostToolValidation( return { chatHistory, wasCompacted: false }; } + const systemMessageString = flattenSystemMessage(systemMessage); + // Get updated history after tool execution const chatHistorySvc = services.chatHistory; if ( @@ -98,7 +103,7 @@ export async function handlePostToolValidation( chatHistory, model, safetyBuffer: SAFETY_BUFFER, - systemMessage, + systemMessage: systemMessageString, tools, }); @@ -117,7 +122,7 @@ export async function handlePostToolValidation( onSystemMessage: callbacks?.onSystemMessage, onContent: callbacks?.onContent, }, - systemMessage, + systemMessage: systemMessageString, tools, }); @@ -136,7 +141,7 @@ export async function handlePostToolValidation( chatHistory, model, safetyBuffer: SAFETY_BUFFER, - systemMessage, + systemMessage: systemMessageString, tools, }); @@ -185,6 +190,8 @@ export async function handleNormalAutoCompaction( return { chatHistory, wasCompacted: false }; } + const systemMessageString = flattenSystemMessage(systemMessage); + const chatHistorySvc = services.chatHistory; if ( typeof chatHistorySvc?.isReady === "function" && @@ -200,7 +207,7 @@ export async function handleNormalAutoCompaction( onSystemMessage: callbacks?.onSystemMessage, onContent: callbacks?.onContent, }, - systemMessage, + systemMessage: systemMessageString, tools, }); diff --git a/extensions/cli/src/stream/streamChatResponse.helpers.ts b/extensions/cli/src/stream/streamChatResponse.helpers.ts index 205a76fe85f..94c7e14eb9c 100644 --- a/extensions/cli/src/stream/streamChatResponse.helpers.ts +++ b/extensions/cli/src/stream/streamChatResponse.helpers.ts @@ -367,6 +367,11 @@ export function recordStreamTelemetry(options: { }); // Mirror core metrics to PostHog for product analytics + const cacheReadTokens = + fullUsage?.prompt_tokens_details?.cache_read_tokens ?? 0; + const cacheWriteTokens = + fullUsage?.prompt_tokens_details?.cache_write_tokens ?? 0; + try { posthogService.capture("apiRequest", { model: model.model, @@ -374,7 +379,21 @@ export function recordStreamTelemetry(options: { inputTokens: actualInputTokens, outputTokens: actualOutputTokens, costUsd: cost, + cacheReadTokens, + cacheWriteTokens, }); + + // Emit prompt_cache_metrics for the Prompt Cache Performance dashboard + if (actualInputTokens > 0) { + posthogService.capture("prompt_cache_metrics", { + model: model.model, + cache_read_tokens: cacheReadTokens, + cache_write_tokens: cacheWriteTokens, + total_prompt_tokens: actualInputTokens, + cache_hit_rate: cacheReadTokens / actualInputTokens, + tool_count: tools?.length ?? 0, + }); + } } catch {} return cost; diff --git a/extensions/cli/src/stream/streamChatResponse.systemMessage.test.ts b/extensions/cli/src/stream/streamChatResponse.systemMessage.test.ts index cd20f734c8c..83ee66ae6a9 100644 --- a/extensions/cli/src/stream/streamChatResponse.systemMessage.test.ts +++ b/extensions/cli/src/stream/streamChatResponse.systemMessage.test.ts @@ -23,6 +23,14 @@ vi.mock("../services/index.js", () => ({ }, })); +// Mock systemMessage module for flattenSystemMessage +vi.mock("../systemMessage.js", () => ({ + flattenSystemMessage: vi.fn((blocks: Array<{ text: string }>) => + blocks.map((b) => b.text).join("\n\n"), + ), + SystemMessageBlock: {}, +})); + // Mock logger vi.mock("../util/logger.js", () => ({ logger: { @@ -71,11 +79,10 @@ describe("streamChatResponse system message validation", () => { }, } as ModelConfig; - // Mock system message - const systemMessage = "System instructions"; - vi.mocked(services.systemMessage.getSystemMessage).mockResolvedValue( - systemMessage, - ); + // Mock system message as blocks + const systemMessage = [ + { type: "text" as const, text: "System instructions" }, + ]; // Small chat history const chatHistory: ChatHistoryItem[] = [ @@ -119,7 +126,9 @@ describe("streamChatResponse system message validation", () => { } as ModelConfig; // Small system message (50 tokens worth) - const smallSystemMessage = "x".repeat(200); // ~50 tokens + const smallSystemMessage = [ + { type: "text" as const, text: "x".repeat(200) }, + ]; // ~50 tokens // Small chat history (200 tokens worth) const chatHistory: ChatHistoryItem[] = [ @@ -171,7 +180,7 @@ describe("streamChatResponse system message validation", () => { // System message + history that's exactly at limit without buffer // but fails with buffer - const systemMessage = "x".repeat(800); // ~200 tokens + const systemMessage = [{ type: "text" as const, text: "x".repeat(800) }]; // ~200 tokens const chatHistory: ChatHistoryItem[] = [ { diff --git a/extensions/cli/src/stream/streamChatResponse.test.ts b/extensions/cli/src/stream/streamChatResponse.test.ts index 131132e3f64..a2e707e2774 100644 --- a/extensions/cli/src/stream/streamChatResponse.test.ts +++ b/extensions/cli/src/stream/streamChatResponse.test.ts @@ -152,7 +152,7 @@ describe("processStreamingResponse - content preservation", () => { model: mockModel, llmApi: mockLlmApi, abortController: mockAbortController, - systemMessage: "You are a helpful assistant.", + systemMessage: [{ type: "text", text: "You are a helpful assistant." }], }); expect(result.content).toBe("I'll read the README file for you."); @@ -173,7 +173,7 @@ describe("processStreamingResponse - content preservation", () => { model: mockModel, llmApi: mockLlmApi, abortController: mockAbortController, - systemMessage: "You are a helpful assistant.", + systemMessage: [{ type: "text", text: "You are a helpful assistant." }], }); expect(result.content).toBe("Let me search for that. "); @@ -222,7 +222,7 @@ describe("processStreamingResponse - content preservation", () => { model: mockModel, llmApi: mockLlmApi, abortController: mockAbortController, - systemMessage: "You are a helpful assistant.", + systemMessage: [{ type: "text", text: "You are a helpful assistant." }], }); expect(result.content).toBe("I'll read the README file for you."); @@ -246,7 +246,7 @@ describe("processStreamingResponse - content preservation", () => { model: mockModel, llmApi: mockLlmApi, abortController: mockAbortController, - systemMessage: "You are a helpful assistant.", + systemMessage: [{ type: "text", text: "You are a helpful assistant." }], }); expect(result.content).toBe("Hello world!"); @@ -371,7 +371,7 @@ describe("processStreamingResponse - content preservation", () => { model: mockModel, llmApi: mockLlmApi, abortController: mockAbortController, - systemMessage: "You are a helpful assistant.", + systemMessage: [{ type: "text", text: "You are a helpful assistant." }], }); expect(responsesStream).toHaveBeenCalledTimes(1); @@ -410,7 +410,7 @@ describe("processStreamingResponse - content preservation", () => { model: mockModel, llmApi: mockLlmApi, abortController: mockAbortController, - systemMessage: "You are a helpful assistant.", + systemMessage: [{ type: "text", text: "You are a helpful assistant." }], }); // Content is captured correctly @@ -461,7 +461,7 @@ describe("processStreamingResponse - content preservation", () => { model: mockModel, llmApi: mockLlmApi, abortController: mockAbortController, - systemMessage: "You are a helpful assistant.", + systemMessage: [{ type: "text", text: "You are a helpful assistant." }], }); // Fixed: Both issues are resolved @@ -510,7 +510,7 @@ describe("processStreamingResponse - content preservation", () => { model: mockModel, llmApi: mockLlmApi, abortController: mockAbortController, - systemMessage: "You are a helpful assistant.", + systemMessage: [{ type: "text", text: "You are a helpful assistant." }], }); } catch (error) { caughtError = error; @@ -524,7 +524,7 @@ describe("processStreamingResponse - content preservation", () => { model: mockModel, llmApi: mockLlmApi, abortController: mockAbortController, - systemMessage: "You are a helpful assistant.", + systemMessage: [{ type: "text", text: "You are a helpful assistant." }], }); expect(result.content).toBe("Hello world!"); diff --git a/extensions/cli/src/stream/streamChatResponse.ts b/extensions/cli/src/stream/streamChatResponse.ts index d0b677b7410..9d0ae6204dc 100644 --- a/extensions/cli/src/stream/streamChatResponse.ts +++ b/extensions/cli/src/stream/streamChatResponse.ts @@ -10,6 +10,7 @@ import type { import { pruneLastMessage } from "../compaction.js"; import { services } from "../services/index.js"; +import { flattenSystemMessage, SystemMessageBlock } from "../systemMessage.js"; import { posthogService } from "../telemetry/posthogService.js"; import { telemetryService } from "../telemetry/telemetryService.js"; import { applyChatCompletionToolOverrides } from "../tools/applyToolOverrides.js"; @@ -190,11 +191,11 @@ interface ProcessStreamingResponseOptions { callbacks?: StreamCallbacks; isHeadless?: boolean; tools?: ChatCompletionTool[]; - systemMessage: string; + systemMessage: SystemMessageBlock[]; } // Process a single streaming response and return whether we need to continue -// eslint-disable-next-line max-statements, complexity +// eslint-disable-next-line max-statements export async function processStreamingResponse( options: ProcessStreamingResponseOptions, ): Promise<{ @@ -216,6 +217,9 @@ export async function processStreamingResponse( let chatHistory = options.chatHistory; + // Flatten system message blocks to a string for token counting + const systemMessageString = flattenSystemMessage(systemMessage); + // Safety buffer to account for tokenization estimation errors const SAFETY_BUFFER = 100; @@ -224,7 +228,7 @@ export async function processStreamingResponse( chatHistory, model, safetyBuffer: SAFETY_BUFFER, - systemMessage, + systemMessage: systemMessageString, tools, }); @@ -241,7 +245,7 @@ export async function processStreamingResponse( chatHistory, model, safetyBuffer: SAFETY_BUFFER, - systemMessage, + systemMessage: systemMessageString, tools, }); } @@ -250,7 +254,7 @@ export async function processStreamingResponse( throw new Error(`Context length validation failed: ${validation.error}`); } - // Create OpenAI format history with validated system message + // Create OpenAI format history with system message blocks for optimal caching const openaiChatHistory = convertFromUnifiedHistoryWithSystemMessage( chatHistory, systemMessage, diff --git a/extensions/cli/src/subagent/executor.ts b/extensions/cli/src/subagent/executor.ts index 1580e7f7f9d..1c9c59f1c16 100644 --- a/extensions/cli/src/subagent/executor.ts +++ b/extensions/cli/src/subagent/executor.ts @@ -5,6 +5,7 @@ import { serviceContainer } from "../services/ServiceContainer.js"; import type { ToolPermissionServiceState } from "../services/ToolPermissionService.js"; import { ModelServiceState, SERVICE_NAMES } from "../services/types.js"; import { streamChatResponse } from "../stream/streamChatResponse.js"; +import { flattenSystemMessage, SystemMessageBlock } from "../systemMessage.js"; import { escapeEvents } from "../util/cli.js"; import { logger } from "../util/logger.js"; @@ -29,18 +30,20 @@ export interface SubAgentResult { } /** - * Build system message for the agent + * Build system message for the agent as a flat string. + * Subagents receive a single combined string (they don't benefit from block-level caching). */ async function buildAgentSystemMessage( agent: ModelServiceState, services: any, ): Promise { - const baseMessage = services.systemMessage + const baseBlocks: SystemMessageBlock[] = services.systemMessage ? await services.systemMessage.getSystemMessage( services.toolPermissions.getState().currentMode, ) - : ""; + : []; + const baseMessage = flattenSystemMessage(baseBlocks); const agentPrompt = agent.model?.chatOptions?.baseSystemMessage || ""; // Combine base system message with agent-specific prompt @@ -54,7 +57,6 @@ async function buildAgentSystemMessage( /** * Execute a subagent in a child session */ -// eslint-disable-next-line complexity export async function executeSubAgent( options: SubAgentExecutionOptions, ): Promise { @@ -101,9 +103,11 @@ export async function executeSubAgent( ? chatHistorySvc.isReady : undefined; - // Override system message for this execution + // Override system message for this execution (wrap in block format) if (services.systemMessage) { - services.systemMessage.getSystemMessage = async () => systemMessage; + services.systemMessage.getSystemMessage = async () => [ + { type: "text" as const, text: systemMessage }, + ]; } // Temporarily disable ChatHistoryService to prevent it from interfering with child session diff --git a/extensions/cli/src/systemMessage.test.ts b/extensions/cli/src/systemMessage.test.ts index e419d0bac1a..80f1657e006 100644 --- a/extensions/cli/src/systemMessage.test.ts +++ b/extensions/cli/src/systemMessage.test.ts @@ -2,7 +2,9 @@ import { describe, expect, it, vi } from "vitest"; // Use the actual implementation instead of the mocked one vi.unmock("./systemMessage.js"); -const { constructSystemMessage } = await import("./systemMessage.js"); +const { constructSystemMessage, flattenSystemMessage } = await import( + "./systemMessage.js" +); // Mock the service container to avoid "No factory registered for service 'config'" error vi.mock("./services/ServiceContainer.js", () => ({ @@ -21,10 +23,30 @@ vi.mock("./hubLoader.js", () => ({ const PLAN_MODE_STRING = "You are operating in _Plan Mode_"; +// Helper to get flattened string from blocks +async function getFlattened( + ...args: Parameters +): Promise { + const blocks = await constructSystemMessage(...args); + return flattenSystemMessage(blocks); +} + describe("constructSystemMessage", () => { + it("should return an array of SystemMessageBlock objects", async () => { + const result = await constructSystemMessage("normal"); + + expect(Array.isArray(result)).toBe(true); + expect(result.length).toBeGreaterThanOrEqual(2); + for (const block of result) { + expect(block).toHaveProperty("type", "text"); + expect(block).toHaveProperty("text"); + expect(typeof block.text).toBe("string"); + } + }); + it("should return base system message with rules when additionalRules is provided", async () => { const rules = ["These are the rules for the assistant."]; - const result = await constructSystemMessage("normal", rules); + const result = await getFlattened("normal", rules); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).toContain(''); @@ -34,7 +56,7 @@ describe("constructSystemMessage", () => { it("should return base system message with agent content when no rules but agent file exists", async () => { // The implementation checks for agent files like AGENTS.md which exists in this project - const result = await constructSystemMessage("normal"); + const result = await getFlattened("normal"); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).toContain(''); @@ -42,36 +64,35 @@ describe("constructSystemMessage", () => { }); it("should include base system message components", async () => { - const result = await constructSystemMessage("normal"); + const result = await getFlattened("normal"); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).toContain(""); - expect(result).toContain(''); expect(result).toContain(''); }); it("should handle whitespace-only rules message", async () => { - const result = await constructSystemMessage("normal", [" "]); + const result = await getFlattened("normal", [" "]); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).toContain(''); }); it("should include working directory information", async () => { - const result = await constructSystemMessage("normal"); + const result = await getFlattened("normal"); expect(result).toContain("Working directory:"); expect(result).toContain(""); }); it("should include platform information", async () => { - const result = await constructSystemMessage("normal"); + const result = await getFlattened("normal"); expect(result).toContain("Platform:"); }); it("should include current date", async () => { - const result = await constructSystemMessage("normal"); + const result = await getFlattened("normal"); expect(result).toContain("Today's date:"); expect(result).toContain(new Date().toISOString().split("T")[0]); @@ -79,7 +100,7 @@ describe("constructSystemMessage", () => { it("should format rules section correctly", async () => { const rulesMessage = "Rule 1: Do this\nRule 2: Do that"; - const result = await constructSystemMessage("normal", [rulesMessage]); + const result = await getFlattened("normal", [rulesMessage]); expect(result).toContain(''); expect(result).toContain(rulesMessage); @@ -90,7 +111,7 @@ describe("constructSystemMessage", () => { const rulesMessage = `Rule 1: First rule Rule 2: Second rule Rule 3: Third rule`; - const result = await constructSystemMessage("normal", [rulesMessage]); + const result = await getFlattened("normal", [rulesMessage]); expect(result).toContain(rulesMessage); expect(result).toContain(''); @@ -99,7 +120,7 @@ Rule 3: Third rule`; it("should handle special characters in rules message", async () => { const rulesMessage = "Rule with characters & symbols!"; - const result = await constructSystemMessage("normal", [rulesMessage]); + const result = await getFlattened("normal", [rulesMessage]); expect(result).toContain(rulesMessage); expect(result).toContain(''); @@ -107,7 +128,7 @@ Rule 3: Third rule`; it("should handle very long rules message", async () => { const rulesMessage = "A".repeat(1000); - const result = await constructSystemMessage("normal", [rulesMessage]); + const result = await getFlattened("normal", [rulesMessage]); expect(result).toContain(rulesMessage); expect(result).toContain(''); @@ -115,7 +136,7 @@ Rule 3: Third rule`; it("should combine rules and agent content when both are present", async () => { const rulesMessage = "These are the rules."; - const result = await constructSystemMessage("normal", [rulesMessage]); + const result = await getFlattened("normal", [rulesMessage]); expect(result).toContain(''); expect(result).toContain(rulesMessage); @@ -124,12 +145,7 @@ Rule 3: Third rule`; }); it("should add headless mode instructions when headless is true", async () => { - const result = await constructSystemMessage( - "normal", - undefined, - undefined, - true, - ); + const result = await getFlattened("normal", undefined, undefined, true); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).toContain("IMPORTANT: You are running in headless mode"); @@ -140,12 +156,7 @@ Rule 3: Third rule`; }); it("should not add headless mode instructions when headless is false", async () => { - const result = await constructSystemMessage( - "normal", - undefined, - undefined, - false, - ); + const result = await getFlattened("normal", undefined, undefined, false); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).not.toContain("IMPORTANT: You are running in headless mode"); @@ -153,7 +164,7 @@ Rule 3: Third rule`; }); it("should not add headless mode instructions when headless is undefined", async () => { - const result = await constructSystemMessage("normal"); + const result = await getFlattened("normal"); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).not.toContain("IMPORTANT: You are running in headless mode"); @@ -161,7 +172,7 @@ Rule 3: Third rule`; }); it("should add JSON format instructions when format is json", async () => { - const result = await constructSystemMessage("normal", undefined, "json"); + const result = await getFlattened("normal", undefined, "json"); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).toContain( @@ -172,12 +183,7 @@ Rule 3: Third rule`; }); it("should add plan mode instructions when mode is plan", async () => { - const result = await constructSystemMessage( - "plan", - undefined, - undefined, - false, - ); + const result = await getFlattened("plan", undefined, undefined, false); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).toContain(PLAN_MODE_STRING); @@ -192,12 +198,7 @@ Rule 3: Third rule`; }); it("should not add plan mode instructions when mode is not plan", async () => { - const result = await constructSystemMessage( - "normal", - undefined, - undefined, - false, - ); + const result = await getFlattened("normal", undefined, undefined, false); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).not.toContain(PLAN_MODE_STRING); @@ -207,7 +208,7 @@ Rule 3: Third rule`; }); it("should not add plan mode instructions when mode is normal", async () => { - const result = await constructSystemMessage("normal"); + const result = await getFlattened("normal"); expect(result).toContain("You are an agent in the Continue CLI"); expect(result).not.toContain(PLAN_MODE_STRING); @@ -217,12 +218,7 @@ Rule 3: Third rule`; }); it("should include basic plan mode description", async () => { - const result = await constructSystemMessage( - "plan", - undefined, - undefined, - false, - ); + const result = await getFlattened("plan", undefined, undefined, false); expect(result).toContain(PLAN_MODE_STRING); expect(result).toContain("read-only tools"); @@ -230,12 +226,7 @@ Rule 3: Third rule`; }); it("should combine plan mode with headless mode instructions", async () => { - const result = await constructSystemMessage( - "plan", - undefined, - undefined, - true, - ); + const result = await getFlattened("plan", undefined, undefined, true); expect(result).toContain("IMPORTANT: You are running in headless mode"); expect(result).toContain(PLAN_MODE_STRING); @@ -245,12 +236,7 @@ Rule 3: Third rule`; }); it("should combine plan mode with JSON format instructions", async () => { - const result = await constructSystemMessage( - "plan", - undefined, - "json", - false, - ); + const result = await getFlattened("plan", undefined, "json", false); expect(result).toContain( "IMPORTANT: You are operating in JSON output mode", @@ -260,4 +246,35 @@ Rule 3: Third rule`; "which means that your goal is to help the user investigate their ideas", ); }); + + it("should put static content in first block and dynamic content in last block", async () => { + const blocks = await constructSystemMessage("normal"); + + // First block should have static identity content + expect(blocks[0].text).toContain("You are an agent in the Continue CLI"); + + // Last block should have dynamic environment content + const lastBlock = blocks[blocks.length - 1]; + expect(lastBlock.text).toContain("Working directory:"); + expect(lastBlock.text).toContain(""); + expect(lastBlock.text).toContain("Platform:"); + }); + + it("should put user rules in a separate middle block", async () => { + const rules = ["Custom rule"]; + const blocks = await constructSystemMessage("normal", rules); + + // Should have at least 3 blocks: static, rules, dynamic + expect(blocks.length).toBeGreaterThanOrEqual(3); + + // Find the rules block (not first, not last) + const rulesBlock = blocks.find( + (b) => + b.text.includes('') && + b !== blocks[0] && + b !== blocks[blocks.length - 1], + ); + expect(rulesBlock).toBeDefined(); + expect(rulesBlock!.text).toContain("Custom rule"); + }); }); diff --git a/extensions/cli/src/systemMessage.ts b/extensions/cli/src/systemMessage.ts index b3277975e2a..8293fe46b66 100644 --- a/extensions/cli/src/systemMessage.ts +++ b/extensions/cli/src/systemMessage.ts @@ -2,14 +2,20 @@ import { execSync } from "child_process"; import * as fs from "fs"; import * as path from "path"; -import pkg from "ignore-walk"; -import { Minimatch } from "minimatch"; - import { processRule } from "./hubLoader.js"; import { PermissionMode } from "./permissions/types.js"; import { serviceContainer } from "./services/ServiceContainer.js"; import { ConfigServiceState, SERVICE_NAMES } from "./services/types.js"; -const { WalkerSync } = pkg; + +/** + * A content block within the system message. + * Split into separate blocks so that static content can be cached + * independently from dynamic content by Anthropic's prompt caching. + */ +export interface SystemMessageBlock { + type: "text"; + text: string; +} /** * Check if current directory is a git repository @@ -23,39 +29,6 @@ function isGitRepo(): boolean { } } -/** - * Get basic directory structure - */ -function getDirectoryStructure(): string { - try { - const walker = new WalkerSync({ - path: process.cwd(), - includeEmpty: false, - follow: false, - ignoreFiles: [".gitignore", ".continueignore", ".customignore"], - }); - - (walker.ignoreRules as any)[".customignore"] = [ - new Minimatch(".git/*", { - matchBase: true, - dot: true, - flipNegate: true, - nocase: true, - }), - ]; - - const files = walker.start().result as string[]; - - const filteredFiles = files - .slice(0, 500) - .map((file: string) => `./${file}`); - - return filteredFiles.join("\n") || "No structure available"; - } catch { - return "Directory structure not available"; - } -} - /** * Get git status */ @@ -74,30 +47,6 @@ function getGitStatus(): string { } } -const baseSystemMessage = `You are an agent in the Continue CLI. Given the user's prompt, you should use the tools available to you to answer the user's question. - -Notes: -1. IMPORTANT: You should be concise, direct, and to the point, since your responses will be displayed on a command line interface. -2. When relevant, share file names and code snippets relevant to the query -Here is useful information about the environment you are running in: - -Working directory: ${process.cwd()} -Is directory a git repo: ${isGitRepo()} -Platform: ${process.platform} -Today's date: ${new Date().toISOString().split("T")[0]} - - -As you answer the user's questions, you can use the following context: - -Below is a snapshot of this project's file structure at the start of the conversation. This snapshot will NOT update during the conversation. It skips over .gitignore patterns. - -${getDirectoryStructure()} - -This is the git status at the start of the conversation. Note that this status is a snapshot in time, and will not update during the conversation. - -${getGitStatus()} -`; - async function getConfigYamlRules(): Promise { const configState = await serviceContainer.get( SERVICE_NAMES.CONFIG, @@ -116,21 +65,81 @@ async function getConfigYamlRules(): Promise { } /** - * Load and construct a comprehensive system message with base message and rules section + * Flatten system message blocks into a single string. + * Useful for contexts that need a plain string (e.g. token counting, subagent prompts). + */ +export function flattenSystemMessage(blocks: SystemMessageBlock[]): string { + return blocks.map((b) => b.text).join("\n\n"); +} + +/** + * Load and construct a comprehensive system message as an array of content blocks. + * + * The blocks are ordered so that static content comes first (cacheable across + * all users/projects), semi-static content (user rules, same within a session) + * comes next, and dynamic content (environment info that changes per session) + * comes last. This maximizes Anthropic prompt cache hit rates because the + * static prefix remains identical across requests. + * + * @param mode - Current permission mode * @param additionalRules - Additional rules from --rule flags * @param format - Output format for headless mode * @param headless - Whether running in headless mode - * @param mode - Current permission mode - * @returns The comprehensive system message with base message and rules section + * @returns Array of system message content blocks */ export async function constructSystemMessage( mode: PermissionMode, additionalRules?: string[], format?: "json", headless?: boolean, -): Promise { - const agentFiles = ["AGENTS.md", "AGENT.md", "CLAUDE.md", "CODEX.md"]; +): Promise { + // --- Block 1: Static core identity and behavior instructions --- + // This content is identical for ALL users and ALL projects, maximizing cache hits. + let staticBlock = `You are an agent in the Continue CLI. Given the user's prompt, you should use the tools available to you to answer the user's question. + +Notes: +1. IMPORTANT: You should be concise, direct, and to the point, since your responses will be displayed on a command line interface. +2. When relevant, share file names and code snippets relevant to the query`; + + // Add mode-specific instructions to static block (these depend only on the mode, + // not on the environment, so they cache well within a mode) + if (mode === "plan") { + staticBlock += + '\nYou are operating in _Plan Mode_, which means that your goal is to help the user investigate their ideas and develop a plan before taking action. You only have access to read-only tools and should not attempt to circumvent them to write / delete / create files. Ask the user to switch to agent mode if they want to make changes. For example, it is not acceptable to use the Bash tool to write to files.'; + } else { + // Check if commit signature is disabled via environment variable + if (!process.env.CONTINUE_CLI_DISABLE_COMMIT_SIGNATURE) { + staticBlock += `\nWhen creating commits using any CLI or tool, include the following in the commit message: +Generated with [Continue](https://continue.dev) + +Co-Authored-By: Continue +`; + } + } + + // In headless mode, add instructions to be concise and only provide final answers + if (headless) { + staticBlock += ` +IMPORTANT: You are running in headless mode. Provide ONLY your final answer to the user's question. Do not include explanations, reasoning, or additional commentary unless specifically requested. Be direct and concise.`; + } + + // Add JSON formatting instructions if format is json + if (format === "json") { + staticBlock += ` + +IMPORTANT: You are operating in JSON output mode. Your final response MUST be valid JSON that can be parsed by JSON.parse(). The JSON should contain properties relevant to answer the user's question. You don't need to include any general "response" or "answer" field. Do not include any text before or after the JSON - the entire response must be parseable JSON. + +Example response format: +{ + "property": "value" +}`; + } + + const blocks: SystemMessageBlock[] = [{ type: "text", text: staticBlock }]; + + // --- Block 2: Semi-static user rules (same within a session, differs per project) --- + const agentFiles = ["AGENTS.md", "AGENT.md", "CLAUDE.md", "CODEX.md"]; let agentContent = ""; try { @@ -165,59 +174,40 @@ export async function constructSystemMessage( const configYamlRules = await getConfigYamlRules(); processedRules.push(...configYamlRules); - // Construct the comprehensive system message - let systemMessage = baseSystemMessage; + if (agentContent || processedRules.length > 0) { + let rulesText = ''; - // Add plan mode specific instructions if in plan mode - if (mode === "plan") { - systemMessage += - '\nYou are operating in _Plan Mode_, which means that your goal is to help the user investigate their ideas and develop a plan before taking action. You only have access to read-only tools and should not attempt to circumvent them to write / delete / create files. Ask the user to switch to agent mode if they want to make changes. For example, it is not acceptable to use the Bash tool to write to files.\n'; - } else { - // Check if commit signature is disabled via environment variable - if (!process.env.CONTINUE_CLI_DISABLE_COMMIT_SIGNATURE) { - systemMessage += `\nWhen creating commits using any CLI or tool, include the following in the commit message: -Generated with [Continue](https://continue.dev) + if (agentContent) { + rulesText += `\n${agentContent}`; + } -Co-Authored-By: Continue -\n`; + if (processedRules.length > 0) { + const separator = agentContent ? "\n\n" : "\n"; + rulesText += `${separator}${processedRules.join("\n\n")}`; } - } - // In headless mode, add instructions to be concise and only provide final answers - if (headless) { - systemMessage += ` + rulesText += "\n"; -IMPORTANT: You are running in headless mode. Provide ONLY your final answer to the user's question. Do not include explanations, reasoning, or additional commentary unless specifically requested. Be direct and concise.`; + blocks.push({ type: "text", text: rulesText }); } - // Add JSON formatting instructions if format is json - if (format === "json") { - systemMessage += ` - -IMPORTANT: You are operating in JSON output mode. Your final response MUST be valid JSON that can be parsed by JSON.parse(). The JSON should contain properties relevant to answer the user's question. You don't need to include any general "response" or "answer" field. Do not include any text before or after the JSON - the entire response must be parseable JSON. - -Example response format: -{ - "property": "value" -}`; - } + // --- Block 3: Dynamic environment info (changes per session/directory) --- + const dynamicBlock = `Here is useful information about the environment you are running in: + +Working directory: ${process.cwd()} +Is directory a git repo: ${isGitRepo()} +Platform: ${process.platform} +Today's date: ${new Date().toISOString().split("T")[0]} + - // Add rules section if we have any rules or agent content - if (agentContent || processedRules.length > 0) { - systemMessage += '\n\n'; +As you answer the user's questions, you can use the following context: - if (agentContent) { - systemMessage += `\n${agentContent}`; - } +This is the git status at the start of the conversation. Note that this status is a snapshot in time, and will not update during the conversation. - // Add processed rules from --rule flags - if (processedRules.length > 0) { - const separator = agentContent ? "\n\n" : "\n"; - systemMessage += `${separator}${processedRules.join("\n\n")}`; - } +${getGitStatus()} +`; - systemMessage += "\n"; - } + blocks.push({ type: "text", text: dynamicBlock }); - return systemMessage; + return blocks; } diff --git a/extensions/cli/vitest.setup.ts b/extensions/cli/vitest.setup.ts index ac91939574b..b535e9b529c 100644 --- a/extensions/cli/vitest.setup.ts +++ b/extensions/cli/vitest.setup.ts @@ -50,10 +50,16 @@ vi.mock("open", () => ({ // Mock constructSystemMessage to avoid service container issues in tests vi.mock("./src/systemMessage.js", () => ({ - constructSystemMessage: vi + constructSystemMessage: vi.fn().mockResolvedValue([ + { + type: "text", + text: "You are an agent in the Continue CLI. Given the user's prompt, you should use the tools available to you to answer the user's question.", + }, + ]), + flattenSystemMessage: vi .fn() - .mockResolvedValue( - "You are an agent in the Continue CLI. Given the user's prompt, you should use the tools available to you to answer the user's question.", + .mockImplementation((blocks: Array<{ type: string; text: string }>) => + blocks.map((b) => b.text).join("\n\n"), ), }));