continuedev · sestinj · Feb 28, 2026 · Feb 28, 2026 · Feb 28, 2026 · Feb 28, 2026
@@ -351,21 +351,24 @@ export function convertFromUnifiedHistory(
 }
 
 /**
- * Convert ChatHistoryItem array to ChatCompletionMessageParam array with injected system message
+ * Convert a ChatHistoryItem array to a ChatCompletionMessageParam array, with the system message injected first.
+ * The system message may be either a plain string or an array of text content blocks.
+ * An array is not flattened: it is passed through unchanged as the system message content,
+ * which is intended to let Anthropic's prompt caching cache each block independently.
  * @param historyItems - The chat history items
- * @param systemMessage - The system message to inject at the beginning
+ * @param systemMessage - The system message to inject at the beginning: a string, or an array of { type: "text"; text: string } blocks
  */
 export function convertFromUnifiedHistoryWithSystemMessage(
   historyItems: ChatHistoryItem[],
-  systemMessage: string,
+  systemMessage: string | Array<{ type: "text"; text: string }>,
 ): ChatCompletionMessageParam[] {
   const messages: ChatCompletionMessageParam[] = [];
 
   // Inject system message at the beginning
   messages.push({
     role: "system",
     content: systemMessage,
-  });
+  } as ChatCompletionMessageParam);
 
   // Convert the rest of the history
   const convertedMessages = convertFromUnifiedHistory(historyItems);

@@ -1,7 +1,14 @@
 import { vi } from "vitest";
 
-export const constructSystemMessage = vi
+export const constructSystemMessage = vi.fn().mockResolvedValue([
+  {
+    type: "text",
+    text: "You are an agent in the Continue CLI. Given the user's prompt, you should use the tools available to you to answer the user's question.",
+  },
+]);
+
+export const flattenSystemMessage = vi
   .fn()
-  .mockResolvedValue(
-    "You are an agent in the Continue CLI. Given the user's prompt, you should use the tools available to you to answer the user's question.",
+  .mockImplementation((blocks: Array<{ type: string; text: string }>) =>
+    blocks.map((b) => b.text).join("\n\n"),
   );
@@ -43,7 +43,12 @@ describe("serve command", () => {
     }));
 
     vi.mock("../systemMessage.js", () => ({
-      constructSystemMessage: vi.fn(() => Promise.resolve("System message")),
+      constructSystemMessage: vi.fn(() =>
+        Promise.resolve([{ type: "text", text: "System message" }]),
+      ),
+      flattenSystemMessage: vi.fn((blocks: Array<{ text: string }>) =>
+        blocks.map((b) => b.text).join("\n\n"),
+      ),
     }));
 
     vi.mock("../telemetry/telemetryService.js", () => ({

@@ -29,7 +29,10 @@ import {
   loadOrCreateSessionById,
 } from "../session.js";
 import { messageQueue } from "../stream/messageQueue.js";
-import { constructSystemMessage } from "../systemMessage.js";
+import {
+  constructSystemMessage,
+  flattenSystemMessage,
+} from "../systemMessage.js";
 import { telemetryService } from "../telemetry/telemetryService.js";
 import { reportFailureTool } from "../tools/reportFailure.js";
 import { gracefulExit, updateAgentMetadata } from "../util/exit.js";
@@ -153,17 +156,20 @@ export async function serve(prompt?: string, options: ServeOptions = {}) {
   }
 
   // Initialize session with system message
-  const systemMessage = await constructSystemMessage(
+  const systemMessageBlocks = await constructSystemMessage(
     permissionsState.currentMode,
     options.rule,
     undefined,
     true,
   );
 
   const initialHistory: ChatHistoryItem[] = [];
-  if (systemMessage) {
+  if (systemMessageBlocks.length > 0) {
     initialHistory.push({
-      message: { role: "system" as const, content: systemMessage },
+      message: {
+        role: "system" as const,
+        content: flattenSystemMessage(systemMessageBlocks),
+      },
       contextItems: [],
     });
   }

@@ -70,7 +70,8 @@ describe("SystemMessageService", () => {
         headless: true,
       };
 
-      constructSystemMessageMock.mockResolvedValue("Test system message");
+      const mockBlocks = [{ type: "text", text: "Test system message" }];
+      constructSystemMessageMock.mockResolvedValue(mockBlocks);
 
       await service.initialize(config);
       const message = await service.getSystemMessage("normal");
@@ -81,7 +82,7 @@ describe("SystemMessageService", () => {
         "json",
         true,
       );
-      expect(message).toBe("Test system message");
+      expect(message).toEqual(mockBlocks);
     });
   });
 
@@ -96,7 +97,9 @@ describe("SystemMessageService", () => {
         format: "json",
       });
 
-      constructSystemMessageMock.mockResolvedValue("Updated message");
+      constructSystemMessageMock.mockResolvedValue([
+        { type: "text", text: "Updated message" },
+      ]);
       await service.getSystemMessage("normal");
 
       expect(constructSystemMessageMock).toHaveBeenCalledWith(
@@ -117,7 +120,9 @@ describe("SystemMessageService", () => {
         additionalRules: ["rule2", "rule3"],
       });
 
-      constructSystemMessageMock.mockResolvedValue("Updated message");
+      constructSystemMessageMock.mockResolvedValue([
+        { type: "text", text: "Updated message" },
+      ]);
       await service.getSystemMessage("normal");
 
       expect(constructSystemMessageMock).toHaveBeenCalledWith(

@@ -1,5 +1,8 @@
 import { PermissionMode } from "../permissions/types.js";
-import { constructSystemMessage } from "../systemMessage.js";
+import {
+  constructSystemMessage,
+  SystemMessageBlock,
+} from "../systemMessage.js";
 import { logger } from "../util/logger.js";
 
 import { BaseService } from "./BaseService.js";
@@ -46,7 +49,9 @@ export class SystemMessageService extends BaseService<SystemMessageServiceState>
   /**
    * Get a fresh system message with current mode and configuration
    */
-  public async getSystemMessage(currentMode: PermissionMode): Promise<string> {
+  public async getSystemMessage(
+    currentMode: PermissionMode,
+  ): Promise<SystemMessageBlock[]> {
     const { additionalRules, format, headless } = this.currentState;
 
     const systemMessage = await constructSystemMessage(
@@ -58,7 +63,7 @@ export class SystemMessageService extends BaseService<SystemMessageServiceState>
 
     logger.debug("Generated fresh system message", {
       mode: currentMode,
-      messageLength: systemMessage.length,
+      blockCount: systemMessage.length,
     });
 
     return systemMessage;

@@ -43,14 +43,22 @@ vi.mock("../util/logger.js", () => ({
 vi.mock("../services/index.js", () => ({
   services: {
     systemMessage: {
-      getSystemMessage: vi.fn(() => Promise.resolve("System message")),
+      getSystemMessage: vi.fn(() =>
+        Promise.resolve([{ type: "text", text: "System message" }]),
+      ),
     },
     toolPermissions: {
       getState: vi.fn(() => ({ currentMode: "enabled" })),
     },
   },
 }));
 
+vi.mock("../systemMessage.js", () => ({
+  flattenSystemMessage: vi.fn((blocks: Array<{ text: string }>) =>
+    blocks.map((b) => b.text).join("\n\n"),
+  ),
+}));
+
 vi.mock("os", async (importOriginal) => {
   const actual = (await importOriginal()) as object;
   return {

@@ -163,16 +163,17 @@ export async function handleAutoCompaction(
   try {
     // Get system message to calculate its token count for compaction pruning
     // Use provided message if available, otherwise fetch it (for backward compatibility)
-    const systemMessage =
-      providedSystemMessage ??
-      (async () => {
-        const { services } = await import("../services/index.js");
-        return services.systemMessage.getSystemMessage(
-          services.toolPermissions.getState().currentMode,
-        );
-      })();
-    const resolvedSystemMessage =
-      typeof systemMessage === "string" ? systemMessage : await systemMessage;
+    let resolvedSystemMessage: string;
+    if (providedSystemMessage === undefined) {
+      const { services } = await import("../services/index.js");
+      const { flattenSystemMessage } = await import("../systemMessage.js");
+      const blocks = await services.systemMessage.getSystemMessage(
+        services.toolPermissions.getState().currentMode,
+      );
+      resolvedSystemMessage = flattenSystemMessage(blocks);
+    } else {
+      resolvedSystemMessage = providedSystemMessage;
+    }
 
     const { countChatHistoryItemTokens } = await import("../util/tokenizer.js");
     const systemMessageTokens = countChatHistoryItemTokens(

@@ -52,7 +52,9 @@ vi.mock("../util/logger.js", () => ({
 vi.mock("../services/index.js", () => ({
   services: {
     systemMessage: {
-      getSystemMessage: vi.fn(() => Promise.resolve("System message")),
+      getSystemMessage: vi.fn(() =>
+        Promise.resolve([{ type: "text", text: "System message" }]),
+      ),
     },
     toolPermissions: {
       getState: vi.fn(() => ({ currentMode: "enabled" })),

@@ -4,6 +4,7 @@ import type { ChatHistoryItem } from "core/index.js";
 import type { ChatCompletionTool } from "openai/resources/chat/completions.mjs";
 
 import { services } from "../services/index.js";
+import { flattenSystemMessage, SystemMessageBlock } from "../systemMessage.js";
 import { ToolCall } from "../tools/index.js";
 import { logger } from "../util/logger.js";
 import { validateContextLength } from "../util/tokenizer.js";
@@ -17,7 +18,7 @@ export interface CompactionHelperOptions {
   isCompacting: boolean;
   isHeadless: boolean;
   callbacks?: StreamCallbacks;
-  systemMessage: string;
+  systemMessage: SystemMessageBlock[];
   tools?: ChatCompletionTool[];
 }
 
@@ -42,14 +43,16 @@ export async function handlePreApiCompaction(
     return { chatHistory, wasCompacted: false };
   }
 
+  const systemMessageString = flattenSystemMessage(systemMessage);
+
   const { wasCompacted, chatHistory: preCompactHistory } =
     await handleAutoCompaction(chatHistory, model, llmApi, {
       isHeadless,
       callbacks: {
         onSystemMessage: callbacks?.onSystemMessage,
         onContent: callbacks?.onContent,
       },
-      systemMessage,
+      systemMessage: systemMessageString,
       tools,
     });
 
@@ -84,6 +87,8 @@ export async function handlePostToolValidation(
     return { chatHistory, wasCompacted: false };
   }
 
+  const systemMessageString = flattenSystemMessage(systemMessage);
+
   // Get updated history after tool execution
   const chatHistorySvc = services.chatHistory;
   if (
@@ -98,7 +103,7 @@ export async function handlePostToolValidation(
     chatHistory,
     model,
     safetyBuffer: SAFETY_BUFFER,
-    systemMessage,
+    systemMessage: systemMessageString,
     tools,
   });
 
@@ -117,7 +122,7 @@ export async function handlePostToolValidation(
           onSystemMessage: callbacks?.onSystemMessage,
           onContent: callbacks?.onContent,
         },
-        systemMessage,
+        systemMessage: systemMessageString,
         tools,
       });
 
@@ -136,7 +141,7 @@ export async function handlePostToolValidation(
         chatHistory,
         model,
         safetyBuffer: SAFETY_BUFFER,
-        systemMessage,
+        systemMessage: systemMessageString,
         tools,
       });
 
@@ -185,6 +190,8 @@ export async function handleNormalAutoCompaction(
     return { chatHistory, wasCompacted: false };
   }
 
+  const systemMessageString = flattenSystemMessage(systemMessage);
+
   const chatHistorySvc = services.chatHistory;
   if (
     typeof chatHistorySvc?.isReady === "function" &&
@@ -200,7 +207,7 @@ export async function handleNormalAutoCompaction(
         onSystemMessage: callbacks?.onSystemMessage,
         onContent: callbacks?.onContent,
       },
-      systemMessage,
+      systemMessage: systemMessageString,
       tools,
     });
 

@@ -367,14 +367,33 @@ export function recordStreamTelemetry(options: {
   });
 
   // Mirror core metrics to PostHog for product analytics
+  const cacheReadTokens =
+    fullUsage?.prompt_tokens_details?.cache_read_tokens ?? 0;
+  const cacheWriteTokens =
+    fullUsage?.prompt_tokens_details?.cache_write_tokens ?? 0;
+
   try {
     posthogService.capture("apiRequest", {
       model: model.model,
       durationMs: totalDuration,
       inputTokens: actualInputTokens,
       outputTokens: actualOutputTokens,
       costUsd: cost,
+      cacheReadTokens,
+      cacheWriteTokens,
     });
+
+    // Emit prompt_cache_metrics for the Prompt Cache Performance dashboard
+    if (actualInputTokens > 0) {
+      posthogService.capture("prompt_cache_metrics", {
+        model: model.model,
+        cache_read_tokens: cacheReadTokens,
+        cache_write_tokens: cacheWriteTokens,
+        total_prompt_tokens: actualInputTokens,
+        cache_hit_rate: cacheReadTokens / actualInputTokens,
+        tool_count: tools?.length ?? 0,
+      });
+    }
   } catch {}
 
   return cost;