PolyArch · SihaoLiu · May 19, 2026 · May 18, 2026
diff --git a/.github/workflows/claude-cache-stability.yml b/.github/workflows/claude-cache-stability.yml
@@ -0,0 +1,22 @@
+# Runs the Claude prompt cache-stability regression test (`npm run test:cache`).
+name: Claude Cache Stability
+
+# Triggered for every pull request and every push.
+on: [pull_request, push]
+
+# The job only needs to read the checked-out sources, so restrict the
+# GITHUB_TOKEN to read-only repository contents instead of the default scope.
+permissions:
+  contents: read
+
+jobs:
+  claude-cache-stability:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      # Node.js 22 with the npm download cache enabled.
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          cache: npm
+
+      # `npm ci` installs exactly what the lockfile pins.
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Run Claude cache stability test
+        run: npm run test:cache
diff --git a/package.json b/package.json
@@ -18,7 +18,8 @@
     "smoke:btc": "tsx scripts/real-btc-smoke.ts",
     "smoke:nested-market": "npm run build && tsx scripts/nested-market-smoke.ts",
     "test": "vitest run --config vitest.config.ts",
-    "typecheck": "tsc -p tsconfig.json --noEmit"
+    "typecheck": "tsc -p tsconfig.json --noEmit",
+    "test:cache": "vitest run --config vitest.config.ts tests/claude-cache-stability.test.ts"
   },
   "dependencies": {
     "@modelcontextprotocol/sdk": "^1.23.0",

diff --git a/src/agents/workflow-context.ts b/src/agents/workflow-context.ts
@@ -5,23 +5,28 @@ export function promptWithWorkflowContext(prompt: string, context: WorkflowAgent
     return prompt;
   }
   return [
+    "Humanize2 workflow agent instructions:",
+    "- You are running as a Humanize2-managed agent.",
+    "- Read the workflow context block after the task before acting.",
+    "- vertexId is the workflow node identity for artifact ownership and routing;",
+    "- shortName is the human-facing agent/session alias and should not replace vertexId in workflow state.",
+    "Deliver expected artifacts back to Humanize2 through the listed MCP tools or JSON-RPC endpoint.",
+    "Do not inspect, signal, attach to, or mutate the Humanize2 hub process or its in-memory runtime state.",
+    "Do not repair workflow state directly; use Humanize2 artifact, board, event, message, or view APIs.",
+    "",
+    "Task:",
+    prompt,
+    "",
     "Humanize2 workflow context:",
     `- workflowRunId: ${context.workflowRunId}`,
     `- vertexId: ${context.vertexId}`,
     `- shortName: ${context.shortName}`,
-    "- vertexId is the workflow node identity for artifact ownership and routing;",
-    "- shortName is the human-facing agent/session alias and should not replace vertexId in workflow state.",
     `- jsonRpcUrl: ${context.jsonRpcUrl}`,
-    `- expectedArtifacts: ${JSON.stringify(context.expectedArtifacts)}`,
-    `- inputs: ${JSON.stringify(context.inputs ?? [])}`,
+    `- expectedArtifacts: ${stableJson(context.expectedArtifacts)}`,
+    `- inputs: ${stableJson(context.inputs ?? [])}`,
     `- mcpToolNames: ${context.mcpToolNames.join(", ")}`,
     "",
-    ...inputSnapshotSection(context),
-    "Deliver expected artifacts back to Humanize2 through the listed MCP tools or JSON-RPC endpoint.",
-    "Do not inspect, signal, attach to, or mutate the Humanize2 hub process or its in-memory runtime state.",
-    "Do not repair workflow state directly; use Humanize2 artifact, board, event, message, or view APIs.",
-    "",
-    prompt
+    ...inputSnapshotSection(context)
   ].join("\n");
 }
 
@@ -38,8 +43,8 @@ export function environmentWithWorkflowContext(
     HUMANIZE2_WORKFLOW_VERTEX_ID: context.vertexId,
     HUMANIZE2_WORKFLOW_SHORT_NAME: context.shortName,
     HUMANIZE2_WORKFLOW_JSONRPC_URL: context.jsonRpcUrl,
-    HUMANIZE2_WORKFLOW_EXPECTED_ARTIFACTS: JSON.stringify(context.expectedArtifacts),
-    HUMANIZE2_WORKFLOW_INPUTS: JSON.stringify(context.inputs ?? []),
+    HUMANIZE2_WORKFLOW_EXPECTED_ARTIFACTS: stableJson(context.expectedArtifacts),
+    HUMANIZE2_WORKFLOW_INPUTS: stableJson(context.inputs ?? []),
     HUMANIZE2_WORKFLOW_MCP_TOOLS: context.mcpToolNames.join(",")
   };
 }
@@ -50,8 +55,26 @@ function inputSnapshotSection(context: WorkflowAgentLaunchContext): string[] {
   }
   return [
     "Declared workflow input snapshots:",
-    JSON.stringify(context.inputs, null, 2),
+    stableJson(context.inputs, 2),
     "Treat these input snapshots as part of the current task contract.",
     ""
   ];
 }
+
+/**
+ * Serializes `value` to JSON after passing it through `stableJsonValue`, which
+ * rebuilds plain objects with their keys in sorted order so the output text
+ * does not depend on property insertion order.
+ *
+ * @param value - The value to serialize.
+ * @param space - Optional indentation width forwarded to `JSON.stringify`.
+ * @returns The JSON text produced by `JSON.stringify`.
+ */
+function stableJson(value: unknown, space?: number): string {
+  const normalized = stableJsonValue(value);
+  return JSON.stringify(normalized, null, space);
+}
+
+/**
+ * Recursively copies a JSON-like value so that every plain object has its keys
+ * in sorted order, giving `JSON.stringify` a deterministic property order.
+ *
+ * Like `JSON.stringify`, a `toJSON()` method on an object is honored before the
+ * object is inspected, so values such as `Date` keep their usual string form
+ * instead of collapsing to `{}` (they have no own enumerable keys). Unlike
+ * `JSON.stringify`, `toJSON` is called without the property-key argument.
+ *
+ * @param value - Any value; primitives and `null` are returned unchanged.
+ * @returns A structurally equivalent value whose objects have sorted keys.
+ */
+function stableJsonValue(value: unknown): unknown {
+  if (value === null || typeof value !== "object") {
+    return value;
+  }
+
+  // Resolve toJSON once, as JSON.stringify does; the result is not resolved
+  // again, so a toJSON that returns `this` cannot recurse forever.
+  const candidate = value as { toJSON?: () => unknown };
+  const resolved = typeof candidate.toJSON === "function" ? candidate.toJSON() : value;
+  if (resolved === null || typeof resolved !== "object") {
+    return resolved;
+  }
+  if (Array.isArray(resolved)) {
+    return resolved.map((item) => stableJsonValue(item));
+  }
+
+  const object = resolved as Record<string, unknown>;
+  return Object.fromEntries(
+    Object.keys(object).sort().map((key) => [key, stableJsonValue(object[key])])
+  );
+}
diff --git a/tests/claude-cache-stability.test.ts b/tests/claude-cache-stability.test.ts
@@ -0,0 +1,130 @@
+import { describe, expect, it } from "vitest";
+
+import { promptWithWorkflowContext } from "../src/agents/workflow-context.js";
+import type { WorkflowAgentLaunchContext } from "../src/agents/types.js";
+
+// Regression suite: builds 100 workflow prompts whose context changes every
+// turn and checks that the shared leading text still dominates the prompt.
+describe("Claude workflow prompt cache stability", () => {
+  it("keeps more than 90 percent of prompt bytes reusable across 100 dynamic workflow turns", () => {
+    const expectedVersion = "2.1.143";
+    const turnCount = 100;
+    // The task text is identical on every turn; only the workflow context varies.
+    const task = "Implement the requested workflow task using the declared artifacts." + "\n\n" + stableTaskBody();
+
+    const turnPrompts: string[] = [];
+    for (let turn = 0; turn < turnCount; turn += 1) {
+      const workflowPrompt = promptWithWorkflowContext(task, contextForTurn(turn));
+      turnPrompts.push(withSameClaudeCodeVersionEnvelope(expectedVersion, workflowPrompt));
+    }
+
+    const estimate = estimateCacheStability(turnPrompts);
+
+    expect(estimate.claudeCodeVersion).toBe(expectedVersion);
+    expect(estimate.rounds).toBe(turnCount);
+    expect(estimate.averagePromptBytes).toBeGreaterThan(10_000);
+    expect(estimate.averageReusablePrefixBytes).toBeGreaterThan(10_000);
+    expect(estimate.cacheHitRatio).toBeGreaterThan(0.9);
+  });
+});
+
+/** Summary statistics returned by `estimateCacheStability`. */
+interface CacheEstimate {
+  /** Version parsed from the first prompt's "Claude Code version:" line, or "unknown". */
+  claudeCodeVersion: string;
+  /** Number of prompts that were analyzed. */
+  rounds: number;
+  /** Mean size of the analyzed prompts. */
+  averagePromptBytes: number;
+  /** Mean length of the prefix each prompt shares with the prompt before it. */
+  averageReusablePrefixBytes: number;
+  /** `averageReusablePrefixBytes` divided by `averagePromptBytes`. */
+  cacheHitRatio: number;
+}
+
+/**
+ * Prefixes a prompt with a fixed four-line session header followed by a blank
+ * line, so every generated prompt starts with the same envelope text.
+ *
+ * @param claudeCodeVersion - Version string written into the first header line.
+ * @param prompt - Prompt text placed after the header.
+ * @returns The header, a blank line and the prompt, joined by newlines.
+ */
+function withSameClaudeCodeVersionEnvelope(claudeCodeVersion: string, prompt: string): string {
+  // NOTE(review): the model line names a non-Claude model inside a Claude Code
+  // envelope — confirm this fixture value is intentional.
+  const header = [
+    `Claude Code version: ${claudeCodeVersion}`,
+    "Model: gpt-5.5",
+    "Permission mode: bypassPermissions",
+    "Output format: stream-json"
+  ];
+  return [...header, "", prompt].join("\n");
+}
+
+/**
+ * Estimates how much of each prompt could be reused from the previous one by
+ * measuring the common prefix of every consecutive pair of prompts.
+ *
+ * Sizes are `String.length` values (UTF-16 code units), which match byte
+ * counts only for ASCII text. With fewer than two prompts there are no pairs
+ * to compare, so the prefix average and the ratio are NaN.
+ *
+ * @param prompts - Prompts in the order they would be sent.
+ * @returns Averages over the prompts plus the version parsed from the first one.
+ */
+function estimateCacheStability(prompts: string[]): CacheEstimate {
+  // Compare each prompt with its immediate predecessor.
+  const prefixLengths: number[] = [];
+  for (let turn = 1; turn < prompts.length; turn += 1) {
+    prefixLengths.push(commonPrefixLength(prompts[turn - 1], prompts[turn]));
+  }
+
+  const promptLengths = prompts.map((text) => text.length);
+  const meanPromptLength = average(promptLengths);
+  const meanPrefixLength = average(prefixLengths);
+
+  const versionMatch = /^Claude Code version: (.+)$/m.exec(prompts[0]);
+  const parsedVersion = versionMatch?.[1] ?? "unknown";
+
+  return {
+    claudeCodeVersion: parsedVersion,
+    rounds: prompts.length,
+    averagePromptBytes: meanPromptLength,
+    averageReusablePrefixBytes: meanPrefixLength,
+    cacheHitRatio: meanPrefixLength / meanPromptLength
+  };
+}
+
+/**
+ * Builds the workflow launch context fixture for one turn. Run id, vertex id,
+ * short name, JSON-RPC port, producer, iteration, timestamps and board status
+ * all vary with `index`; the artifact schema and tool names stay fixed.
+ *
+ * @param index - Zero-based turn number.
+ * @returns The launch context for that turn.
+ */
+function contextForTurn(index: number): WorkflowAgentLaunchContext {
+  const runSuffix = index.toString().padStart(3, "0");
+  // Minutes wrap at 60 so the generated timestamps stay well-formed.
+  const minute = String(index % 60).padStart(2, "0");
+  const port = 4772 + index;
+  const loopStatus = index % 2 === 0 ? "revise" : "review";
+
+  // Input objects stay inline so they are typed against the return type.
+  return {
+    workflowRunId: `workflow-run-${runSuffix}`,
+    vertexId: `reviewer-${index % 7}`,
+    shortName: `reviewer-${index % 5}`,
+    jsonRpcUrl: `http://127.0.0.1:${port}/jsonrpc`,
+    expectedArtifacts: [{ schema: "rlcr.verdict.v1", name: "verdict" }],
+    inputs: [
+      {
+        kind: "artifact",
+        name: "draft",
+        schema: "draft.v1",
+        label: "Current draft",
+        optional: false,
+        producer: `builder-${index}`,
+        iteration: index + 1,
+        createdAt: `2026-05-16T10:${minute}:00.000Z`,
+        // Keys are deliberately not in alphabetical order.
+        content: { b: 2, a: 1, turn: index }
+      },
+      {
+        kind: "board",
+        id: "loop-status",
+        label: "Loop status",
+        optional: true,
+        updatedAt: `2026-05-16T11:${minute}:00.000Z`,
+        value: {
+          status: loopStatus,
+          requiredFollowUp: [`Fix-${index}`]
+        }
+      }
+    ],
+    mcpToolNames: ["artifact_deliver", "workflow_get", "board_patch", "event_emit"]
+  };
+}
+
+/**
+ * Produces 120 numbered filler lines that are identical on every call.
+ *
+ * @returns The lines joined by newlines, numbered 001 through 120.
+ */
+function stableTaskBody(): string {
+  const lines: string[] = [];
+  for (let lineNumber = 1; lineNumber <= 120; lineNumber += 1) {
+    const label = String(lineNumber).padStart(3, "0");
+    lines.push(
+      `STABLE_TASK_LINE_${label}: This deterministic task body represents reusable workflow instructions and stays unchanged across turns.`
+    );
+  }
+  return lines.join("\n");
+}
+
+/**
+ * Arithmetic mean of `values`.
+ *
+ * @param values - Numbers to average.
+ * @returns The sum divided by the element count; NaN for an empty array (0 / 0).
+ */
+function average(values: number[]): number {
+  let sum = 0;
+  values.forEach((value) => {
+    sum += value;
+  });
+  return sum / values.length;
+}
+
+/**
+ * Counts how many leading UTF-16 code units two strings share.
+ *
+ * @param left - First string.
+ * @param right - Second string.
+ * @returns Index of the first difference, or the shorter length if none is found.
+ */
+function commonPrefixLength(left: string, right: string): number {
+  const shortest = left.length <= right.length ? left.length : right.length;
+  let matched = 0;
+  while (matched < shortest && left[matched] === right[matched]) {
+    matched += 1;
+  }
+  return matched;
+}