From 67bc76f9ca939f95b1eb4a707b9c59b83777f416 Mon Sep 17 00:00:00 2001
From: zhangyuxin <1679607834@qq.com>
Date: Mon, 18 May 2026 11:17:02 +0800
Subject: [PATCH] Stabilize workflow prompt caching

Move stable workflow agent instructions and task text ahead of volatile workflow context so Claude Code sees a reusable prompt prefix across workflow turns.

Serialize workflow artifacts and inputs with deterministic key ordering to avoid incidental prompt churn from object insertion order.

Add a CI cache stability check that pins the Claude Code version envelope to a constant and verifies that 100 dynamic workflow turns keep more than 90 percent of prompt bytes reusable, without calling external model services.

Validation: npm run test:cache; npm test; npm run typecheck; npm run build.
---
 .github/workflows/claude-cache-stability.yml |  22 ++++
 package.json                                 |   3 +-
 src/agents/workflow-context.ts               |  49 +++++--
 tests/claude-cache-stability.test.ts         | 130 +++++++++++++++++++
 4 files changed, 190 insertions(+), 14 deletions(-)
 create mode 100644 .github/workflows/claude-cache-stability.yml
 create mode 100644 tests/claude-cache-stability.test.ts

diff --git a/.github/workflows/claude-cache-stability.yml b/.github/workflows/claude-cache-stability.yml
new file mode 100644
index 00000000..3bffd29c
--- /dev/null
+++ b/.github/workflows/claude-cache-stability.yml
@@ -0,0 +1,27 @@
+# Runs the prompt cache stability test (`npm run test:cache`) on every push and pull request.
+name: Claude Cache Stability
+
+on: [pull_request, push]
+
+# The job only checks out and tests the code, so the token needs read access only.
+permissions:
+  contents: read
+
+jobs:
+  claude-cache-stability:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          cache: npm
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Run Claude cache stability test
+        run: npm run test:cache
diff --git a/package.json b/package.json
index 4a8439ab..bb8ed11a 100644
--- a/package.json
+++ b/package.json
@@ -18,7 +18,8 @@
     "smoke:btc": "tsx scripts/real-btc-smoke.ts",
     "smoke:nested-market": "npm run build && tsx scripts/nested-market-smoke.ts",
     "test": "vitest run --config vitest.config.ts",
-    "typecheck": "tsc -p tsconfig.json --noEmit"
+    "typecheck": "tsc -p tsconfig.json --noEmit",
+    "test:cache": "vitest run --config vitest.config.ts tests/claude-cache-stability.test.ts"
   },
   "dependencies": {
     "@modelcontextprotocol/sdk": "^1.23.0",
diff --git a/src/agents/workflow-context.ts b/src/agents/workflow-context.ts
index f7bf118e..c2082b3c 100644
--- a/src/agents/workflow-context.ts
+++ b/src/agents/workflow-context.ts
@@ -5,23 +5,28 @@ export function promptWithWorkflowContext(prompt: string, context: WorkflowAgent
     return prompt;
   }
   return [
+    "Humanize2 workflow agent instructions:",
+    "- You are running as a Humanize2-managed agent.",
+    "- Read the workflow context block after the task before acting.",
+    "- vertexId is the workflow node identity for artifact ownership and routing;",
+    "- shortName is the human-facing agent/session alias and should not replace vertexId in workflow state.",
+    "Deliver expected artifacts back to Humanize2 through the listed MCP tools or JSON-RPC endpoint.",
+    "Do not inspect, signal, attach to, or mutate the Humanize2 hub process or its in-memory runtime state.",
+    "Do not repair workflow state directly; use Humanize2 artifact, board, event, message, or view APIs.",
+    "",
+    "Task:",
+    prompt,
+    "",
     "Humanize2 workflow context:",
     `- workflowRunId: ${context.workflowRunId}`,
     `- vertexId: ${context.vertexId}`,
     `- shortName: ${context.shortName}`,
-    "- vertexId is the workflow node identity for artifact ownership and routing;",
-    "- shortName is the human-facing agent/session alias and should not replace vertexId in workflow state.",
     `- jsonRpcUrl: ${context.jsonRpcUrl}`,
-    `- expectedArtifacts: ${JSON.stringify(context.expectedArtifacts)}`,
-    `- inputs: ${JSON.stringify(context.inputs ?? [])}`,
+    `- expectedArtifacts: ${stableJson(context.expectedArtifacts)}`,
+    `- inputs: ${stableJson(context.inputs ?? [])}`,
     `- mcpToolNames: ${context.mcpToolNames.join(", ")}`,
     "",
-    ...inputSnapshotSection(context),
-    "Deliver expected artifacts back to Humanize2 through the listed MCP tools or JSON-RPC endpoint.",
-    "Do not inspect, signal, attach to, or mutate the Humanize2 hub process or its in-memory runtime state.",
-    "Do not repair workflow state directly; use Humanize2 artifact, board, event, message, or view APIs.",
-    "",
-    prompt
+    ...inputSnapshotSection(context)
   ].join("\n");
 }
 
@@ -38,8 +43,8 @@ export function environmentWithWorkflowContext(
     HUMANIZE2_WORKFLOW_VERTEX_ID: context.vertexId,
     HUMANIZE2_WORKFLOW_SHORT_NAME: context.shortName,
     HUMANIZE2_WORKFLOW_JSONRPC_URL: context.jsonRpcUrl,
-    HUMANIZE2_WORKFLOW_EXPECTED_ARTIFACTS: JSON.stringify(context.expectedArtifacts),
-    HUMANIZE2_WORKFLOW_INPUTS: JSON.stringify(context.inputs ?? []),
+    HUMANIZE2_WORKFLOW_EXPECTED_ARTIFACTS: stableJson(context.expectedArtifacts),
+    HUMANIZE2_WORKFLOW_INPUTS: stableJson(context.inputs ?? []),
     HUMANIZE2_WORKFLOW_MCP_TOOLS: context.mcpToolNames.join(",")
   };
 }
@@ -50,8 +55,53 @@ function inputSnapshotSection(context: WorkflowAgentLaunchContext): string[] {
   }
   return [
     "Declared workflow input snapshots:",
-    JSON.stringify(context.inputs, null, 2),
+    stableJson(context.inputs, 2),
     "Treat these input snapshots as part of the current task contract.",
     ""
   ];
 }
+
+/**
+ * Serializes `value` to JSON with every object's keys in sorted order.
+ *
+ * Two structurally equal values therefore produce identical text regardless of
+ * the order in which their properties were inserted.
+ *
+ * @param value - Data to serialize.
+ * @param space - Optional indentation width, forwarded to `JSON.stringify`.
+ * @returns The JSON text.
+ */
+function stableJson(value: unknown, space?: number): string {
+  return JSON.stringify(stableJsonValue(value), null, space);
+}
+
+/**
+ * Returns a copy of `value` whose plain-object keys are sorted at every depth.
+ *
+ * Objects exposing `toJSON` (for example `Date`) are converted through it first,
+ * as `JSON.stringify` would do; enumerating their own keys instead would
+ * serialize a `Date` as `{}`.
+ *
+ * @param value - Value to normalize.
+ * @param key - Property name or array index holding `value`, passed to `toJSON`.
+ * @returns The normalized value, ready for `JSON.stringify`.
+ */
+function stableJsonValue(value: unknown, key = ""): unknown {
+  if (value === null || typeof value !== "object") {
+    return value;
+  }
+
+  const { toJSON } = value as { toJSON?: unknown };
+  const resolved: unknown = typeof toJSON === "function" ? toJSON.call(value, key) : value;
+  if (resolved === null || typeof resolved !== "object") {
+    return resolved;
+  }
+  if (Array.isArray(resolved)) {
+    return resolved.map((item, index) => stableJsonValue(item, String(index)));
+  }
+
+  const record = resolved as Record<string, unknown>;
+  return Object.fromEntries(
+    Object.keys(record).sort().map((name) => [name, stableJsonValue(record[name], name)])
+  );
+}
diff --git a/tests/claude-cache-stability.test.ts b/tests/claude-cache-stability.test.ts
new file mode 100644
index 00000000..bcffea92
--- /dev/null
+++ b/tests/claude-cache-stability.test.ts
@@ -0,0 +1,173 @@
+import { describe, expect, it } from "vitest";
+
+import { promptWithWorkflowContext } from "../src/agents/workflow-context.js";
+import type { WorkflowAgentLaunchContext } from "../src/agents/types.js";
+
+describe("Claude workflow prompt cache stability", () => {
+  it("keeps more than 90 percent of prompt bytes reusable across 100 dynamic workflow turns", () => {
+    const claudeCodeVersion = "2.1.143";
+    const rounds = 100;
+    // The task text is identical on every turn; only the workflow context varies.
+    const taskPrompt = [
+      "Implement the requested workflow task using the declared artifacts.",
+      stableTaskBody()
+    ].join("\n\n");
+    const prompts = Array.from({ length: rounds }, (_, index) =>
+      withSameClaudeCodeVersionEnvelope(
+        claudeCodeVersion,
+        promptWithWorkflowContext(taskPrompt, contextForTurn(index))
+      )
+    );
+
+    const cache = estimateCacheStability(prompts);
+
+    expect(cache.claudeCodeVersion).toBe(claudeCodeVersion);
+    expect(cache.rounds).toBe(rounds);
+    expect(cache.averagePromptBytes).toBeGreaterThan(10_000);
+    expect(cache.averageReusablePrefixBytes).toBeGreaterThan(10_000);
+    expect(cache.cacheHitRatio).toBeGreaterThan(0.9);
+  });
+});
+
+/** Aggregate prefix-reuse metrics for a sequence of prompts. */
+interface CacheEstimate {
+  /** Version read from the `Claude Code version:` line of the first prompt, or "unknown". */
+  claudeCodeVersion: string;
+  /** Number of prompts measured. */
+  rounds: number;
+  /** Mean UTF-8 size of a prompt, in bytes. */
+  averagePromptBytes: number;
+  /** Mean number of leading bytes each prompt shares with the prompt before it. */
+  averageReusablePrefixBytes: number;
+  /** `averageReusablePrefixBytes` divided by `averagePromptBytes`. */
+  cacheHitRatio: number;
+}
+
+/**
+ * Prepends a fixed header naming the Claude Code version, model, permission
+ * mode, and output format to `prompt`, separated from it by a blank line.
+ */
+function withSameClaudeCodeVersionEnvelope(claudeCodeVersion: string, prompt: string): string {
+  return [
+    `Claude Code version: ${claudeCodeVersion}`,
+    "Model: gpt-5.5",
+    "Permission mode: bypassPermissions",
+    "Output format: stream-json",
+    "",
+    prompt
+  ].join("\n");
+}
+
+/**
+ * Measures how much of each prompt repeats the start of the prompt before it.
+ *
+ * Sizes are counted in UTF-8 bytes rather than UTF-16 code units, so the
+ * figures stay accurate if a prompt ever contains non-ASCII text.
+ *
+ * @param prompts - Prompts in turn order; at least two are required.
+ * @returns Averages over the prompts and their adjacent-pair shared prefixes.
+ * @throws RangeError if fewer than two prompts are supplied.
+ */
+function estimateCacheStability(prompts: string[]): CacheEstimate {
+  const [firstPrompt, ...laterPrompts] = prompts;
+  if (firstPrompt === undefined || laterPrompts.length === 0) {
+    throw new RangeError("estimateCacheStability requires at least two prompts");
+  }
+
+  const encoder = new TextEncoder();
+  let previous = encoder.encode(firstPrompt);
+  const promptBytes = [previous.length];
+  const reusablePrefixBytes: number[] = [];
+  for (const prompt of laterPrompts) {
+    const current = encoder.encode(prompt);
+    promptBytes.push(current.length);
+    reusablePrefixBytes.push(commonPrefixLength(previous, current));
+    previous = current;
+  }
+
+  const averagePromptBytes = average(promptBytes);
+  const averageReusablePrefixBytes = average(reusablePrefixBytes);
+  const version = /^Claude Code version: (.+)$/m.exec(firstPrompt)?.[1] ?? "unknown";
+
+  return {
+    claudeCodeVersion: version,
+    rounds: prompts.length,
+    averagePromptBytes,
+    averageReusablePrefixBytes,
+    cacheHitRatio: averageReusablePrefixBytes / averagePromptBytes
+  };
+}
+
+/**
+ * Builds the launch context for turn `index`.
+ *
+ * Identifiers, the JSON-RPC port, timestamps, and input payloads all depend on
+ * `index`; the expected artifacts and MCP tool names do not.
+ */
+function contextForTurn(index: number): WorkflowAgentLaunchContext {
+  const minute = String(index % 60).padStart(2, "0");
+  return {
+    workflowRunId: `workflow-run-${index.toString().padStart(3, "0")}`,
+    vertexId: `reviewer-${index % 7}`,
+    shortName: `reviewer-${index % 5}`,
+    jsonRpcUrl: `http://127.0.0.1:${4772 + index}/jsonrpc`,
+    expectedArtifacts: [{
+      schema: "rlcr.verdict.v1",
+      name: "verdict"
+    }],
+    inputs: [{
+      kind: "artifact",
+      name: "draft",
+      schema: "draft.v1",
+      label: "Current draft",
+      optional: false,
+      producer: `builder-${index}`,
+      iteration: index + 1,
+      createdAt: `2026-05-16T10:${minute}:00.000Z`,
+      // Keys are listed out of alphabetical order (b before a).
+      content: {
+        b: 2,
+        a: 1,
+        turn: index
+      }
+    }, {
+      kind: "board",
+      id: "loop-status",
+      label: "Loop status",
+      optional: true,
+      updatedAt: `2026-05-16T11:${minute}:00.000Z`,
+      value: {
+        status: index % 2 === 0 ? "revise" : "review",
+        requiredFollowUp: [`Fix-${index}`]
+      }
+    }],
+    mcpToolNames: [
+      "artifact_deliver",
+      "workflow_get",
+      "board_patch",
+      "event_emit"
+    ]
+  };
+}
+
+/** Returns 120 numbered lines of fixed text, joined with newlines. */
+function stableTaskBody(): string {
+  return Array.from({ length: 120 }, (_, index) =>
+    `STABLE_TASK_LINE_${String(index + 1).padStart(3, "0")}: This deterministic task body represents reusable workflow instructions and stays unchanged across turns.`
+  ).join("\n");
+}
+
+/** Arithmetic mean of `values`; evaluates to NaN for an empty array. */
+function average(values: number[]): number {
+  return values.reduce((total, value) => total + value, 0) / values.length;
+}
+
+/** Counts the leading bytes that `left` and `right` have in common. */
+function commonPrefixLength(left: Uint8Array, right: Uint8Array): number {
+  const limit = Math.min(left.length, right.length);
+  let shared = 0;
+  while (shared < limit && left[shared] === right[shared]) {
+    shared += 1;
+  }
+  return shared;
+}