diff --git a/.github/workflows/claude-cache-stability.yml b/.github/workflows/claude-cache-stability.yml new file mode 100644 index 00000000..3bffd29c --- /dev/null +++ b/.github/workflows/claude-cache-stability.yml @@ -0,0 +1,22 @@ +name: Claude Cache Stability + +on: [pull_request, push] + +jobs: + claude-cache-stability: + runs-on: ubuntu-latest + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: 22 + cache: npm + + - name: Install dependencies + run: npm ci + + - name: Run Claude cache stability test + run: npm run test:cache diff --git a/package.json b/package.json index 4a8439ab..bb8ed11a 100644 --- a/package.json +++ b/package.json @@ -18,7 +18,8 @@ "smoke:btc": "tsx scripts/real-btc-smoke.ts", "smoke:nested-market": "npm run build && tsx scripts/nested-market-smoke.ts", "test": "vitest run --config vitest.config.ts", - "typecheck": "tsc -p tsconfig.json --noEmit" + "typecheck": "tsc -p tsconfig.json --noEmit", + "test:cache": "vitest run --config vitest.config.ts tests/claude-cache-stability.test.ts" }, "dependencies": { "@modelcontextprotocol/sdk": "^1.23.0", diff --git a/src/agents/workflow-context.ts b/src/agents/workflow-context.ts index f7bf118e..c2082b3c 100644 --- a/src/agents/workflow-context.ts +++ b/src/agents/workflow-context.ts @@ -5,23 +5,28 @@ export function promptWithWorkflowContext(prompt: string, context: WorkflowAgent return prompt; } return [ + "Humanize2 workflow agent instructions:", + "- You are running as a Humanize2-managed agent.", + "- Read the workflow context block after the task before acting.", + "- vertexId is the workflow node identity for artifact ownership and routing;", + "- shortName is the human-facing agent/session alias and should not replace vertexId in workflow state.", + "Deliver expected artifacts back to Humanize2 through the listed MCP tools or JSON-RPC endpoint.", + "Do not inspect, signal, attach to, or mutate the 
Humanize2 hub process or its in-memory runtime state.", + "Do not repair workflow state directly; use Humanize2 artifact, board, event, message, or view APIs.", + "", + "Task:", + prompt, + "", "Humanize2 workflow context:", `- workflowRunId: ${context.workflowRunId}`, `- vertexId: ${context.vertexId}`, `- shortName: ${context.shortName}`, - "- vertexId is the workflow node identity for artifact ownership and routing;", - "- shortName is the human-facing agent/session alias and should not replace vertexId in workflow state.", `- jsonRpcUrl: ${context.jsonRpcUrl}`, - `- expectedArtifacts: ${JSON.stringify(context.expectedArtifacts)}`, - `- inputs: ${JSON.stringify(context.inputs ?? [])}`, + `- expectedArtifacts: ${stableJson(context.expectedArtifacts)}`, + `- inputs: ${stableJson(context.inputs ?? [])}`, `- mcpToolNames: ${context.mcpToolNames.join(", ")}`, "", - ...inputSnapshotSection(context), - "Deliver expected artifacts back to Humanize2 through the listed MCP tools or JSON-RPC endpoint.", - "Do not inspect, signal, attach to, or mutate the Humanize2 hub process or its in-memory runtime state.", - "Do not repair workflow state directly; use Humanize2 artifact, board, event, message, or view APIs.", - "", - prompt + ...inputSnapshotSection(context) ].join("\n"); } @@ -38,8 +43,8 @@ export function environmentWithWorkflowContext( HUMANIZE2_WORKFLOW_VERTEX_ID: context.vertexId, HUMANIZE2_WORKFLOW_SHORT_NAME: context.shortName, HUMANIZE2_WORKFLOW_JSONRPC_URL: context.jsonRpcUrl, - HUMANIZE2_WORKFLOW_EXPECTED_ARTIFACTS: JSON.stringify(context.expectedArtifacts), - HUMANIZE2_WORKFLOW_INPUTS: JSON.stringify(context.inputs ?? []), + HUMANIZE2_WORKFLOW_EXPECTED_ARTIFACTS: stableJson(context.expectedArtifacts), + HUMANIZE2_WORKFLOW_INPUTS: stableJson(context.inputs ?? 
[]), HUMANIZE2_WORKFLOW_MCP_TOOLS: context.mcpToolNames.join(",") }; } @@ -50,8 +55,26 @@ function inputSnapshotSection(context: WorkflowAgentLaunchContext): string[] { } return [ "Declared workflow input snapshots:", - JSON.stringify(context.inputs, null, 2), + stableJson(context.inputs, 2), "Treat these input snapshots as part of the current task contract.", "" ]; } + +function stableJson(value: unknown, space?: number): string { + return JSON.stringify(stableJsonValue(value), null, space); +} + +function stableJsonValue(value: unknown): unknown { + if (Array.isArray(value)) { + return value.map(stableJsonValue); + } + if (value === null || typeof value !== "object") { + return value; + } + + const object = value as Record<string, unknown>; + return Object.fromEntries( + Object.keys(object).sort().map((key) => [key, stableJsonValue(object[key])]) + ); +} diff --git a/tests/claude-cache-stability.test.ts b/tests/claude-cache-stability.test.ts new file mode 100644 index 00000000..bcffea92 --- /dev/null +++ b/tests/claude-cache-stability.test.ts @@ -0,0 +1,130 @@ +import { describe, expect, it } from "vitest"; + +import { promptWithWorkflowContext } from "../src/agents/workflow-context.js"; +import type { WorkflowAgentLaunchContext } from "../src/agents/types.js"; + +describe("Claude workflow prompt cache stability", () => { + it("keeps more than 90 percent of prompt bytes reusable across 100 dynamic workflow turns", () => { + const claudeCodeVersion = "2.1.143"; + const rounds = 100; + const taskPrompt = [ + "Implement the requested workflow task using the declared artifacts.", + stableTaskBody() + ].join("\n\n"); + const prompts = Array.from({ length: rounds }, (_, index) => + withSameClaudeCodeVersionEnvelope( + claudeCodeVersion, + promptWithWorkflowContext(taskPrompt, contextForTurn(index)) + ) + ); + + const cache = estimateCacheStability(prompts); + + expect(cache.claudeCodeVersion).toBe(claudeCodeVersion); + expect(cache.rounds).toBe(rounds); + 
expect(cache.averagePromptBytes).toBeGreaterThan(10_000); + expect(cache.averageReusablePrefixBytes).toBeGreaterThan(10_000); + expect(cache.cacheHitRatio).toBeGreaterThan(0.9); + }); +}); + +interface CacheEstimate { + claudeCodeVersion: string; + rounds: number; + averagePromptBytes: number; + averageReusablePrefixBytes: number; + cacheHitRatio: number; +} + +function withSameClaudeCodeVersionEnvelope(claudeCodeVersion: string, prompt: string): string { + return [ + `Claude Code version: ${claudeCodeVersion}`, + "Model: gpt-5.5", + "Permission mode: bypassPermissions", + "Output format: stream-json", + "", + prompt + ].join("\n"); +} + +function estimateCacheStability(prompts: string[]): CacheEstimate { + const reusablePrefixBytes = prompts.slice(1).map((prompt, index) => + commonPrefixLength(prompts[index], prompt) + ); + const promptBytes = prompts.map((prompt) => prompt.length); + const averagePromptBytes = average(promptBytes); + const averageReusablePrefixBytes = average(reusablePrefixBytes); + const version = /^Claude Code version: (.+)$/m.exec(prompts[0])?.[1] ?? 
"unknown"; + + return { + claudeCodeVersion: version, + rounds: prompts.length, + averagePromptBytes, + averageReusablePrefixBytes, + cacheHitRatio: averageReusablePrefixBytes / averagePromptBytes + }; +} + +function contextForTurn(index: number): WorkflowAgentLaunchContext { + return { + workflowRunId: `workflow-run-${index.toString().padStart(3, "0")}`, + vertexId: `reviewer-${index % 7}`, + shortName: `reviewer-${index % 5}`, + jsonRpcUrl: `http://127.0.0.1:${4772 + index}/jsonrpc`, + expectedArtifacts: [{ + schema: "rlcr.verdict.v1", + name: "verdict" + }], + inputs: [{ + kind: "artifact", + name: "draft", + schema: "draft.v1", + label: "Current draft", + optional: false, + producer: `builder-${index}`, + iteration: index + 1, + createdAt: `2026-05-16T10:${String(index % 60).padStart(2, "0")}:00.000Z`, + content: { + b: 2, + a: 1, + turn: index + } + }, { + kind: "board", + id: "loop-status", + label: "Loop status", + optional: true, + updatedAt: `2026-05-16T11:${String(index % 60).padStart(2, "0")}:00.000Z`, + value: { + status: index % 2 === 0 ? "revise" : "review", + requiredFollowUp: [`Fix-${index}`] + } + }], + mcpToolNames: [ + "artifact_deliver", + "workflow_get", + "board_patch", + "event_emit" + ] + }; +} + +function stableTaskBody(): string { + return Array.from({ length: 120 }, (_, index) => + `STABLE_TASK_LINE_${String(index + 1).padStart(3, "0")}: This deterministic task body represents reusable workflow instructions and stays unchanged across turns.` + ).join("\n"); +} + +function average(values: number[]): number { + return values.reduce((total, value) => total + value, 0) / values.length; +} + +function commonPrefixLength(left: string, right: string): number { + const limit = Math.min(left.length, right.length); + for (let index = 0; index < limit; index += 1) { + if (left[index] !== right[index]) { + return index; + } + } + return limit; +}